AMDGPU.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518
  1. //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements AMDGPU TargetInfo objects.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "AMDGPU.h"
  13. #include "clang/Basic/Builtins.h"
  14. #include "clang/Basic/CodeGenOptions.h"
  15. #include "clang/Basic/Diagnostic.h"
  16. #include "clang/Basic/LangOptions.h"
  17. #include "clang/Basic/MacroBuilder.h"
  18. #include "clang/Basic/TargetBuiltins.h"
  19. using namespace clang;
  20. using namespace clang::targets;
  21. namespace clang {
  22. namespace targets {
// LLVM data layout description strings for the two target flavours handled in
// this file. The AMDGPUTargetInfo constructor passes exactly one of them to
// resetDataLayout(): the AMDGCN string when isAMDGCN(Triple) holds, the R600
// string otherwise.
//
// The R600 string declares a 32-bit default pointer ("p:32:32"); the AMDGCN
// string declares a 64-bit default pointer ("p:64:64") and lists explicit
// pointer sizes for address spaces 1 through 6.
//
// If you edit the description strings, make sure you update
// getPointerWidthV().
static const char *const DataLayoutStringR600 =
    "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";

static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
    "-ni:7";
// Address space map in which the "Default" language address space maps to
// Generic. setAddressSpaceMap() installs this table when its DefaultIsPrivate
// argument is false.
//
// The table is positional: the trailing comment on each entry names the
// language address space that the entry stands for, so entries must not be
// reordered.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
    Generic,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // opencl_global_device
    Global,   // opencl_global_host
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    Global,   // sycl_global
    Global,   // sycl_global_device
    Global,   // sycl_global_host
    Local,    // sycl_local
    Private,  // sycl_private
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic,  // ptr64
    Generic,  // hlsl_groupshared
};
// Address space map in which the "Default" language address space maps to
// Private. setAddressSpaceMap() installs this table when its DefaultIsPrivate
// argument is true.
//
// The table is positional: the trailing comment on each entry names the
// language address space that the entry stands for, so entries must not be
// reordered.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
    Private,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // opencl_global_device
    Global,   // opencl_global_host
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    // SYCL address space values for this map are dummy: every SYCL entry
    // below is simply Generic.
    Generic,  // sycl_global
    Generic,  // sycl_global_device
    Generic,  // sycl_global_host
    Generic,  // sycl_local
    Generic,  // sycl_private
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic,  // ptr64
    Generic,  // hlsl_groupshared
};
  78. } // namespace targets
  79. } // namespace clang
// Table of builtin descriptors, populated by expanding the BUILTIN and
// TARGET_BUILTIN entries of BuiltinsAMDGPU.def. Each entry stores the
// stringified ID, the TYPE and ATTRS arguments, the FEATURE argument (nullptr
// for plain BUILTIN entries), HeaderDesc::NO_HEADER and ALL_LANGUAGES.
// getTargetBuiltins() returns a view over this array.
static constexpr Builtin::Info BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
// Register name table exposed through getGCCRegNames(). The names appear in
// this order:
//   - "v0" through "v255",
//   - "s0" through "s127",
//   - "exec", "vcc", "scc", "m0", "flat_scratch", "exec_lo", "exec_hi",
//     "vcc_lo", "vcc_hi", "flat_scratch_lo", "flat_scratch_hi",
//   - "a0" through "a255".
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
    "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
    "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
    "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
    "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
    "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
    "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
    "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
    "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
    "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
    "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
    "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
    "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
    "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
    "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
    "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
    "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
    "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
    "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
    "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
    "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
    "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
    "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
    "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
    "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
    "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
    "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
    "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
    "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
    "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
    "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
    "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
    "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
    "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
    "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
    "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
    "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
    "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
    "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
    "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
    "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
    "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
    "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
    "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
    "flat_scratch_lo", "flat_scratch_hi",
    "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
    "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
    "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
    "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
    "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
    "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
    "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
    "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
    "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
    "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
    "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
    "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
    "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
    "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
    "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
    "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
    "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
    "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
    "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
    "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
    "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
    "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
    "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
    "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
    "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
    "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
    "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
    "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
    "a252", "a253", "a254", "a255"
};
  163. ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
  164. return llvm::ArrayRef(GCCRegNames);
  165. }
  166. bool AMDGPUTargetInfo::initFeatureMap(
  167. llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
  168. const std::vector<std::string> &FeatureVec) const {
  169. const bool IsNullCPU = CPU.empty();
  170. bool IsWave32Capable = false;
  171. using namespace llvm::AMDGPU;
  172. // XXX - What does the member GPU mean if device name string passed here?
  173. if (isAMDGCN(getTriple())) {
  174. switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
  175. case GK_GFX1103:
  176. case GK_GFX1102:
  177. case GK_GFX1101:
  178. case GK_GFX1100:
  179. IsWave32Capable = true;
  180. Features["ci-insts"] = true;
  181. Features["dot5-insts"] = true;
  182. Features["dot7-insts"] = true;
  183. Features["dot8-insts"] = true;
  184. Features["dot9-insts"] = true;
  185. Features["dl-insts"] = true;
  186. Features["16-bit-insts"] = true;
  187. Features["dpp"] = true;
  188. Features["gfx8-insts"] = true;
  189. Features["gfx9-insts"] = true;
  190. Features["gfx10-insts"] = true;
  191. Features["gfx10-3-insts"] = true;
  192. Features["gfx11-insts"] = true;
  193. break;
  194. case GK_GFX1036:
  195. case GK_GFX1035:
  196. case GK_GFX1034:
  197. case GK_GFX1033:
  198. case GK_GFX1032:
  199. case GK_GFX1031:
  200. case GK_GFX1030:
  201. IsWave32Capable = true;
  202. Features["ci-insts"] = true;
  203. Features["dot1-insts"] = true;
  204. Features["dot2-insts"] = true;
  205. Features["dot5-insts"] = true;
  206. Features["dot6-insts"] = true;
  207. Features["dot7-insts"] = true;
  208. Features["dl-insts"] = true;
  209. Features["16-bit-insts"] = true;
  210. Features["dpp"] = true;
  211. Features["gfx8-insts"] = true;
  212. Features["gfx9-insts"] = true;
  213. Features["gfx10-insts"] = true;
  214. Features["gfx10-3-insts"] = true;
  215. Features["s-memrealtime"] = true;
  216. Features["s-memtime-inst"] = true;
  217. break;
  218. case GK_GFX1012:
  219. case GK_GFX1011:
  220. Features["dot1-insts"] = true;
  221. Features["dot2-insts"] = true;
  222. Features["dot5-insts"] = true;
  223. Features["dot6-insts"] = true;
  224. Features["dot7-insts"] = true;
  225. [[fallthrough]];
  226. case GK_GFX1013:
  227. case GK_GFX1010:
  228. IsWave32Capable = true;
  229. Features["dl-insts"] = true;
  230. Features["ci-insts"] = true;
  231. Features["16-bit-insts"] = true;
  232. Features["dpp"] = true;
  233. Features["gfx8-insts"] = true;
  234. Features["gfx9-insts"] = true;
  235. Features["gfx10-insts"] = true;
  236. Features["s-memrealtime"] = true;
  237. Features["s-memtime-inst"] = true;
  238. break;
  239. case GK_GFX940:
  240. Features["gfx940-insts"] = true;
  241. Features["fp8-insts"] = true;
  242. [[fallthrough]];
  243. case GK_GFX90A:
  244. Features["gfx90a-insts"] = true;
  245. [[fallthrough]];
  246. case GK_GFX908:
  247. Features["dot3-insts"] = true;
  248. Features["dot4-insts"] = true;
  249. Features["dot5-insts"] = true;
  250. Features["dot6-insts"] = true;
  251. Features["mai-insts"] = true;
  252. [[fallthrough]];
  253. case GK_GFX906:
  254. Features["dl-insts"] = true;
  255. Features["dot1-insts"] = true;
  256. Features["dot2-insts"] = true;
  257. Features["dot7-insts"] = true;
  258. [[fallthrough]];
  259. case GK_GFX90C:
  260. case GK_GFX909:
  261. case GK_GFX904:
  262. case GK_GFX902:
  263. case GK_GFX900:
  264. Features["gfx9-insts"] = true;
  265. [[fallthrough]];
  266. case GK_GFX810:
  267. case GK_GFX805:
  268. case GK_GFX803:
  269. case GK_GFX802:
  270. case GK_GFX801:
  271. Features["gfx8-insts"] = true;
  272. Features["16-bit-insts"] = true;
  273. Features["dpp"] = true;
  274. Features["s-memrealtime"] = true;
  275. [[fallthrough]];
  276. case GK_GFX705:
  277. case GK_GFX704:
  278. case GK_GFX703:
  279. case GK_GFX702:
  280. case GK_GFX701:
  281. case GK_GFX700:
  282. Features["ci-insts"] = true;
  283. [[fallthrough]];
  284. case GK_GFX602:
  285. case GK_GFX601:
  286. case GK_GFX600:
  287. Features["s-memtime-inst"] = true;
  288. break;
  289. case GK_NONE:
  290. break;
  291. default:
  292. llvm_unreachable("Unhandled GPU!");
  293. }
  294. } else {
  295. if (CPU.empty())
  296. CPU = "r600";
  297. switch (llvm::AMDGPU::parseArchR600(CPU)) {
  298. case GK_CAYMAN:
  299. case GK_CYPRESS:
  300. case GK_RV770:
  301. case GK_RV670:
  302. // TODO: Add fp64 when implemented.
  303. break;
  304. case GK_TURKS:
  305. case GK_CAICOS:
  306. case GK_BARTS:
  307. case GK_SUMO:
  308. case GK_REDWOOD:
  309. case GK_JUNIPER:
  310. case GK_CEDAR:
  311. case GK_RV730:
  312. case GK_RV710:
  313. case GK_RS880:
  314. case GK_R630:
  315. case GK_R600:
  316. break;
  317. default:
  318. llvm_unreachable("Unhandled GPU!");
  319. }
  320. }
  321. if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
  322. return false;
  323. // FIXME: Not diagnosing wavefrontsize32 on wave64 only targets.
  324. const bool HaveWave32 =
  325. (IsWave32Capable || IsNullCPU) && Features.count("wavefrontsize32");
  326. const bool HaveWave64 = Features.count("wavefrontsize64");
  327. // TODO: Should move this logic into TargetParser
  328. if (HaveWave32 && HaveWave64) {
  329. Diags.Report(diag::err_invalid_feature_combination)
  330. << "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive";
  331. return false;
  332. }
  333. // Don't assume any wavesize with an unknown subtarget.
  334. if (!IsNullCPU) {
  335. // Default to wave32 if available, or wave64 if not
  336. if (!HaveWave32 && !HaveWave64) {
  337. StringRef DefaultWaveSizeFeature =
  338. IsWave32Capable ? "wavefrontsize32" : "wavefrontsize64";
  339. Features.insert(std::make_pair(DefaultWaveSizeFeature, true));
  340. }
  341. }
  342. return true;
  343. }
  344. void AMDGPUTargetInfo::fillValidCPUList(
  345. SmallVectorImpl<StringRef> &Values) const {
  346. if (isAMDGCN(getTriple()))
  347. llvm::AMDGPU::fillValidArchListAMDGCN(Values);
  348. else
  349. llvm::AMDGPU::fillValidArchListR600(Values);
  350. }
  351. void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
  352. AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
  353. }
  354. AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
  355. const TargetOptions &Opts)
  356. : TargetInfo(Triple),
  357. GPUKind(isAMDGCN(Triple) ?
  358. llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
  359. llvm::AMDGPU::parseArchR600(Opts.CPU)),
  360. GPUFeatures(isAMDGCN(Triple) ?
  361. llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
  362. llvm::AMDGPU::getArchAttrR600(GPUKind)) {
  363. resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
  364. : DataLayoutStringR600);
  365. setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
  366. !isAMDGCN(Triple));
  367. UseAddrSpaceMapMangling = true;
  368. if (isAMDGCN(Triple)) {
  369. // __bf16 is always available as a load/store only type on AMDGCN.
  370. BFloat16Width = BFloat16Align = 16;
  371. BFloat16Format = &llvm::APFloat::BFloat();
  372. }
  373. HasLegalHalfType = true;
  374. HasFloat16 = true;
  375. WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
  376. AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
  377. // Set pointer width and alignment for the generic address space.
  378. PointerWidth = PointerAlign = getPointerWidthV(LangAS::Default);
  379. if (getMaxPointerWidth() == 64) {
  380. LongWidth = LongAlign = 64;
  381. SizeType = UnsignedLong;
  382. PtrDiffType = SignedLong;
  383. IntPtrType = SignedLong;
  384. }
  385. MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
  386. }
  387. void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
  388. TargetInfo::adjust(Diags, Opts);
  389. // ToDo: There are still a few places using default address space as private
  390. // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
  391. // can be removed from the following line.
  392. setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
  393. !isAMDGCN(getTriple()));
  394. }
  395. ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
  396. return llvm::ArrayRef(BuiltinInfo,
  397. clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin);
  398. }
  399. void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
  400. MacroBuilder &Builder) const {
  401. Builder.defineMacro("__AMD__");
  402. Builder.defineMacro("__AMDGPU__");
  403. if (isAMDGCN(getTriple()))
  404. Builder.defineMacro("__AMDGCN__");
  405. else
  406. Builder.defineMacro("__R600__");
  407. if (GPUKind != llvm::AMDGPU::GK_NONE) {
  408. StringRef CanonName = isAMDGCN(getTriple()) ?
  409. getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
  410. Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
  411. // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___
  412. if (isAMDGCN(getTriple())) {
  413. assert(CanonName.startswith("gfx") && "Invalid amdgcn canonical name");
  414. Builder.defineMacro(Twine("__") + Twine(CanonName.drop_back(2).upper()) +
  415. Twine("__"));
  416. }
  417. if (isAMDGCN(getTriple())) {
  418. Builder.defineMacro("__amdgcn_processor__",
  419. Twine("\"") + Twine(CanonName) + Twine("\""));
  420. Builder.defineMacro("__amdgcn_target_id__",
  421. Twine("\"") + Twine(*getTargetID()) + Twine("\""));
  422. for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
  423. auto Loc = OffloadArchFeatures.find(F);
  424. if (Loc != OffloadArchFeatures.end()) {
  425. std::string NewF = F.str();
  426. std::replace(NewF.begin(), NewF.end(), '-', '_');
  427. Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
  428. Twine("__"),
  429. Loc->second ? "1" : "0");
  430. }
  431. }
  432. }
  433. }
  434. if (AllowAMDGPUUnsafeFPAtomics)
  435. Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__");
  436. // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
  437. // removed in the near future.
  438. if (hasFMAF())
  439. Builder.defineMacro("__HAS_FMAF__");
  440. if (hasFastFMAF())
  441. Builder.defineMacro("FP_FAST_FMAF");
  442. if (hasLDEXPF())
  443. Builder.defineMacro("__HAS_LDEXPF__");
  444. if (hasFP64())
  445. Builder.defineMacro("__HAS_FP64__");
  446. if (hasFastFMA())
  447. Builder.defineMacro("FP_FAST_FMA");
  448. Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
  449. }
  450. void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
  451. assert(HalfFormat == Aux->HalfFormat);
  452. assert(FloatFormat == Aux->FloatFormat);
  453. assert(DoubleFormat == Aux->DoubleFormat);
  454. // On x86_64 long double is 80-bit extended precision format, which is
  455. // not supported by AMDGPU. 128-bit floating point format is also not
  456. // supported by AMDGPU. Therefore keep its own format for these two types.
  457. auto SaveLongDoubleFormat = LongDoubleFormat;
  458. auto SaveFloat128Format = Float128Format;
  459. auto SaveLongDoubleWidth = LongDoubleWidth;
  460. auto SaveLongDoubleAlign = LongDoubleAlign;
  461. copyAuxTarget(Aux);
  462. LongDoubleFormat = SaveLongDoubleFormat;
  463. Float128Format = SaveFloat128Format;
  464. LongDoubleWidth = SaveLongDoubleWidth;
  465. LongDoubleAlign = SaveLongDoubleAlign;
  466. // For certain builtin types support on the host target, claim they are
  467. // support to pass the compilation of the host code during the device-side
  468. // compilation.
  469. // FIXME: As the side effect, we also accept `__float128` uses in the device
  470. // code. To rejct these builtin types supported in the host target but not in
  471. // the device target, one approach would support `device_builtin` attribute
  472. // so that we could tell the device builtin types from the host ones. The
  473. // also solves the different representations of the same builtin type, such
  474. // as `size_t` in the MSVC environment.
  475. if (Aux->hasFloat128Type()) {
  476. HasFloat128 = true;
  477. Float128Format = DoubleFormat;
  478. }
  479. }