AMDGPU.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471
  1. //===--- AMDGPU.h - Declare AMDGPU target feature support -------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file declares AMDGPU TargetInfo objects.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #ifndef LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
  13. #define LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
  14. #include "clang/Basic/TargetID.h"
  15. #include "clang/Basic/TargetInfo.h"
  16. #include "clang/Basic/TargetOptions.h"
  17. #include "llvm/ADT/StringSet.h"
  18. #include "llvm/ADT/Triple.h"
  19. #include "llvm/Support/Compiler.h"
  20. #include "llvm/Support/TargetParser.h"
  21. #include <optional>
  22. namespace clang {
  23. namespace targets {
  24. class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo {
  25. static const char *const GCCRegNames[];
  26. enum AddrSpace {
  27. Generic = 0,
  28. Global = 1,
  29. Local = 3,
  30. Constant = 4,
  31. Private = 5
  32. };
  33. static const LangASMap AMDGPUDefIsGenMap;
  34. static const LangASMap AMDGPUDefIsPrivMap;
  35. llvm::AMDGPU::GPUKind GPUKind;
  36. unsigned GPUFeatures;
  37. unsigned WavefrontSize;
  38. /// Target ID is device name followed by optional feature name postfixed
  39. /// by plus or minus sign delimitted by colon, e.g. gfx908:xnack+:sramecc-.
  40. /// If the target ID contains feature+, map it to true.
  41. /// If the target ID contains feature-, map it to false.
  42. /// If the target ID does not contain a feature (default), do not map it.
  43. llvm::StringMap<bool> OffloadArchFeatures;
  44. std::string TargetID;
  45. bool hasFP64() const {
  46. return getTriple().getArch() == llvm::Triple::amdgcn ||
  47. !!(GPUFeatures & llvm::AMDGPU::FEATURE_FP64);
  48. }
  49. /// Has fast fma f32
  50. bool hasFastFMAF() const {
  51. return !!(GPUFeatures & llvm::AMDGPU::FEATURE_FAST_FMA_F32);
  52. }
  53. /// Has fast fma f64
  54. bool hasFastFMA() const {
  55. return getTriple().getArch() == llvm::Triple::amdgcn;
  56. }
  57. bool hasFMAF() const {
  58. return getTriple().getArch() == llvm::Triple::amdgcn ||
  59. !!(GPUFeatures & llvm::AMDGPU::FEATURE_FMA);
  60. }
  61. bool hasFullRateDenormalsF32() const {
  62. return !!(GPUFeatures & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32);
  63. }
  64. bool hasLDEXPF() const {
  65. return getTriple().getArch() == llvm::Triple::amdgcn ||
  66. !!(GPUFeatures & llvm::AMDGPU::FEATURE_LDEXP);
  67. }
  68. static bool isAMDGCN(const llvm::Triple &TT) {
  69. return TT.getArch() == llvm::Triple::amdgcn;
  70. }
  71. static bool isR600(const llvm::Triple &TT) {
  72. return TT.getArch() == llvm::Triple::r600;
  73. }
  74. public:
  75. AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts);
  76. void setAddressSpaceMap(bool DefaultIsPrivate);
  77. void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override;
  78. uint64_t getPointerWidthV(LangAS AS) const override {
  79. if (isR600(getTriple()))
  80. return 32;
  81. unsigned TargetAS = getTargetAddressSpace(AS);
  82. if (TargetAS == Private || TargetAS == Local)
  83. return 32;
  84. return 64;
  85. }
  86. uint64_t getPointerAlignV(LangAS AddrSpace) const override {
  87. return getPointerWidthV(AddrSpace);
  88. }
  89. uint64_t getMaxPointerWidth() const override {
  90. return getTriple().getArch() == llvm::Triple::amdgcn ? 64 : 32;
  91. }
  92. bool hasBFloat16Type() const override { return isAMDGCN(getTriple()); }
  93. const char *getBFloat16Mangling() const override { return "u6__bf16"; };
  94. const char *getClobbers() const override { return ""; }
  95. ArrayRef<const char *> getGCCRegNames() const override;
  96. ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override {
  97. return std::nullopt;
  98. }
  99. /// Accepted register names: (n, m is unsigned integer, n < m)
  100. /// v
  101. /// s
  102. /// a
  103. /// {vn}, {v[n]}
  104. /// {sn}, {s[n]}
  105. /// {an}, {a[n]}
  106. /// {S} , where S is a special register name
  107. ////{v[n:m]}
  108. /// {s[n:m]}
  109. /// {a[n:m]}
  110. bool validateAsmConstraint(const char *&Name,
  111. TargetInfo::ConstraintInfo &Info) const override {
  112. static const ::llvm::StringSet<> SpecialRegs({
  113. "exec", "vcc", "flat_scratch", "m0", "scc", "tba", "tma",
  114. "flat_scratch_lo", "flat_scratch_hi", "vcc_lo", "vcc_hi", "exec_lo",
  115. "exec_hi", "tma_lo", "tma_hi", "tba_lo", "tba_hi",
  116. });
  117. switch (*Name) {
  118. case 'I':
  119. Info.setRequiresImmediate(-16, 64);
  120. return true;
  121. case 'J':
  122. Info.setRequiresImmediate(-32768, 32767);
  123. return true;
  124. case 'A':
  125. case 'B':
  126. case 'C':
  127. Info.setRequiresImmediate();
  128. return true;
  129. default:
  130. break;
  131. }
  132. StringRef S(Name);
  133. if (S == "DA" || S == "DB") {
  134. Name++;
  135. Info.setRequiresImmediate();
  136. return true;
  137. }
  138. bool HasLeftParen = false;
  139. if (S.front() == '{') {
  140. HasLeftParen = true;
  141. S = S.drop_front();
  142. }
  143. if (S.empty())
  144. return false;
  145. if (S.front() != 'v' && S.front() != 's' && S.front() != 'a') {
  146. if (!HasLeftParen)
  147. return false;
  148. auto E = S.find('}');
  149. if (!SpecialRegs.count(S.substr(0, E)))
  150. return false;
  151. S = S.drop_front(E + 1);
  152. if (!S.empty())
  153. return false;
  154. // Found {S} where S is a special register.
  155. Info.setAllowsRegister();
  156. Name = S.data() - 1;
  157. return true;
  158. }
  159. S = S.drop_front();
  160. if (!HasLeftParen) {
  161. if (!S.empty())
  162. return false;
  163. // Found s, v or a.
  164. Info.setAllowsRegister();
  165. Name = S.data() - 1;
  166. return true;
  167. }
  168. bool HasLeftBracket = false;
  169. if (!S.empty() && S.front() == '[') {
  170. HasLeftBracket = true;
  171. S = S.drop_front();
  172. }
  173. unsigned long long N;
  174. if (S.empty() || consumeUnsignedInteger(S, 10, N))
  175. return false;
  176. if (!S.empty() && S.front() == ':') {
  177. if (!HasLeftBracket)
  178. return false;
  179. S = S.drop_front();
  180. unsigned long long M;
  181. if (consumeUnsignedInteger(S, 10, M) || N >= M)
  182. return false;
  183. }
  184. if (HasLeftBracket) {
  185. if (S.empty() || S.front() != ']')
  186. return false;
  187. S = S.drop_front();
  188. }
  189. if (S.empty() || S.front() != '}')
  190. return false;
  191. S = S.drop_front();
  192. if (!S.empty())
  193. return false;
  194. // Found {vn}, {sn}, {an}, {v[n]}, {s[n]}, {a[n]}, {v[n:m]}, {s[n:m]}
  195. // or {a[n:m]}.
  196. Info.setAllowsRegister();
  197. Name = S.data() - 1;
  198. return true;
  199. }
  200. // \p Constraint will be left pointing at the last character of
  201. // the constraint. In practice, it won't be changed unless the
  202. // constraint is longer than one character.
  203. std::string convertConstraint(const char *&Constraint) const override {
  204. StringRef S(Constraint);
  205. if (S == "DA" || S == "DB") {
  206. return std::string("^") + std::string(Constraint++, 2);
  207. }
  208. const char *Begin = Constraint;
  209. TargetInfo::ConstraintInfo Info("", "");
  210. if (validateAsmConstraint(Constraint, Info))
  211. return std::string(Begin).substr(0, Constraint - Begin + 1);
  212. Constraint = Begin;
  213. return std::string(1, *Constraint);
  214. }
  215. bool
  216. initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags,
  217. StringRef CPU,
  218. const std::vector<std::string> &FeatureVec) const override;
  219. ArrayRef<Builtin::Info> getTargetBuiltins() const override;
  220. bool useFP16ConversionIntrinsics() const override { return false; }
  221. void getTargetDefines(const LangOptions &Opts,
  222. MacroBuilder &Builder) const override;
  223. BuiltinVaListKind getBuiltinVaListKind() const override {
  224. return TargetInfo::CharPtrBuiltinVaList;
  225. }
  226. bool isValidCPUName(StringRef Name) const override {
  227. if (getTriple().getArch() == llvm::Triple::amdgcn)
  228. return llvm::AMDGPU::parseArchAMDGCN(Name) != llvm::AMDGPU::GK_NONE;
  229. return llvm::AMDGPU::parseArchR600(Name) != llvm::AMDGPU::GK_NONE;
  230. }
  231. void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
  232. bool setCPU(const std::string &Name) override {
  233. if (getTriple().getArch() == llvm::Triple::amdgcn) {
  234. GPUKind = llvm::AMDGPU::parseArchAMDGCN(Name);
  235. GPUFeatures = llvm::AMDGPU::getArchAttrAMDGCN(GPUKind);
  236. } else {
  237. GPUKind = llvm::AMDGPU::parseArchR600(Name);
  238. GPUFeatures = llvm::AMDGPU::getArchAttrR600(GPUKind);
  239. }
  240. return GPUKind != llvm::AMDGPU::GK_NONE;
  241. }
  242. void setSupportedOpenCLOpts() override {
  243. auto &Opts = getSupportedOpenCLOpts();
  244. Opts["cl_clang_storage_class_specifiers"] = true;
  245. Opts["__cl_clang_variadic_functions"] = true;
  246. Opts["__cl_clang_function_pointers"] = true;
  247. Opts["__cl_clang_non_portable_kernel_param_types"] = true;
  248. Opts["__cl_clang_bitfields"] = true;
  249. bool IsAMDGCN = isAMDGCN(getTriple());
  250. Opts["cl_khr_fp64"] = hasFP64();
  251. Opts["__opencl_c_fp64"] = hasFP64();
  252. if (IsAMDGCN || GPUKind >= llvm::AMDGPU::GK_CEDAR) {
  253. Opts["cl_khr_byte_addressable_store"] = true;
  254. Opts["cl_khr_global_int32_base_atomics"] = true;
  255. Opts["cl_khr_global_int32_extended_atomics"] = true;
  256. Opts["cl_khr_local_int32_base_atomics"] = true;
  257. Opts["cl_khr_local_int32_extended_atomics"] = true;
  258. }
  259. if (IsAMDGCN) {
  260. Opts["cl_khr_fp16"] = true;
  261. Opts["cl_khr_int64_base_atomics"] = true;
  262. Opts["cl_khr_int64_extended_atomics"] = true;
  263. Opts["cl_khr_mipmap_image"] = true;
  264. Opts["cl_khr_mipmap_image_writes"] = true;
  265. Opts["cl_khr_subgroups"] = true;
  266. Opts["cl_amd_media_ops"] = true;
  267. Opts["cl_amd_media_ops2"] = true;
  268. Opts["__opencl_c_images"] = true;
  269. Opts["__opencl_c_3d_image_writes"] = true;
  270. Opts["cl_khr_3d_image_writes"] = true;
  271. }
  272. }
  273. LangAS getOpenCLTypeAddrSpace(OpenCLTypeKind TK) const override {
  274. switch (TK) {
  275. case OCLTK_Image:
  276. return LangAS::opencl_constant;
  277. case OCLTK_ClkEvent:
  278. case OCLTK_Queue:
  279. case OCLTK_ReserveID:
  280. return LangAS::opencl_global;
  281. default:
  282. return TargetInfo::getOpenCLTypeAddrSpace(TK);
  283. }
  284. }
  285. LangAS getOpenCLBuiltinAddressSpace(unsigned AS) const override {
  286. switch (AS) {
  287. case 0:
  288. return LangAS::opencl_generic;
  289. case 1:
  290. return LangAS::opencl_global;
  291. case 3:
  292. return LangAS::opencl_local;
  293. case 4:
  294. return LangAS::opencl_constant;
  295. case 5:
  296. return LangAS::opencl_private;
  297. default:
  298. return getLangASFromTargetAS(AS);
  299. }
  300. }
  301. LangAS getCUDABuiltinAddressSpace(unsigned AS) const override {
  302. switch (AS) {
  303. case 0:
  304. return LangAS::Default;
  305. case 1:
  306. return LangAS::cuda_device;
  307. case 3:
  308. return LangAS::cuda_shared;
  309. case 4:
  310. return LangAS::cuda_constant;
  311. default:
  312. return getLangASFromTargetAS(AS);
  313. }
  314. }
  315. std::optional<LangAS> getConstantAddressSpace() const override {
  316. return getLangASFromTargetAS(Constant);
  317. }
  318. const llvm::omp::GV &getGridValue() const override {
  319. switch (WavefrontSize) {
  320. case 32:
  321. return llvm::omp::getAMDGPUGridValues<32>();
  322. case 64:
  323. return llvm::omp::getAMDGPUGridValues<64>();
  324. default:
  325. llvm_unreachable("getGridValue not implemented for this wavesize");
  326. }
  327. }
  328. /// \returns Target specific vtbl ptr address space.
  329. unsigned getVtblPtrAddressSpace() const override {
  330. return static_cast<unsigned>(Constant);
  331. }
  332. /// \returns If a target requires an address within a target specific address
  333. /// space \p AddressSpace to be converted in order to be used, then return the
  334. /// corresponding target specific DWARF address space.
  335. ///
  336. /// \returns Otherwise return std::nullopt and no conversion will be emitted
  337. /// in the DWARF.
  338. std::optional<unsigned>
  339. getDWARFAddressSpace(unsigned AddressSpace) const override {
  340. const unsigned DWARF_Private = 1;
  341. const unsigned DWARF_Local = 2;
  342. if (AddressSpace == Private) {
  343. return DWARF_Private;
  344. } else if (AddressSpace == Local) {
  345. return DWARF_Local;
  346. } else {
  347. return std::nullopt;
  348. }
  349. }
  350. CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
  351. switch (CC) {
  352. default:
  353. return CCCR_Warning;
  354. case CC_C:
  355. case CC_OpenCLKernel:
  356. case CC_AMDGPUKernelCall:
  357. return CCCR_OK;
  358. }
  359. }
  360. // In amdgcn target the null pointer in global, constant, and generic
  361. // address space has value 0 but in private and local address space has
  362. // value ~0.
  363. uint64_t getNullPointerValue(LangAS AS) const override {
  364. // FIXME: Also should handle region.
  365. return (AS == LangAS::opencl_local || AS == LangAS::opencl_private)
  366. ? ~0 : 0;
  367. }
  368. void setAuxTarget(const TargetInfo *Aux) override;
  369. bool hasBitIntType() const override { return true; }
  370. // Record offload arch features since they are needed for defining the
  371. // pre-defined macros.
  372. bool handleTargetFeatures(std::vector<std::string> &Features,
  373. DiagnosticsEngine &Diags) override {
  374. auto TargetIDFeatures =
  375. getAllPossibleTargetIDFeatures(getTriple(), getArchNameAMDGCN(GPUKind));
  376. for (const auto &F : Features) {
  377. assert(F.front() == '+' || F.front() == '-');
  378. if (F == "+wavefrontsize64")
  379. WavefrontSize = 64;
  380. bool IsOn = F.front() == '+';
  381. StringRef Name = StringRef(F).drop_front();
  382. if (!llvm::is_contained(TargetIDFeatures, Name))
  383. continue;
  384. assert(OffloadArchFeatures.find(Name) == OffloadArchFeatures.end());
  385. OffloadArchFeatures[Name] = IsOn;
  386. }
  387. return true;
  388. }
  389. std::optional<std::string> getTargetID() const override {
  390. if (!isAMDGCN(getTriple()))
  391. return std::nullopt;
  392. // When -target-cpu is not set, we assume generic code that it is valid
  393. // for all GPU and use an empty string as target ID to represent that.
  394. if (GPUKind == llvm::AMDGPU::GK_NONE)
  395. return std::string("");
  396. return getCanonicalTargetID(getArchNameAMDGCN(GPUKind),
  397. OffloadArchFeatures);
  398. }
  399. };
  400. } // namespace targets
  401. } // namespace clang
  402. #endif // LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H