NVPTX.cpp 9.3 KB

//===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements NVPTX TargetInfo objects.
//
//===----------------------------------------------------------------------===//
  12. #include "NVPTX.h"
  13. #include "Targets.h"
  14. #include "clang/Basic/Builtins.h"
  15. #include "clang/Basic/MacroBuilder.h"
  16. #include "clang/Basic/TargetBuiltins.h"
  17. #include "llvm/ADT/StringSwitch.h"
  18. using namespace clang;
  19. using namespace clang::targets;
  20. static constexpr Builtin::Info BuiltinInfo[] = {
  21. #define BUILTIN(ID, TYPE, ATTRS) \
  22. {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
  23. #define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) \
  24. {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, ALL_LANGUAGES},
  25. #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
  26. {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
  27. #include "clang/Basic/BuiltinsNVPTX.def"
  28. };
  29. const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0"};
  30. NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
  31. const TargetOptions &Opts,
  32. unsigned TargetPointerWidth)
  33. : TargetInfo(Triple) {
  34. assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) &&
  35. "NVPTX only supports 32- and 64-bit modes.");
  36. PTXVersion = 32;
  37. for (const StringRef Feature : Opts.FeaturesAsWritten) {
  38. int PTXV;
  39. if (!Feature.startswith("+ptx") ||
  40. Feature.drop_front(4).getAsInteger(10, PTXV))
  41. continue;
  42. PTXVersion = PTXV; // TODO: should it be max(PTXVersion, PTXV)?
  43. }
  44. TLSSupported = false;
  45. VLASupported = false;
  46. AddrSpaceMap = &NVPTXAddrSpaceMap;
  47. UseAddrSpaceMapMangling = true;
  48. // __bf16 is always available as a load/store only type.
  49. BFloat16Width = BFloat16Align = 16;
  50. BFloat16Format = &llvm::APFloat::BFloat();
  51. // Define available target features
  52. // These must be defined in sorted order!
  53. NoAsmVariants = true;
  54. GPU = CudaArch::SM_20;
  55. if (TargetPointerWidth == 32)
  56. resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
  57. else if (Opts.NVPTXUseShortPointers)
  58. resetDataLayout(
  59. "e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
  60. else
  61. resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64");
  62. // If possible, get a TargetInfo for our host triple, so we can match its
  63. // types.
  64. llvm::Triple HostTriple(Opts.HostTriple);
  65. if (!HostTriple.isNVPTX())
  66. HostTarget.reset(AllocateTarget(llvm::Triple(Opts.HostTriple), Opts));
  67. // If no host target, make some guesses about the data layout and return.
  68. if (!HostTarget) {
  69. LongWidth = LongAlign = TargetPointerWidth;
  70. PointerWidth = PointerAlign = TargetPointerWidth;
  71. switch (TargetPointerWidth) {
  72. case 32:
  73. SizeType = TargetInfo::UnsignedInt;
  74. PtrDiffType = TargetInfo::SignedInt;
  75. IntPtrType = TargetInfo::SignedInt;
  76. break;
  77. case 64:
  78. SizeType = TargetInfo::UnsignedLong;
  79. PtrDiffType = TargetInfo::SignedLong;
  80. IntPtrType = TargetInfo::SignedLong;
  81. break;
  82. default:
  83. llvm_unreachable("TargetPointerWidth must be 32 or 64");
  84. }
  85. return;
  86. }
  87. // Copy properties from host target.
  88. PointerWidth = HostTarget->getPointerWidth(LangAS::Default);
  89. PointerAlign = HostTarget->getPointerAlign(LangAS::Default);
  90. BoolWidth = HostTarget->getBoolWidth();
  91. BoolAlign = HostTarget->getBoolAlign();
  92. IntWidth = HostTarget->getIntWidth();
  93. IntAlign = HostTarget->getIntAlign();
  94. HalfWidth = HostTarget->getHalfWidth();
  95. HalfAlign = HostTarget->getHalfAlign();
  96. FloatWidth = HostTarget->getFloatWidth();
  97. FloatAlign = HostTarget->getFloatAlign();
  98. DoubleWidth = HostTarget->getDoubleWidth();
  99. DoubleAlign = HostTarget->getDoubleAlign();
  100. LongWidth = HostTarget->getLongWidth();
  101. LongAlign = HostTarget->getLongAlign();
  102. LongLongWidth = HostTarget->getLongLongWidth();
  103. LongLongAlign = HostTarget->getLongLongAlign();
  104. MinGlobalAlign = HostTarget->getMinGlobalAlign(/* TypeSize = */ 0);
  105. NewAlign = HostTarget->getNewAlign();
  106. DefaultAlignForAttributeAligned =
  107. HostTarget->getDefaultAlignForAttributeAligned();
  108. SizeType = HostTarget->getSizeType();
  109. IntMaxType = HostTarget->getIntMaxType();
  110. PtrDiffType = HostTarget->getPtrDiffType(LangAS::Default);
  111. IntPtrType = HostTarget->getIntPtrType();
  112. WCharType = HostTarget->getWCharType();
  113. WIntType = HostTarget->getWIntType();
  114. Char16Type = HostTarget->getChar16Type();
  115. Char32Type = HostTarget->getChar32Type();
  116. Int64Type = HostTarget->getInt64Type();
  117. SigAtomicType = HostTarget->getSigAtomicType();
  118. ProcessIDType = HostTarget->getProcessIDType();
  119. UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment();
  120. UseZeroLengthBitfieldAlignment = HostTarget->useZeroLengthBitfieldAlignment();
  121. UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment();
  122. ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary();
  123. // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and
  124. // we need those macros to be identical on host and device, because (among
  125. // other things) they affect which standard library classes are defined, and
  126. // we need all classes to be defined on both the host and device.
  127. MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth();
  128. // Properties intentionally not copied from host:
  129. // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the
  130. // host/device boundary.
  131. // - SuitableAlign: Not visible across the host/device boundary, and may
  132. // correctly be different on host/device, e.g. if host has wider vector
  133. // types than device.
  134. // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same
  135. // as its double type, but that's not necessarily true on the host.
  136. // TODO: nvcc emits a warning when using long double on device; we should
  137. // do the same.
  138. }
  139. ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const {
  140. return llvm::ArrayRef(GCCRegNames);
  141. }
  142. bool NVPTXTargetInfo::hasFeature(StringRef Feature) const {
  143. return llvm::StringSwitch<bool>(Feature)
  144. .Cases("ptx", "nvptx", true)
  145. .Default(false);
  146. }
  147. void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
  148. MacroBuilder &Builder) const {
  149. Builder.defineMacro("__PTX__");
  150. Builder.defineMacro("__NVPTX__");
  151. if (Opts.CUDAIsDevice || Opts.OpenMPIsDevice) {
  152. // Set __CUDA_ARCH__ for the GPU specified.
  153. std::string CUDAArchCode = [this] {
  154. switch (GPU) {
  155. case CudaArch::GFX600:
  156. case CudaArch::GFX601:
  157. case CudaArch::GFX602:
  158. case CudaArch::GFX700:
  159. case CudaArch::GFX701:
  160. case CudaArch::GFX702:
  161. case CudaArch::GFX703:
  162. case CudaArch::GFX704:
  163. case CudaArch::GFX705:
  164. case CudaArch::GFX801:
  165. case CudaArch::GFX802:
  166. case CudaArch::GFX803:
  167. case CudaArch::GFX805:
  168. case CudaArch::GFX810:
  169. case CudaArch::GFX900:
  170. case CudaArch::GFX902:
  171. case CudaArch::GFX904:
  172. case CudaArch::GFX906:
  173. case CudaArch::GFX908:
  174. case CudaArch::GFX909:
  175. case CudaArch::GFX90a:
  176. case CudaArch::GFX90c:
  177. case CudaArch::GFX940:
  178. case CudaArch::GFX1010:
  179. case CudaArch::GFX1011:
  180. case CudaArch::GFX1012:
  181. case CudaArch::GFX1013:
  182. case CudaArch::GFX1030:
  183. case CudaArch::GFX1031:
  184. case CudaArch::GFX1032:
  185. case CudaArch::GFX1033:
  186. case CudaArch::GFX1034:
  187. case CudaArch::GFX1035:
  188. case CudaArch::GFX1036:
  189. case CudaArch::GFX1100:
  190. case CudaArch::GFX1101:
  191. case CudaArch::GFX1102:
  192. case CudaArch::GFX1103:
  193. case CudaArch::Generic:
  194. case CudaArch::LAST:
  195. break;
  196. case CudaArch::UNUSED:
  197. case CudaArch::UNKNOWN:
  198. assert(false && "No GPU arch when compiling CUDA device code.");
  199. return "";
  200. case CudaArch::SM_20:
  201. return "200";
  202. case CudaArch::SM_21:
  203. return "210";
  204. case CudaArch::SM_30:
  205. return "300";
  206. case CudaArch::SM_32:
  207. return "320";
  208. case CudaArch::SM_35:
  209. return "350";
  210. case CudaArch::SM_37:
  211. return "370";
  212. case CudaArch::SM_50:
  213. return "500";
  214. case CudaArch::SM_52:
  215. return "520";
  216. case CudaArch::SM_53:
  217. return "530";
  218. case CudaArch::SM_60:
  219. return "600";
  220. case CudaArch::SM_61:
  221. return "610";
  222. case CudaArch::SM_62:
  223. return "620";
  224. case CudaArch::SM_70:
  225. return "700";
  226. case CudaArch::SM_72:
  227. return "720";
  228. case CudaArch::SM_75:
  229. return "750";
  230. case CudaArch::SM_80:
  231. return "800";
  232. case CudaArch::SM_86:
  233. return "860";
  234. case CudaArch::SM_87:
  235. return "870";
  236. case CudaArch::SM_89:
  237. return "890";
  238. case CudaArch::SM_90:
  239. return "900";
  240. }
  241. llvm_unreachable("unhandled CudaArch");
  242. }();
  243. Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
  244. }
  245. }
  246. ArrayRef<Builtin::Info> NVPTXTargetInfo::getTargetBuiltins() const {
  247. return llvm::ArrayRef(BuiltinInfo,
  248. clang::NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin);
  249. }