X86Subtarget.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429
  1. //===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file declares the X86 specific subclass of TargetSubtargetInfo.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #ifndef LLVM_LIB_TARGET_X86_X86SUBTARGET_H
  13. #define LLVM_LIB_TARGET_X86_X86SUBTARGET_H
  14. #include "X86FrameLowering.h"
  15. #include "X86ISelLowering.h"
  16. #include "X86InstrInfo.h"
  17. #include "X86SelectionDAGInfo.h"
  18. #include "llvm/ADT/Triple.h"
  19. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  20. #include "llvm/IR/CallingConv.h"
  21. #include <climits>
  22. #include <memory>
  23. #define GET_SUBTARGETINFO_HEADER
  24. #include "X86GenSubtargetInfo.inc"
  25. namespace llvm {
  26. class CallLowering;
  27. class GlobalValue;
  28. class InstructionSelector;
  29. class LegalizerInfo;
  30. class RegisterBankInfo;
  31. class StringRef;
  32. class TargetMachine;
  /// Styles of position-independent code (PIC) that the X86 backend can emit.
  namespace PICStyles {

  enum class Style {
    /// Used on i386-darwin in PIC mode.
    StubPIC,
    /// Used on 32-bit ELF in PIC mode.
    GOT,
    /// Used on X86-64 in PIC mode.
    RIPRel,
    /// Set when not in PIC mode.
    None
  };

  } // end namespace PICStyles
  43. class X86Subtarget final : public X86GenSubtargetInfo {
  44. enum X86SSEEnum {
  45. NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512
  46. };
  47. enum X863DNowEnum {
  48. NoThreeDNow, MMX, ThreeDNow, ThreeDNowA
  49. };
  50. /// Which PIC style to use
  51. PICStyles::Style PICStyle;
  52. const TargetMachine &TM;
  53. /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
  54. X86SSEEnum X86SSELevel = NoSSE;
  55. /// MMX, 3DNow, 3DNow Athlon, or none supported.
  56. X863DNowEnum X863DNowLevel = NoThreeDNow;
  57. #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
  58. bool ATTRIBUTE = DEFAULT;
  59. #include "X86GenSubtargetInfo.inc"
  60. /// The minimum alignment known to hold of the stack frame on
  61. /// entry to the function and which must be maintained by every function.
  62. Align stackAlignment = Align(4);
  63. Align TileConfigAlignment = Align(4);
  64. /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
  65. ///
  66. // FIXME: this is a known good value for Yonah. How about others?
  67. unsigned MaxInlineSizeThreshold = 128;
  68. /// What processor and OS we're targeting.
  69. Triple TargetTriple;
  70. /// GlobalISel related APIs.
  71. std::unique_ptr<CallLowering> CallLoweringInfo;
  72. std::unique_ptr<LegalizerInfo> Legalizer;
  73. std::unique_ptr<RegisterBankInfo> RegBankInfo;
  74. std::unique_ptr<InstructionSelector> InstSelector;
  75. /// Override the stack alignment.
  76. MaybeAlign StackAlignOverride;
  77. /// Preferred vector width from function attribute.
  78. unsigned PreferVectorWidthOverride;
  79. /// Resolved preferred vector width from function attribute and subtarget
  80. /// features.
  81. unsigned PreferVectorWidth = UINT32_MAX;
  82. /// Required vector width from function attribute.
  83. unsigned RequiredVectorWidth;
  84. X86SelectionDAGInfo TSInfo;
  85. // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
  86. // X86TargetLowering needs.
  87. X86InstrInfo InstrInfo;
  88. X86TargetLowering TLInfo;
  89. X86FrameLowering FrameLowering;
  90. public:
  91. /// This constructor initializes the data members to match that
  92. /// of the specified triple.
  93. ///
  94. X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS,
  95. const X86TargetMachine &TM, MaybeAlign StackAlignOverride,
  96. unsigned PreferVectorWidthOverride,
  97. unsigned RequiredVectorWidth);
  98. const X86TargetLowering *getTargetLowering() const override {
  99. return &TLInfo;
  100. }
  101. const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }
  102. const X86FrameLowering *getFrameLowering() const override {
  103. return &FrameLowering;
  104. }
  105. const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
  106. return &TSInfo;
  107. }
  108. const X86RegisterInfo *getRegisterInfo() const override {
  109. return &getInstrInfo()->getRegisterInfo();
  110. }
  111. unsigned getTileConfigSize() const { return 64; }
  112. Align getTileConfigAlignment() const { return TileConfigAlignment; }
  113. /// Returns the minimum alignment known to hold of the
  114. /// stack frame on entry to the function and which must be maintained by every
  115. /// function for this subtarget.
  116. Align getStackAlignment() const { return stackAlignment; }
  117. /// Returns the maximum memset / memcpy size
  118. /// that still makes it profitable to inline the call.
  119. unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; }
  120. /// ParseSubtargetFeatures - Parses features string setting specified
  121. /// subtarget options. Definition of function is auto generated by tblgen.
  122. void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
  123. /// Methods used by Global ISel
  124. const CallLowering *getCallLowering() const override;
  125. InstructionSelector *getInstructionSelector() const override;
  126. const LegalizerInfo *getLegalizerInfo() const override;
  127. const RegisterBankInfo *getRegBankInfo() const override;
  128. private:
  129. /// Initialize the full set of dependencies so we can use an initializer
  130. /// list for X86Subtarget.
  131. X86Subtarget &initializeSubtargetDependencies(StringRef CPU,
  132. StringRef TuneCPU,
  133. StringRef FS);
  134. void initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
  135. public:
  136. #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
  137. bool GETTER() const { return ATTRIBUTE; }
  138. #include "X86GenSubtargetInfo.inc"
  139. /// Is this x86_64 with the ILP32 programming model (x32 ABI)?
  140. bool isTarget64BitILP32() const {
  141. return Is64Bit && (TargetTriple.isX32() || TargetTriple.isOSNaCl());
  142. }
  143. /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
  144. bool isTarget64BitLP64() const {
  145. return Is64Bit && (!TargetTriple.isX32() && !TargetTriple.isOSNaCl());
  146. }
  147. PICStyles::Style getPICStyle() const { return PICStyle; }
  148. void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
  149. bool canUseCMPXCHG8B() const { return hasCX8(); }
  150. bool canUseCMPXCHG16B() const {
  151. // CX16 is just the CPUID bit, instruction requires 64-bit mode too.
  152. return hasCX16() && is64Bit();
  153. }
  154. // SSE codegen depends on cmovs, and all SSE1+ processors support them.
  155. // All 64-bit processors support cmov.
  156. bool canUseCMOV() const { return hasCMOV() || hasSSE1() || is64Bit(); }
  157. bool hasSSE1() const { return X86SSELevel >= SSE1; }
  158. bool hasSSE2() const { return X86SSELevel >= SSE2; }
  159. bool hasSSE3() const { return X86SSELevel >= SSE3; }
  160. bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
  161. bool hasSSE41() const { return X86SSELevel >= SSE41; }
  162. bool hasSSE42() const { return X86SSELevel >= SSE42; }
  163. bool hasAVX() const { return X86SSELevel >= AVX; }
  164. bool hasAVX2() const { return X86SSELevel >= AVX2; }
  165. bool hasAVX512() const { return X86SSELevel >= AVX512; }
  166. bool hasInt256() const { return hasAVX2(); }
  167. bool hasMMX() const { return X863DNowLevel >= MMX; }
  168. bool hasThreeDNow() const { return X863DNowLevel >= ThreeDNow; }
  169. bool hasThreeDNowA() const { return X863DNowLevel >= ThreeDNowA; }
  170. bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
  171. bool hasPrefetchW() const {
  172. // The PREFETCHW instruction was added with 3DNow but later CPUs gave it
  173. // its own CPUID bit as part of deprecating 3DNow. Intel eventually added
  174. // it and KNL has another that prefetches to L2 cache. We assume the
  175. // L1 version exists if the L2 version does.
  176. return hasThreeDNow() || hasPRFCHW() || hasPREFETCHWT1();
  177. }
  178. bool hasSSEPrefetch() const {
  179. // We implicitly enable these when we have a write prefix supporting cache
  180. // level OR if we have prfchw, but don't already have a read prefetch from
  181. // 3dnow.
  182. return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) || hasPREFETCHWT1() ||
  183. hasPREFETCHI();
  184. }
  185. bool canUseLAHFSAHF() const { return hasLAHFSAHF64() || !is64Bit(); }
  186. // These are generic getters that OR together all of the thunk types
  187. // supported by the subtarget. Therefore useIndirectThunk*() will return true
  188. // if any respective thunk feature is enabled.
  189. bool useIndirectThunkCalls() const {
  190. return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity();
  191. }
  192. bool useIndirectThunkBranches() const {
  193. return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
  194. }
  195. unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
  196. unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
  197. // Helper functions to determine when we should allow widening to 512-bit
  198. // during codegen.
  199. // TODO: Currently we're always allowing widening on CPUs without VLX,
  200. // because for many cases we don't have a better option.
  201. bool canExtendTo512DQ() const {
  202. return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512);
  203. }
  204. bool canExtendTo512BW() const {
  205. return hasBWI() && canExtendTo512DQ();
  206. }
  207. // If there are no 512-bit vectors and we prefer not to use 512-bit registers,
  208. // disable them in the legalizer.
  209. bool useAVX512Regs() const {
  210. return hasAVX512() && (canExtendTo512DQ() || RequiredVectorWidth > 256);
  211. }
  212. bool useLight256BitInstructions() const {
  213. return getPreferVectorWidth() >= 256 || AllowLight256Bit;
  214. }
  215. bool useBWIRegs() const {
  216. return hasBWI() && useAVX512Regs();
  217. }
  218. bool isXRaySupported() const override { return is64Bit(); }
  219. /// Use clflush if we have SSE2 or we're on x86-64 (even if we asked for
  220. /// no-sse2). There isn't any reason to disable it if the target processor
  221. /// supports it.
  222. bool hasCLFLUSH() const { return hasSSE2() || is64Bit(); }
  223. /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
  224. /// no-sse2). There isn't any reason to disable it if the target processor
  225. /// supports it.
  226. bool hasMFence() const { return hasSSE2() || is64Bit(); }
  227. const Triple &getTargetTriple() const { return TargetTriple; }
  228. bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
  229. bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); }
  230. bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); }
  231. bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); }
  232. bool isTargetPS() const { return TargetTriple.isPS(); }
  233. bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
  234. bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
  235. bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
  236. bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
  237. bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); }
  238. bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); }
  239. bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
  240. bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
  241. bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
  242. bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
  243. bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); }
  244. bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
  245. bool isTargetWindowsMSVC() const {
  246. return TargetTriple.isWindowsMSVCEnvironment();
  247. }
  248. bool isTargetWindowsCoreCLR() const {
  249. return TargetTriple.isWindowsCoreCLREnvironment();
  250. }
  251. bool isTargetWindowsCygwin() const {
  252. return TargetTriple.isWindowsCygwinEnvironment();
  253. }
  254. bool isTargetWindowsGNU() const {
  255. return TargetTriple.isWindowsGNUEnvironment();
  256. }
  257. bool isTargetWindowsItanium() const {
  258. return TargetTriple.isWindowsItaniumEnvironment();
  259. }
  260. bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); }
  261. bool isOSWindows() const { return TargetTriple.isOSWindows(); }
  262. bool isTargetWin64() const { return Is64Bit && isOSWindows(); }
  263. bool isTargetWin32() const { return !Is64Bit && isOSWindows(); }
  264. bool isPICStyleGOT() const { return PICStyle == PICStyles::Style::GOT; }
  265. bool isPICStyleRIPRel() const { return PICStyle == PICStyles::Style::RIPRel; }
  266. bool isPICStyleStubPIC() const {
  267. return PICStyle == PICStyles::Style::StubPIC;
  268. }
  269. bool isPositionIndependent() const;
  270. bool isCallingConvWin64(CallingConv::ID CC) const {
  271. switch (CC) {
  272. // On Win64, all these conventions just use the default convention.
  273. case CallingConv::C:
  274. case CallingConv::Fast:
  275. case CallingConv::Tail:
  276. case CallingConv::Swift:
  277. case CallingConv::SwiftTail:
  278. case CallingConv::X86_FastCall:
  279. case CallingConv::X86_StdCall:
  280. case CallingConv::X86_ThisCall:
  281. case CallingConv::X86_VectorCall:
  282. case CallingConv::Intel_OCL_BI:
  283. return isTargetWin64();
  284. // This convention allows using the Win64 convention on other targets.
  285. case CallingConv::Win64:
  286. return true;
  287. // This convention allows using the SysV convention on Windows targets.
  288. case CallingConv::X86_64_SysV:
  289. return false;
  290. // Otherwise, who knows what this is.
  291. default:
  292. return false;
  293. }
  294. }
  295. /// Classify a global variable reference for the current subtarget according
  296. /// to how we should reference it in a non-pcrel context.
  297. unsigned char classifyLocalReference(const GlobalValue *GV) const;
  298. unsigned char classifyGlobalReference(const GlobalValue *GV,
  299. const Module &M) const;
  300. unsigned char classifyGlobalReference(const GlobalValue *GV) const;
  301. /// Classify a global function reference for the current subtarget.
  302. unsigned char classifyGlobalFunctionReference(const GlobalValue *GV,
  303. const Module &M) const;
  304. unsigned char
  305. classifyGlobalFunctionReference(const GlobalValue *GV) const override;
  306. /// Classify a blockaddress reference for the current subtarget according to
  307. /// how we should reference it in a non-pcrel context.
  308. unsigned char classifyBlockAddressReference() const;
  309. /// Return true if the subtarget allows calls to immediate address.
  310. bool isLegalToCallImmediateAddr() const;
  311. /// Return whether FrameLowering should always set the "extended frame
  312. /// present" bit in FP, or set it based on a symbol in the runtime.
  313. bool swiftAsyncContextIsDynamicallySet() const {
  314. // Older OS versions (particularly system unwinders) are confused by the
  315. // Swift extended frame, so when building code that might be run on them we
  316. // must dynamically query the concurrency library to determine whether
  317. // extended frames should be flagged as present.
  318. const Triple &TT = getTargetTriple();
  319. unsigned Major = TT.getOSVersion().getMajor();
  320. switch(TT.getOS()) {
  321. default:
  322. return false;
  323. case Triple::IOS:
  324. case Triple::TvOS:
  325. return Major < 15;
  326. case Triple::WatchOS:
  327. return Major < 8;
  328. case Triple::MacOSX:
  329. case Triple::Darwin:
  330. return Major < 12;
  331. }
  332. }
  333. /// If we are using indirect thunks, we need to expand indirectbr to avoid it
  334. /// lowering to an actual indirect jump.
  335. bool enableIndirectBrExpand() const override {
  336. return useIndirectThunkBranches();
  337. }
  338. /// Enable the MachineScheduler pass for all X86 subtargets.
  339. bool enableMachineScheduler() const override { return true; }
  340. bool enableEarlyIfConversion() const override;
  341. void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
  342. &Mutations) const override;
  343. AntiDepBreakMode getAntiDepBreakMode() const override {
  344. return TargetSubtargetInfo::ANTIDEP_CRITICAL;
  345. }
  346. };
  347. } // end namespace llvm
  348. #endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H