X86Subtarget.h 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999
  1. //===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file declares the X86 specific subclass of TargetSubtargetInfo.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #ifndef LLVM_LIB_TARGET_X86_X86SUBTARGET_H
  13. #define LLVM_LIB_TARGET_X86_X86SUBTARGET_H
  14. #include "X86FrameLowering.h"
  15. #include "X86ISelLowering.h"
  16. #include "X86InstrInfo.h"
  17. #include "X86SelectionDAGInfo.h"
  18. #include "llvm/ADT/Triple.h"
  19. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  20. #include "llvm/IR/CallingConv.h"
  21. #include <climits>
  22. #include <memory>
  23. #define GET_SUBTARGETINFO_HEADER
  24. #include "X86GenSubtargetInfo.inc"
  25. namespace llvm {
  26. class CallLowering;
  27. class GlobalValue;
  28. class InstructionSelector;
  29. class LegalizerInfo;
  30. class RegisterBankInfo;
  31. class StringRef;
  32. class TargetMachine;
/// The X86 backend supports a number of different styles of PIC.
///
namespace PICStyles {

enum class Style {
  StubPIC,  // Used on i386-darwin in PIC mode.
  GOT,      // Used on 32-bit ELF when in PIC mode.
  RIPRel,   // Used on X86-64 when in PIC mode.
  None      // Set when not in PIC mode.
};

} // end namespace PICStyles
class X86Subtarget final : public X86GenSubtargetInfo {
  // NOTE: Do not add anything new to this list. Coarse, CPU name based flags
  // are not a good idea. We should be migrating away from these.
  enum X86ProcFamilyEnum {
    Others,
    IntelAtom
  };

  /// SSE instruction-set generations; each level implies all earlier ones.
  enum X86SSEEnum {
    NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
  };

  /// MMX / 3DNow! instruction-set generations; each level implies earlier ones.
  enum X863DNowEnum {
    NoThreeDNow, MMX, ThreeDNow, ThreeDNowA
  };
  /// X86 processor family: Intel Atom, and others
  X86ProcFamilyEnum X86ProcFamily = Others;

  /// Which PIC style to use
  PICStyles::Style PICStyle;

  const TargetMachine &TM;

  /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
  X86SSEEnum X86SSELevel = NoSSE;

  /// MMX, 3DNow, 3DNow Athlon, or none supported.
  X863DNowEnum X863DNowLevel = NoThreeDNow;

  /// True if the processor supports X87 instructions.
  bool HasX87 = false;

  /// True if the processor supports CMPXCHG8B.
  bool HasCmpxchg8b = false;

  /// True if this processor has NOPL instruction
  /// (generally pentium pro+).
  bool HasNOPL = false;

  /// True if this processor has conditional move instructions
  /// (generally pentium pro+).
  bool HasCMov = false;

  /// True if the processor supports X86-64 instructions.
  bool HasX86_64 = false;

  /// True if the processor supports POPCNT.
  bool HasPOPCNT = false;

  /// True if the processor supports SSE4A instructions.
  bool HasSSE4A = false;

  /// Target has AES instructions
  bool HasAES = false;
  /// Target has VAES instructions.
  bool HasVAES = false;

  /// Target has FXSAVE/FXRESTOR instructions
  bool HasFXSR = false;

  /// Target has XSAVE instructions
  bool HasXSAVE = false;

  /// Target has XSAVEOPT instructions
  bool HasXSAVEOPT = false;

  /// Target has XSAVEC instructions
  bool HasXSAVEC = false;

  /// Target has XSAVES instructions
  bool HasXSAVES = false;

  /// Target has carry-less multiplication
  bool HasPCLMUL = false;
  /// Target has VPCLMULQDQ instructions.
  bool HasVPCLMULQDQ = false;

  /// Target has Galois Field Arithmetic instructions
  bool HasGFNI = false;

  /// Target has 3-operand fused multiply-add
  bool HasFMA = false;

  /// Target has 4-operand fused multiply-add
  bool HasFMA4 = false;

  /// Target has XOP instructions
  bool HasXOP = false;

  /// Target has TBM instructions.
  bool HasTBM = false;

  /// Target has LWP instructions
  bool HasLWP = false;

  /// True if the processor has the MOVBE instruction.
  bool HasMOVBE = false;

  /// True if the processor has the RDRAND instruction.
  bool HasRDRAND = false;

  /// Processor has 16-bit floating point conversion instructions.
  bool HasF16C = false;

  /// Processor has FS/GS base instructions.
  bool HasFSGSBase = false;

  /// Processor has LZCNT instruction.
  bool HasLZCNT = false;

  /// Processor has BMI1 instructions.
  bool HasBMI = false;

  /// Processor has BMI2 instructions.
  bool HasBMI2 = false;

  /// Processor has VBMI instructions.
  bool HasVBMI = false;

  /// Processor has VBMI2 instructions.
  bool HasVBMI2 = false;

  /// Processor has Integer Fused Multiply Add
  bool HasIFMA = false;

  /// Processor has RTM instructions.
  bool HasRTM = false;

  /// Processor has ADX instructions.
  bool HasADX = false;

  /// Processor has SHA instructions.
  bool HasSHA = false;

  /// Processor has PRFCHW instructions.
  bool HasPRFCHW = false;

  /// Processor has RDSEED instructions.
  bool HasRDSEED = false;

  /// Processor has LAHF/SAHF instructions in 64-bit mode.
  bool HasLAHFSAHF64 = false;

  /// Processor has MONITORX/MWAITX instructions.
  bool HasMWAITX = false;

  /// Processor has Cache Line Zero instruction
  bool HasCLZERO = false;

  /// Processor has Cache Line Demote instruction
  bool HasCLDEMOTE = false;

  /// Processor has MOVDIRI instruction (direct store integer).
  bool HasMOVDIRI = false;

  /// Processor has MOVDIR64B instruction (direct store 64 bytes).
  bool HasMOVDIR64B = false;

  /// Processor has ptwrite instruction.
  bool HasPTWRITE = false;

  /// Processor has Prefetch with intent to Write instruction
  bool HasPREFETCHWT1 = false;

  /// True if SHLD instructions are slow.
  bool IsSHLDSlow = false;

  /// True if the PMULLD instruction is slow compared to PMULLW/PMULHW and
  /// PMULUDQ.
  bool IsPMULLDSlow = false;

  /// True if the PMADDWD instruction is slow compared to PMULLD.
  bool IsPMADDWDSlow = false;

  /// True if unaligned memory accesses of 16-bytes are slow.
  bool IsUAMem16Slow = false;

  /// True if unaligned memory accesses of 32-bytes are slow.
  bool IsUAMem32Slow = false;

  /// True if SSE operations can have unaligned memory operands.
  /// This may require setting a configuration bit in the processor.
  bool HasSSEUnalignedMem = false;

  /// True if this processor has the CMPXCHG16B instruction;
  /// this is true for most x86-64 chips, but not the first AMD chips.
  bool HasCmpxchg16b = false;

  /// True if the LEA instruction should be used for adjusting
  /// the stack pointer. This is an optimization for Intel Atom processors.
  bool UseLeaForSP = false;

  /// True if POPCNT instruction has a false dependency on the destination
  /// register.
  bool HasPOPCNTFalseDeps = false;

  /// True if LZCNT/TZCNT instructions have a false dependency on the
  /// destination register.
  bool HasLZCNTFalseDeps = false;

  /// True if it's preferable to combine to a single cross-lane shuffle
  /// using a variable mask over multiple fixed shuffles.
  bool HasFastVariableCrossLaneShuffle = false;

  /// True if it's preferable to combine to a single per-lane shuffle
  /// using a variable mask over multiple fixed shuffles.
  bool HasFastVariablePerLaneShuffle = false;

  /// True if vzeroupper instructions should be inserted after code that uses
  /// ymm or zmm registers.
  bool InsertVZEROUPPER = false;

  /// True if there is no performance penalty for writing NOPs with up to
  /// 7 bytes.
  bool HasFast7ByteNOP = false;

  /// True if there is no performance penalty for writing NOPs with up to
  /// 11 bytes.
  bool HasFast11ByteNOP = false;

  /// True if there is no performance penalty for writing NOPs with up to
  /// 15 bytes.
  bool HasFast15ByteNOP = false;
  /// True if gather is reasonably fast. This is true for Skylake client and
  /// all AVX-512 CPUs.
  bool HasFastGather = false;

  /// True if hardware SQRTSS instruction is at least as fast (latency) as
  /// RSQRTSS followed by a Newton-Raphson iteration.
  bool HasFastScalarFSQRT = false;

  /// True if hardware SQRTPS/VSQRTPS instructions are at least as fast
  /// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
  bool HasFastVectorFSQRT = false;

  /// True if 8-bit divisions are significantly faster than
  /// 32-bit divisions and should be used when possible.
  bool HasSlowDivide32 = false;

  /// True if 32-bit divides are significantly faster than
  /// 64-bit divisions and should be used when possible.
  bool HasSlowDivide64 = false;

  /// True if LZCNT instruction is fast.
  bool HasFastLZCNT = false;

  /// True if SHLD based rotate is fast.
  bool HasFastSHLDRotate = false;

  /// True if the processor supports macrofusion.
  bool HasMacroFusion = false;

  /// True if the processor supports branch fusion.
  bool HasBranchFusion = false;

  /// True if the processor has enhanced REP MOVSB/STOSB.
  bool HasERMSB = false;

  /// True if the processor has fast short REP MOV.
  bool HasFSRM = false;

  /// True if the short functions should be padded to prevent
  /// a stall when returning too early.
  bool PadShortFunctions = false;

  /// True if two memory operand instructions should use a temporary register
  /// instead.
  bool SlowTwoMemOps = false;

  /// True if the LEA instruction inputs have to be ready at address generation
  /// (AG) time.
  bool LEAUsesAG = false;

  /// True if the LEA instruction with certain arguments is slow
  bool SlowLEA = false;

  /// True if the LEA instruction has all three source operands: base, index,
  /// and offset or if the LEA instruction uses base and index registers where
  /// the base is EBP, RBP, or R13
  bool Slow3OpsLEA = false;

  /// True if INC and DEC instructions are slow when writing to flags
  bool SlowIncDec = false;

  /// Processor has AVX-512 PreFetch Instructions
  bool HasPFI = false;

  /// Processor has AVX-512 Exponential and Reciprocal Instructions
  bool HasERI = false;

  /// Processor has AVX-512 Conflict Detection Instructions
  bool HasCDI = false;

  /// Processor has AVX-512 population count Instructions
  bool HasVPOPCNTDQ = false;

  /// Processor has AVX-512 Doubleword and Quadword instructions
  bool HasDQI = false;

  /// Processor has AVX-512 Byte and Word instructions
  bool HasBWI = false;

  /// Processor has AVX-512 Vector Length eXtensions
  bool HasVLX = false;

  /// Processor has AVX-512 16 bit floating-point extensions
  bool HasFP16 = false;

  /// Processor has PKU extensions
  bool HasPKU = false;

  /// Processor has AVX-512 Vector Neural Network Instructions
  bool HasVNNI = false;

  /// Processor has AVX Vector Neural Network Instructions
  bool HasAVXVNNI = false;

  /// Processor has AVX-512 bfloat16 floating-point extensions
  bool HasBF16 = false;

  /// Processor supports ENQCMD instructions
  bool HasENQCMD = false;

  /// Processor has AVX-512 Bit Algorithms instructions
  bool HasBITALG = false;

  /// Processor has AVX-512 vp2intersect instructions
  bool HasVP2INTERSECT = false;

  /// Processor supports CET SHSTK - Control-Flow Enforcement Technology
  /// using Shadow Stack
  bool HasSHSTK = false;

  /// Processor supports Invalidate Process-Context Identifier
  bool HasINVPCID = false;

  /// Processor has Software Guard Extensions
  bool HasSGX = false;

  /// Processor supports Flush Cache Line instruction
  bool HasCLFLUSHOPT = false;

  /// Processor supports Cache Line Write Back instruction
  bool HasCLWB = false;

  /// Processor supports Write Back No Invalidate instruction
  bool HasWBNOINVD = false;

  /// Processor supports RDPID instruction
  bool HasRDPID = false;

  /// Processor supports WaitPKG instructions
  bool HasWAITPKG = false;

  /// Processor supports PCONFIG instruction
  bool HasPCONFIG = false;

  /// Processor supports Key Locker instructions
  bool HasKL = false;

  /// Processor supports Key Locker wide instructions
  bool HasWIDEKL = false;

  /// Processor supports HRESET instruction
  bool HasHRESET = false;

  /// Processor supports SERIALIZE instruction
  bool HasSERIALIZE = false;

  /// Processor supports TSXLDTRK instruction
  bool HasTSXLDTRK = false;

  /// Processor has AMX support
  bool HasAMXTILE = false;
  /// Processor has AMX bfloat16 support.
  bool HasAMXBF16 = false;
  /// Processor has AMX int8 support.
  bool HasAMXINT8 = false;

  /// Processor supports User Level Interrupt instructions
  bool HasUINTR = false;

  /// Enable SSE4.2 CRC32 instruction (Used when SSE4.2 is supported but
  /// function is GPR only)
  bool HasCRC32 = false;

  /// Processor has a single uop BEXTR implementation.
  bool HasFastBEXTR = false;

  /// Try harder to combine to horizontal vector ops if they are fast.
  bool HasFastHorizontalOps = false;

  /// Prefer a left/right scalar logical shifts pair over a shift+and pair.
  bool HasFastScalarShiftMasks = false;

  /// Prefer a left/right vector logical shifts pair over a shift+and pair.
  bool HasFastVectorShiftMasks = false;

  /// Prefer a movbe over a single-use load + bswap / single-use bswap + store.
  bool HasFastMOVBE = false;

  /// Use a retpoline thunk rather than indirect calls to block speculative
  /// execution.
  bool UseRetpolineIndirectCalls = false;

  /// Use a retpoline thunk or remove any indirect branch to block speculative
  /// execution.
  bool UseRetpolineIndirectBranches = false;

  /// Deprecated flag, query `UseRetpolineIndirectCalls` and
  /// `UseRetpolineIndirectBranches` instead.
  bool DeprecatedUseRetpoline = false;

  /// When using a retpoline thunk, call an externally provided thunk rather
  /// than emitting one inside the compiler.
  bool UseRetpolineExternalThunk = false;

  /// Prevent generation of indirect call/branch instructions from memory,
  /// and force all indirect call/branch instructions from a register to be
  /// preceded by an LFENCE. Also decompose RET instructions into a
  /// POP+LFENCE+JMP sequence.
  bool UseLVIControlFlowIntegrity = false;

  /// Enable Speculative Execution Side Effect Suppression
  bool UseSpeculativeExecutionSideEffectSuppression = false;

  /// Insert LFENCE instructions to prevent data speculatively injected into
  /// loads from being used maliciously.
  bool UseLVILoadHardening = false;

  /// Use an instruction sequence for taking the address of a global that allows
  /// a memory tag in the upper address bits.
  bool AllowTaggedGlobals = false;

  /// Use software floating point for code generation.
  bool UseSoftFloat = false;

  /// Use alias analysis during code generation.
  bool UseAA = false;

  /// The minimum alignment known to hold of the stack frame on
  /// entry to the function and which must be maintained by every function.
  Align stackAlignment = Align(4);

  /// Alignment used for the AMX tile configuration data.
  Align TileConfigAlignment = Align(4);

  /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
  ///
  // FIXME: this is a known good value for Yonah. How about others?
  unsigned MaxInlineSizeThreshold = 128;

  /// Indicates target prefers 128 bit instructions.
  bool Prefer128Bit = false;

  /// Indicates target prefers 256 bit instructions.
  bool Prefer256Bit = false;

  /// Indicates target prefers AVX512 mask registers.
  bool PreferMaskRegisters = false;

  /// Use Silvermont specific arithmetic costs.
  bool UseSLMArithCosts = false;

  /// Use Goldmont specific floating point div/sqrt costs.
  bool UseGLMDivSqrtCosts = false;

  /// What processor and OS we're targeting.
  Triple TargetTriple;

  /// GlobalISel related APIs.
  std::unique_ptr<CallLowering> CallLoweringInfo;
  std::unique_ptr<LegalizerInfo> Legalizer;
  std::unique_ptr<RegisterBankInfo> RegBankInfo;
  std::unique_ptr<InstructionSelector> InstSelector;

private:
  /// Override the stack alignment.
  MaybeAlign StackAlignOverride;

  /// Preferred vector width from function attribute.
  unsigned PreferVectorWidthOverride;

  /// Resolved preferred vector width from function attribute and subtarget
  /// features.
  unsigned PreferVectorWidth = UINT32_MAX;

  /// Required vector width from function attribute.
  unsigned RequiredVectorWidth;

  /// True if compiling for 64-bit, false for 16-bit or 32-bit.
  bool In64BitMode = false;

  /// True if compiling for 32-bit, false for 16-bit or 64-bit.
  bool In32BitMode = false;

  /// True if compiling for 16-bit, false for 32-bit or 64-bit.
  bool In16BitMode = false;

  X86SelectionDAGInfo TSInfo;
  // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
  // X86TargetLowering needs.
  X86InstrInfo InstrInfo;
  X86TargetLowering TLInfo;
  X86FrameLowering FrameLowering;
public:
  /// This constructor initializes the data members to match that
  /// of the specified triple.
  ///
  X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS,
               const X86TargetMachine &TM, MaybeAlign StackAlignOverride,
               unsigned PreferVectorWidthOverride,
               unsigned RequiredVectorWidth);

  const X86TargetLowering *getTargetLowering() const override {
    return &TLInfo;
  }

  const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }

  const X86FrameLowering *getFrameLowering() const override {
    return &FrameLowering;
  }

  const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
    return &TSInfo;
  }

  const X86RegisterInfo *getRegisterInfo() const override {
    return &getInstrInfo()->getRegisterInfo();
  }

  /// Size in bytes of the AMX tile configuration data.
  unsigned getTileConfigSize() const { return 64; }
  Align getTileConfigAlignment() const { return TileConfigAlignment; }

  /// Returns the minimum alignment known to hold of the
  /// stack frame on entry to the function and which must be maintained by every
  /// function for this subtarget.
  Align getStackAlignment() const { return stackAlignment; }

  /// Returns the maximum memset / memcpy size
  /// that still makes it profitable to inline the call.
  unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; }

  /// ParseSubtargetFeatures - Parses features string setting specified
  /// subtarget options. Definition of function is auto generated by tblgen.
  void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);

  /// Methods used by Global ISel
  const CallLowering *getCallLowering() const override;
  InstructionSelector *getInstructionSelector() const override;
  const LegalizerInfo *getLegalizerInfo() const override;
  const RegisterBankInfo *getRegBankInfo() const override;

private:
  /// Initialize the full set of dependencies so we can use an initializer
  /// list for X86Subtarget.
  X86Subtarget &initializeSubtargetDependencies(StringRef CPU,
                                                StringRef TuneCPU,
                                                StringRef FS);
  void initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
  440. public:
  441. /// Is this x86_64? (disregarding specific ABI / programming model)
  442. bool is64Bit() const {
  443. return In64BitMode;
  444. }
  445. bool is32Bit() const {
  446. return In32BitMode;
  447. }
  448. bool is16Bit() const {
  449. return In16BitMode;
  450. }
  451. /// Is this x86_64 with the ILP32 programming model (x32 ABI)?
  452. bool isTarget64BitILP32() const {
  453. return In64BitMode && (TargetTriple.isX32() || TargetTriple.isOSNaCl());
  454. }
  455. /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
  456. bool isTarget64BitLP64() const {
  457. return In64BitMode && (!TargetTriple.isX32() && !TargetTriple.isOSNaCl());
  458. }
  PICStyles::Style getPICStyle() const { return PICStyle; }
  void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }

  bool hasX87() const { return HasX87; }
  bool hasCmpxchg8b() const { return HasCmpxchg8b; }
  bool hasNOPL() const { return HasNOPL; }
  // SSE codegen depends on cmovs, and all SSE1+ processors support them.
  // All 64-bit processors support cmov.
  bool hasCMov() const { return HasCMov || X86SSELevel >= SSE1 || is64Bit(); }
  bool hasSSE1() const { return X86SSELevel >= SSE1; }
  bool hasSSE2() const { return X86SSELevel >= SSE2; }
  bool hasSSE3() const { return X86SSELevel >= SSE3; }
  bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
  bool hasSSE41() const { return X86SSELevel >= SSE41; }
  bool hasSSE42() const { return X86SSELevel >= SSE42; }
  bool hasAVX() const { return X86SSELevel >= AVX; }
  bool hasAVX2() const { return X86SSELevel >= AVX2; }
  bool hasAVX512() const { return X86SSELevel >= AVX512F; }
  bool hasInt256() const { return hasAVX2(); }
  bool hasSSE4A() const { return HasSSE4A; }
  bool hasMMX() const { return X863DNowLevel >= MMX; }
  bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
  bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
  bool hasPOPCNT() const { return HasPOPCNT; }
  bool hasAES() const { return HasAES; }
  bool hasVAES() const { return HasVAES; }
  bool hasFXSR() const { return HasFXSR; }
  bool hasXSAVE() const { return HasXSAVE; }
  bool hasXSAVEOPT() const { return HasXSAVEOPT; }
  bool hasXSAVEC() const { return HasXSAVEC; }
  bool hasXSAVES() const { return HasXSAVES; }
  bool hasPCLMUL() const { return HasPCLMUL; }
  bool hasVPCLMULQDQ() const { return HasVPCLMULQDQ; }
  bool hasGFNI() const { return HasGFNI; }
  // Prefer FMA4 to FMA - its better for commutation/memory folding and
  // has equal or better performance on all supported targets.
  bool hasFMA() const { return HasFMA; }
  bool hasFMA4() const { return HasFMA4; }
  bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
  bool hasXOP() const { return HasXOP; }
  bool hasTBM() const { return HasTBM; }
  bool hasLWP() const { return HasLWP; }
  bool hasMOVBE() const { return HasMOVBE; }
  bool hasRDRAND() const { return HasRDRAND; }
  bool hasF16C() const { return HasF16C; }
  bool hasFSGSBase() const { return HasFSGSBase; }
  bool hasLZCNT() const { return HasLZCNT; }
  bool hasBMI() const { return HasBMI; }
  bool hasBMI2() const { return HasBMI2; }
  bool hasVBMI() const { return HasVBMI; }
  bool hasVBMI2() const { return HasVBMI2; }
  bool hasIFMA() const { return HasIFMA; }
  bool hasRTM() const { return HasRTM; }
  bool hasADX() const { return HasADX; }
  bool hasSHA() const { return HasSHA; }
  bool hasPRFCHW() const { return HasPRFCHW; }
  bool hasPREFETCHWT1() const { return HasPREFETCHWT1; }
  515. bool hasPrefetchW() const {
  516. // The PREFETCHW instruction was added with 3DNow but later CPUs gave it
  517. // its own CPUID bit as part of deprecating 3DNow. Intel eventually added
  518. // it and KNL has another that prefetches to L2 cache. We assume the
  519. // L1 version exists if the L2 version does.
  520. return has3DNow() || hasPRFCHW() || hasPREFETCHWT1();
  521. }
  522. bool hasSSEPrefetch() const {
  523. // We implicitly enable these when we have a write prefix supporting cache
  524. // level OR if we have prfchw, but don't already have a read prefetch from
  525. // 3dnow.
  526. return hasSSE1() || (hasPRFCHW() && !has3DNow()) || hasPREFETCHWT1();
  527. }
  528. bool hasRDSEED() const { return HasRDSEED; }
  529. bool hasLAHFSAHF() const { return HasLAHFSAHF64 || !is64Bit(); }
  530. bool hasMWAITX() const { return HasMWAITX; }
  531. bool hasCLZERO() const { return HasCLZERO; }
  532. bool hasCLDEMOTE() const { return HasCLDEMOTE; }
  533. bool hasMOVDIRI() const { return HasMOVDIRI; }
  534. bool hasMOVDIR64B() const { return HasMOVDIR64B; }
  535. bool hasPTWRITE() const { return HasPTWRITE; }
  536. bool isSHLDSlow() const { return IsSHLDSlow; }
  537. bool isPMULLDSlow() const { return IsPMULLDSlow; }
  538. bool isPMADDWDSlow() const { return IsPMADDWDSlow; }
  539. bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
  540. bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
  541. bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
  542. bool hasCmpxchg16b() const { return HasCmpxchg16b && is64Bit(); }
  543. bool useLeaForSP() const { return UseLeaForSP; }
  544. bool hasPOPCNTFalseDeps() const { return HasPOPCNTFalseDeps; }
  545. bool hasLZCNTFalseDeps() const { return HasLZCNTFalseDeps; }
  546. bool hasFastVariableCrossLaneShuffle() const {
  547. return HasFastVariableCrossLaneShuffle;
  548. }
  549. bool hasFastVariablePerLaneShuffle() const {
  550. return HasFastVariablePerLaneShuffle;
  551. }
  552. bool insertVZEROUPPER() const { return InsertVZEROUPPER; }
  553. bool hasFastGather() const { return HasFastGather; }
  554. bool hasFastScalarFSQRT() const { return HasFastScalarFSQRT; }
  555. bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
  556. bool hasFastLZCNT() const { return HasFastLZCNT; }
  557. bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
  558. bool hasFastBEXTR() const { return HasFastBEXTR; }
  559. bool hasFastHorizontalOps() const { return HasFastHorizontalOps; }
  560. bool hasFastScalarShiftMasks() const { return HasFastScalarShiftMasks; }
  561. bool hasFastVectorShiftMasks() const { return HasFastVectorShiftMasks; }
  562. bool hasFastMOVBE() const { return HasFastMOVBE; }
  // Simple feature/tuning predicates. Each getter merely reflects the
  // corresponding member flag, which is populated from the CPU/feature
  // string during subtarget initialization (outside this view).
  bool hasMacroFusion() const { return HasMacroFusion; }
  bool hasBranchFusion() const { return HasBranchFusion; }
  bool hasERMSB() const { return HasERMSB; }
  bool hasFSRM() const { return HasFSRM; }
  bool hasSlowDivide32() const { return HasSlowDivide32; }
  bool hasSlowDivide64() const { return HasSlowDivide64; }
  bool padShortFunctions() const { return PadShortFunctions; }
  bool slowTwoMemOps() const { return SlowTwoMemOps; }
  bool LEAusesAG() const { return LEAUsesAG; }
  bool slowLEA() const { return SlowLEA; }
  bool slow3OpsLEA() const { return Slow3OpsLEA; }
  bool slowIncDec() const { return SlowIncDec; }
  // AVX-512 sub-feature flags.
  bool hasCDI() const { return HasCDI; }
  bool hasVPOPCNTDQ() const { return HasVPOPCNTDQ; }
  bool hasPFI() const { return HasPFI; }
  bool hasERI() const { return HasERI; }
  bool hasDQI() const { return HasDQI; }
  bool hasBWI() const { return HasBWI; }
  bool hasVLX() const { return HasVLX; }
  bool hasFP16() const { return HasFP16; }
  bool hasPKU() const { return HasPKU; }
  bool hasVNNI() const { return HasVNNI; }
  bool hasBF16() const { return HasBF16; }
  bool hasVP2INTERSECT() const { return HasVP2INTERSECT; }
  bool hasBITALG() const { return HasBITALG; }
  bool hasSHSTK() const { return HasSHSTK; }
  // Miscellaneous ISA-extension flags.
  bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; }
  bool hasCLWB() const { return HasCLWB; }
  bool hasWBNOINVD() const { return HasWBNOINVD; }
  bool hasRDPID() const { return HasRDPID; }
  bool hasWAITPKG() const { return HasWAITPKG; }
  bool hasPCONFIG() const { return HasPCONFIG; }
  bool hasSGX() const { return HasSGX; }
  bool hasINVPCID() const { return HasINVPCID; }
  bool hasENQCMD() const { return HasENQCMD; }
  bool hasKL() const { return HasKL; }
  bool hasWIDEKL() const { return HasWIDEKL; }
  bool hasHRESET() const { return HasHRESET; }
  bool hasSERIALIZE() const { return HasSERIALIZE; }
  bool hasTSXLDTRK() const { return HasTSXLDTRK; }
  bool hasUINTR() const { return HasUINTR; }
  bool hasCRC32() const { return HasCRC32; }
  // Retpoline (speculative-execution mitigation) request flags.
  bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
  bool useRetpolineIndirectBranches() const {
    return UseRetpolineIndirectBranches;
  }
  // AVX-VNNI and AMX flags.
  bool hasAVXVNNI() const { return HasAVXVNNI; }
  bool hasAMXTILE() const { return HasAMXTILE; }
  bool hasAMXBF16() const { return HasAMXBF16; }
  bool hasAMXINT8() const { return HasAMXINT8; }
  bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
  614. // These are generic getters that OR together all of the thunk types
  615. // supported by the subtarget. Therefore useIndirectThunk*() will return true
  616. // if any respective thunk feature is enabled.
  617. bool useIndirectThunkCalls() const {
  618. return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity();
  619. }
  620. bool useIndirectThunkBranches() const {
  621. return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
  622. }
  bool preferMaskRegisters() const { return PreferMaskRegisters; }
  // Cost-model selectors for specific microarchitectures (SLM/GLM).
  bool useSLMArithCosts() const { return UseSLMArithCosts; }
  bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; }
  // LVI (load-value-injection) mitigation request flags.
  bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; }
  bool allowTaggedGlobals() const { return AllowTaggedGlobals; }
  bool useLVILoadHardening() const { return UseLVILoadHardening; }
  bool useSpeculativeExecutionSideEffectSuppression() const {
    return UseSpeculativeExecutionSideEffectSuppression;
  }
  // Vector-width knobs (in bits) consulted by the widening helpers below.
  unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
  unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
  634. // Helper functions to determine when we should allow widening to 512-bit
  635. // during codegen.
  636. // TODO: Currently we're always allowing widening on CPUs without VLX,
  637. // because for many cases we don't have a better option.
  638. bool canExtendTo512DQ() const {
  639. return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512);
  640. }
  641. bool canExtendTo512BW() const {
  642. return hasBWI() && canExtendTo512DQ();
  643. }
  644. // If there are no 512-bit vectors and we prefer not to use 512-bit registers,
  645. // disable them in the legalizer.
  646. bool useAVX512Regs() const {
  647. return hasAVX512() && (canExtendTo512DQ() || RequiredVectorWidth > 256);
  648. }
  649. bool useBWIRegs() const {
  650. return hasBWI() && useAVX512Regs();
  651. }
  // XRay instrumentation is supported only in 64-bit mode.
  bool isXRaySupported() const override { return is64Bit(); }
  /// TODO: to be removed later and replaced with suitable properties
  bool isAtom() const { return X86ProcFamily == IntelAtom; }
  bool useSoftFloat() const { return UseSoftFloat; }
  bool useAA() const override { return UseAA; }
  /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
  /// no-sse2). There isn't any reason to disable it if the target processor
  /// supports it.
  bool hasMFence() const { return hasSSE2() || is64Bit(); }
  const Triple &getTargetTriple() const { return TargetTriple; }
  // Target-triple predicates: each forwards to the corresponding query on
  // the stored Triple.
  bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
  bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); }
  bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); }
  bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); }
  bool isTargetPS4() const { return TargetTriple.isPS4CPU(); }
  // Object-format predicates.
  bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
  bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
  bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
  bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
  bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); }
  bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); }
  bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
  bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
  // NaCl variants also discriminated by pointer width.
  bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
  bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
  bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); }
  bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
  // Windows environment flavors.
  bool isTargetWindowsMSVC() const {
    return TargetTriple.isWindowsMSVCEnvironment();
  }
  bool isTargetWindowsCoreCLR() const {
    return TargetTriple.isWindowsCoreCLREnvironment();
  }
  bool isTargetWindowsCygwin() const {
    return TargetTriple.isWindowsCygwinEnvironment();
  }
  bool isTargetWindowsGNU() const {
    return TargetTriple.isWindowsGNUEnvironment();
  }
  bool isTargetWindowsItanium() const {
    return TargetTriple.isWindowsItaniumEnvironment();
  }
  bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); }
  bool isOSWindows() const { return TargetTriple.isOSWindows(); }
  // Windows predicates further split by the subtarget's 64-bit mode flag.
  bool isTargetWin64() const { return In64BitMode && isOSWindows(); }
  bool isTargetWin32() const { return !In64BitMode && isOSWindows(); }
  // PIC-style predicates: compare the configured PICStyle against each
  // enumerator.
  bool isPICStyleGOT() const { return PICStyle == PICStyles::Style::GOT; }
  bool isPICStyleRIPRel() const { return PICStyle == PICStyles::Style::RIPRel; }
  bool isPICStyleStubPIC() const {
    return PICStyle == PICStyles::Style::StubPIC;
  }
  // Defined out of line (depends on the TargetMachine's relocation model).
  bool isPositionIndependent() const;
  704. bool isCallingConvWin64(CallingConv::ID CC) const {
  705. switch (CC) {
  706. // On Win64, all these conventions just use the default convention.
  707. case CallingConv::C:
  708. case CallingConv::Fast:
  709. case CallingConv::Tail:
  710. case CallingConv::Swift:
  711. case CallingConv::SwiftTail:
  712. case CallingConv::X86_FastCall:
  713. case CallingConv::X86_StdCall:
  714. case CallingConv::X86_ThisCall:
  715. case CallingConv::X86_VectorCall:
  716. case CallingConv::Intel_OCL_BI:
  717. return isTargetWin64();
  718. // This convention allows using the Win64 convention on other targets.
  719. case CallingConv::Win64:
  720. return true;
  721. // This convention allows using the SysV convention on Windows targets.
  722. case CallingConv::X86_64_SysV:
  723. return false;
  724. // Otherwise, who knows what this is.
  725. default:
  726. return false;
  727. }
  728. }
  /// Classify a global variable reference for the current subtarget according
  /// to how we should reference it in a non-pcrel context.
  unsigned char classifyLocalReference(const GlobalValue *GV) const;
  unsigned char classifyGlobalReference(const GlobalValue *GV,
                                        const Module &M) const;
  // Convenience overload; defined out of line with the two-argument form.
  unsigned char classifyGlobalReference(const GlobalValue *GV) const;
  /// Classify a global function reference for the current subtarget.
  unsigned char classifyGlobalFunctionReference(const GlobalValue *GV,
                                                const Module &M) const;
  unsigned char classifyGlobalFunctionReference(const GlobalValue *GV) const;
  /// Classify a blockaddress reference for the current subtarget according to
  /// how we should reference it in a non-pcrel context.
  unsigned char classifyBlockAddressReference() const;
  /// Return true if the subtarget allows calls to immediate address.
  bool isLegalToCallImmediateAddr() const;
  744. /// Return whether FrameLowering should always set the "extended frame
  745. /// present" bit in FP, or set it based on a symbol in the runtime.
  746. bool swiftAsyncContextIsDynamicallySet() const {
  747. // Older OS versions (particularly system unwinders) are confused by the
  748. // Swift extended frame, so when building code that might be run on them we
  749. // must dynamically query the concurrency library to determine whether
  750. // extended frames should be flagged as present.
  751. const Triple &TT = getTargetTriple();
  752. unsigned Major = TT.getOSVersion().getMajor();
  753. switch(TT.getOS()) {
  754. default:
  755. return false;
  756. case Triple::IOS:
  757. case Triple::TvOS:
  758. return Major < 15;
  759. case Triple::WatchOS:
  760. return Major < 8;
  761. case Triple::MacOSX:
  762. case Triple::Darwin:
  763. return Major < 12;
  764. }
  765. }
  /// If we are using indirect thunks, we need to expand indirectbr to avoid it
  /// lowering to an actual indirect jump.
  bool enableIndirectBrExpand() const override {
    return useIndirectThunkBranches();
  }
  /// Enable the MachineScheduler pass for all X86 subtargets.
  bool enableMachineScheduler() const override { return true; }
  // Defined out of line.
  bool enableEarlyIfConversion() const override;
  // Hook for appending X86-specific post-RA scheduling DAG mutations;
  // defined out of line.
  void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
                              &Mutations) const override;
  // Break anti-dependences only along the critical path.
  AntiDepBreakMode getAntiDepBreakMode() const override {
    return TargetSubtargetInfo::ANTIDEP_CRITICAL;
  }
  bool enableAdvancedRASplitCost() const override { return false; }
  780. };
  781. } // end namespace llvm
  782. #endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H