//===-- X86TargetMachine.cpp - Define TargetMachine for the X86 -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the X86 specific subclass of TargetMachine.
//
//===----------------------------------------------------------------------===//
#include "X86TargetMachine.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "TargetInfo/X86TargetInfo.h"
#include "X86.h"
#include "X86CallLowering.h"
#include "X86LegalizerInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86MacroFusion.h"
#include "X86Subtarget.h"
#include "X86TargetObjectFile.h"
#include "X86TargetTransformInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/ExecutionDomainFix.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/CFGuard.h"
#include <memory>
#include <optional>
#include <string>
using namespace llvm;

static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner",
                               cl::desc("Enable the machine combiner pass"),
                               cl::init(true), cl::Hidden);

static cl::opt<bool>
    EnableTileRAPass("x86-tile-ra",
                     cl::desc("Enable the tile register allocation pass"),
                     cl::init(true), cl::Hidden);
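/// Register the X86-32 and X86-64 target machines and initialize the
/// X86-specific codegen passes with the global pass registry.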
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
  // Register the target.
  RegisterTargetMachine<X86TargetMachine> X(getTheX86_32Target());
  RegisterTargetMachine<X86TargetMachine> Y(getTheX86_64Target());

  PassRegistry &PR = *PassRegistry::getPassRegistry();
  initializeX86LowerAMXIntrinsicsLegacyPassPass(PR);
  initializeX86LowerAMXTypeLegacyPassPass(PR);
  initializeX86PreAMXConfigPassPass(PR);
  initializeX86PreTileConfigPass(PR);
  initializeGlobalISel(PR);
  initializeWinEHStatePassPass(PR);
  initializeFixupBWInstPassPass(PR);
  initializeEvexToVexInstPassPass(PR);
  initializeFixupLEAPassPass(PR);
  initializeFPSPass(PR);
  initializeX86FixupSetCCPassPass(PR);
  initializeX86CallFrameOptimizationPass(PR);
  initializeX86CmovConverterPassPass(PR);
  initializeX86TileConfigPass(PR);
  initializeX86FastPreTileConfigPass(PR);
  initializeX86FastTileConfigPass(PR);
  initializeX86KCFIPass(PR);
  initializeX86LowerTileCopyPass(PR);
  initializeX86ExpandPseudoPass(PR);
  initializeX86ExecutionDomainFixPass(PR);
  initializeX86DomainReassignmentPass(PR);
  initializeX86AvoidSFBPassPass(PR);
  initializeX86AvoidTrailingCallPassPass(PR);
  initializeX86SpeculativeLoadHardeningPassPass(PR);
  initializeX86SpeculativeExecutionSideEffectSuppressionPass(PR);
  initializeX86FlagsCopyLoweringPassPass(PR);
  initializeX86LoadValueInjectionLoadHardeningPassPass(PR);
  initializeX86LoadValueInjectionRetHardeningPassPass(PR);
  initializeX86OptimizeLEAPassPass(PR);
  initializeX86PartialReductionPass(PR);
  initializePseudoProbeInserterPass(PR);
  initializeX86ReturnThunksPass(PR);
  initializeX86DAGToDAGISelPass(PR);
}
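/// Pick the lowering object-file implementation matching the object format of
/// the triple: Mach-O (with an x86-64 variant), COFF, or ELF.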
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
  if (TT.isOSBinFormatMachO()) {
    if (TT.getArch() == Triple::x86_64)
      return std::make_unique<X86_64MachoTargetObjectFile>();
    return std::make_unique<TargetLoweringObjectFileMachO>();
  }

  if (TT.isOSBinFormatCOFF())
    return std::make_unique<TargetLoweringObjectFileCOFF>();
  return std::make_unique<X86ELFTargetObjectFile>();
}
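/// Build the data layout string for the given triple. For example, for
/// x86_64-unknown-linux-gnu the rules below produce (illustrative, derived by
/// tracing this function):
///   "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"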
static std::string computeDataLayout(const Triple &TT) {
  // X86 is little endian
  std::string Ret = "e";

  Ret += DataLayout::getManglingComponent(TT);
  // X86 and x32 have 32 bit pointers.
  if (!TT.isArch64Bit() || TT.isX32() || TT.isOSNaCl())
    Ret += "-p:32:32";

  // Address spaces for 32 bit signed, 32 bit unsigned, and 64 bit pointers.
  Ret += "-p270:32:32-p271:32:32-p272:64:64";

  // Some ABIs align 64 bit integers and doubles to 64 bits, others to 32.
  if (TT.isArch64Bit() || TT.isOSWindows() || TT.isOSNaCl())
    Ret += "-i64:64";
  else if (TT.isOSIAMCU())
    Ret += "-i64:32-f64:32";
  else
    Ret += "-f64:32:64";

  // Some ABIs align long double to 128 bits, others to 32.
  if (TT.isOSNaCl() || TT.isOSIAMCU())
    ; // No f80
  else if (TT.isArch64Bit() || TT.isOSDarwin() || TT.isWindowsMSVCEnvironment())
    Ret += "-f80:128";
  else
    Ret += "-f80:32";

  if (TT.isOSIAMCU())
    Ret += "-f128:32";

  // The registers can hold 8, 16, 32 or, in x86-64, 64 bits.
  if (TT.isArch64Bit())
    Ret += "-n8:16:32:64";
  else
    Ret += "-n8:16:32";

  // The stack is aligned to 32 bits on some ABIs and 128 bits on others.
  if ((!TT.isArch64Bit() && TT.isOSWindows()) || TT.isOSIAMCU())
    Ret += "-a:0:32-S32";
  else
    Ret += "-S128";

  return Ret;
}
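/// Choose a relocation model when none was requested explicitly, and adjust
/// requested models that the platform cannot support (e.g. static on 64-bit
/// Darwin).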
static Reloc::Model getEffectiveRelocModel(const Triple &TT, bool JIT,
                                           std::optional<Reloc::Model> RM) {
  bool is64Bit = TT.getArch() == Triple::x86_64;
  if (!RM) {
    // JIT codegen should use static relocations by default, since it's
    // typically executed in process and not relocatable.
    if (JIT)
      return Reloc::Static;

    // Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode.
    // Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we
    // use static relocation model by default.
    if (TT.isOSDarwin()) {
      if (is64Bit)
        return Reloc::PIC_;
      return Reloc::DynamicNoPIC;
    }
    if (TT.isOSWindows() && is64Bit)
      return Reloc::PIC_;
    return Reloc::Static;
  }

  // ELF and X86-64 don't have a distinct DynamicNoPIC model. DynamicNoPIC
  // is defined as a model for code which may be used in static or dynamic
  // executables but not necessarily a shared library. On X86-32 we just
  // compile in -static mode, in x86-64 we use PIC.
  if (*RM == Reloc::DynamicNoPIC) {
    if (is64Bit)
      return Reloc::PIC_;
    if (!TT.isOSDarwin())
      return Reloc::Static;
  }

  // If we are on Darwin, disallow static relocation model in X86-64 mode, since
  // the Mach-O file format doesn't support it.
  if (*RM == Reloc::Static && TT.isOSDarwin() && is64Bit)
    return Reloc::PIC_;

  return *RM;
}
static CodeModel::Model
getEffectiveX86CodeModel(std::optional<CodeModel::Model> CM, bool JIT,
                         bool Is64Bit) {
  if (CM) {
    if (*CM == CodeModel::Tiny)
      report_fatal_error("Target does not support the tiny CodeModel", false);
    return *CM;
  }
  if (JIT)
    return Is64Bit ? CodeModel::Large : CodeModel::Small;
  return CodeModel::Small;
}
/// Create an X86 target.
///
X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT,
                                   StringRef CPU, StringRef FS,
                                   const TargetOptions &Options,
                                   std::optional<Reloc::Model> RM,
                                   std::optional<CodeModel::Model> CM,
                                   CodeGenOpt::Level OL, bool JIT)
    : LLVMTargetMachine(
          T, computeDataLayout(TT), TT, CPU, FS, Options,
          getEffectiveRelocModel(TT, JIT, RM),
          getEffectiveX86CodeModel(CM, JIT, TT.getArch() == Triple::x86_64),
          OL),
      TLOF(createTLOF(getTargetTriple())), IsJIT(JIT) {
  // On PS4/PS5, the "return address" of a 'noreturn' call must still be within
  // the calling function, and TrapUnreachable is an easy way to get that.
  if (TT.isPS() || TT.isOSBinFormatMachO()) {
    this->Options.TrapUnreachable = true;
    this->Options.NoTrapAfterNoreturn = TT.isOSBinFormatMachO();
  }

  setMachineOutliner(true);

  // x86 supports the debug entry values.
  setSupportsDebugEntryValues(true);

  initAsmInfo();
}

X86TargetMachine::~X86TargetMachine() = default;
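/// Return the subtarget for a function, creating and caching an X86Subtarget
/// keyed on the function's target-cpu, tune-cpu, target-features,
/// vector-width, and soft-float attributes.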
const X86Subtarget *
X86TargetMachine::getSubtargetImpl(const Function &F) const {
  Attribute CPUAttr = F.getFnAttribute("target-cpu");
  Attribute TuneAttr = F.getFnAttribute("tune-cpu");
  Attribute FSAttr = F.getFnAttribute("target-features");

  StringRef CPU =
      CPUAttr.isValid() ? CPUAttr.getValueAsString() : (StringRef)TargetCPU;
  230. // "x86-64" is a default target setting for many front ends. In these cases,
  231. // they actually request for "generic" tuning unless the "tune-cpu" was
  232. // specified.
  233. StringRef TuneCPU = TuneAttr.isValid() ? TuneAttr.getValueAsString()
  234. : CPU == "x86-64" ? "generic"
  235. : (StringRef)CPU;
  StringRef FS =
      FSAttr.isValid() ? FSAttr.getValueAsString() : (StringRef)TargetFS;

  SmallString<512> Key;
  // The additions here are ordered so that the definitely short strings are
  // added first so we won't exceed the small size. We append the
  // much longer FS string at the end so that we only heap allocate at most
  // one time.

  // Extract prefer-vector-width attribute.
  unsigned PreferVectorWidthOverride = 0;
  Attribute PreferVecWidthAttr = F.getFnAttribute("prefer-vector-width");
  if (PreferVecWidthAttr.isValid()) {
    StringRef Val = PreferVecWidthAttr.getValueAsString();
    unsigned Width;
    if (!Val.getAsInteger(0, Width)) {
      Key += 'p';
      Key += Val;
      PreferVectorWidthOverride = Width;
    }
  }

  // Extract min-legal-vector-width attribute.
  unsigned RequiredVectorWidth = UINT32_MAX;
  Attribute MinLegalVecWidthAttr = F.getFnAttribute("min-legal-vector-width");
  if (MinLegalVecWidthAttr.isValid()) {
    StringRef Val = MinLegalVecWidthAttr.getValueAsString();
    unsigned Width;
    if (!Val.getAsInteger(0, Width)) {
      Key += 'm';
      Key += Val;
      RequiredVectorWidth = Width;
    }
  }

  // Add CPU to the Key.
  Key += CPU;

  // Add tune CPU to the Key.
  Key += TuneCPU;

  // Keep track of the start of the feature portion of the string.
  unsigned FSStart = Key.size();

  // FIXME: This is related to the code below to reset the target options,
  // we need to know whether or not the soft float flag is set on the
  // function before we can generate a subtarget. We also need to use
  // it as a key for the subtarget since that can be the only difference
  // between two functions.
  bool SoftFloat = F.getFnAttribute("use-soft-float").getValueAsBool();
  // If the soft float attribute is set on the function turn on the soft float
  // subtarget feature.
  if (SoftFloat)
    Key += FS.empty() ? "+soft-float" : "+soft-float,";
  Key += FS;

  // We may have added +soft-float to the features so move the StringRef to
  // point to the full string in the Key.
  FS = Key.substr(FSStart);

  auto &I = SubtargetMap[Key];
  if (!I) {
    // This needs to be done before we create a new subtarget since any
    // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
    I = std::make_unique<X86Subtarget>(
        TargetTriple, CPU, TuneCPU, FS, *this,
        MaybeAlign(F.getParent()->getOverrideStackAlignment()),
        PreferVectorWidthOverride, RequiredVectorWidth);
  }

  return I.get();
}
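// Address spaces below 256 share the flat address space; 256 and above are
// reserved for special pointers (e.g. the gs/fs/ss segment-relative address
// spaces), so casts involving them are not no-ops.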
bool X86TargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
                                           unsigned DestAS) const {
  assert(SrcAS != DestAS && "Expected different address spaces!");
  if (getPointerSize(SrcAS) != getPointerSize(DestAS))
    return false;
  return SrcAS < 256 && DestAS < 256;
}
//===----------------------------------------------------------------------===//
// X86 TTI query.
//===----------------------------------------------------------------------===//

TargetTransformInfo
X86TargetMachine::getTargetTransformInfo(const Function &F) const {
  return TargetTransformInfo(X86TTIImpl(this, F));
}
//===----------------------------------------------------------------------===//
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//

namespace {

/// X86 Code Generator Pass Configuration Options.
class X86PassConfig : public TargetPassConfig {
public:
  X86PassConfig(X86TargetMachine &TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {}

  X86TargetMachine &getX86TargetMachine() const {
    return getTM<X86TargetMachine>();
  }

  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override {
    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
    DAG->addMutation(createX86MacroFusionDAGMutation());
    return DAG;
  }

  ScheduleDAGInstrs *
  createPostMachineScheduler(MachineSchedContext *C) const override {
    ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
    DAG->addMutation(createX86MacroFusionDAGMutation());
    return DAG;
  }

  void addIRPasses() override;
  bool addInstSelector() override;
  bool addIRTranslator() override;
  bool addLegalizeMachineIR() override;
  bool addRegBankSelect() override;
  bool addGlobalInstructionSelect() override;
  bool addILPOpts() override;
  bool addPreISel() override;
  void addMachineSSAOptimization() override;
  void addPreRegAlloc() override;
  bool addPostFastRegAllocRewrite() override;
  void addPostRegAlloc() override;
  void addPreEmitPass() override;
  void addPreEmitPass2() override;
  void addPreSched2() override;
  bool addRegAssignAndRewriteOptimized() override;

  std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
};

class X86ExecutionDomainFix : public ExecutionDomainFix {
public:
  static char ID;
  X86ExecutionDomainFix() : ExecutionDomainFix(ID, X86::VR128XRegClass) {}
  StringRef getPassName() const override {
    return "X86 Execution Dependency Fix";
  }
};
char X86ExecutionDomainFix::ID;

} // end anonymous namespace
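// Register X86ExecutionDomainFix and its ReachingDefAnalysis dependency with
// the pass registry.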
INITIALIZE_PASS_BEGIN(X86ExecutionDomainFix, "x86-execution-domain-fix",
                      "X86 Execution Domain Fix", false, false)
INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis)
INITIALIZE_PASS_END(X86ExecutionDomainFix, "x86-execution-domain-fix",
                    "X86 Execution Domain Fix", false, false)
TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
  return new X86PassConfig(*this, PM);
}

MachineFunctionInfo *X86TargetMachine::createMachineFunctionInfo(
    BumpPtrAllocator &Allocator, const Function &F,
    const TargetSubtargetInfo *STI) const {
  return X86MachineFunctionInfo::create<X86MachineFunctionInfo>(Allocator, F,
                                                                STI);
}
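// IR-level passes added ahead of instruction selection: atomic and AMX
// lowering, interleaved-access and partial-reduction transforms, indirect
// branch expansion for retpolines, Control Flow Guard checks on Windows, and
// JMC instrumentation.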
void X86PassConfig::addIRPasses() {
  addPass(createAtomicExpandPass());
  // Both AMX lowering passes are added unconditionally; when they run, each
  // pass skips itself based on the optimization level and function attributes.
  addPass(createX86LowerAMXIntrinsicsPass());
  addPass(createX86LowerAMXTypePass());

  TargetPassConfig::addIRPasses();

  if (TM->getOptLevel() != CodeGenOpt::None) {
    addPass(createInterleavedAccessPass());
    addPass(createX86PartialReductionPass());
  }

  // Add passes that handle indirect branch removal and insertion of a retpoline
  // thunk. These will be a no-op unless a function subtarget has the retpoline
  // feature enabled.
  addPass(createIndirectBrExpandPass());

  // Add Control Flow Guard checks.
  const Triple &TT = TM->getTargetTriple();
  if (TT.isOSWindows()) {
    if (TT.getArch() == Triple::x86_64) {
      addPass(createCFGuardDispatchPass());
    } else {
      addPass(createCFGuardCheckPass());
    }
  }

  if (TM->Options.JMCInstrument)
    addPass(createJMCInstrumenterPass());
}
bool X86PassConfig::addInstSelector() {
  // Install an instruction selector.
  addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));

  // For ELF, cleanup any local-dynamic TLS accesses.
  if (TM->getTargetTriple().isOSBinFormatELF() &&
      getOptLevel() != CodeGenOpt::None)
    addPass(createCleanupLocalDynamicTLSPass());

  addPass(createX86GlobalBaseRegPass());
  return false;
}
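// GlobalISel hooks: IR translation, legalization, register-bank selection, and
// global instruction selection.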
bool X86PassConfig::addIRTranslator() {
  addPass(new IRTranslator(getOptLevel()));
  return false;
}

bool X86PassConfig::addLegalizeMachineIR() {
  addPass(new Legalizer());
  return false;
}

bool X86PassConfig::addRegBankSelect() {
  addPass(new RegBankSelect());
  return false;
}

bool X86PassConfig::addGlobalInstructionSelect() {
  addPass(new InstructionSelect(getOptLevel()));
  return false;
}
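// ILP optimizations run before register allocation: early if-conversion, the
// machine combiner (if enabled), and CMOV-to-branch conversion.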
bool X86PassConfig::addILPOpts() {
  addPass(&EarlyIfConverterID);
  if (EnableMachineCombinerPass)
    addPass(&MachineCombinerID);
  addPass(createX86CmovConverterPass());
  return true;
}

bool X86PassConfig::addPreISel() {
  // Only add this pass for 32-bit x86 Windows.
  const Triple &TT = TM->getTargetTriple();
  if (TT.isOSWindows() && TT.getArch() == Triple::x86)
    addPass(createX86WinEHStatePass());
  return true;
}
void X86PassConfig::addPreRegAlloc() {
  if (getOptLevel() != CodeGenOpt::None) {
    addPass(&LiveRangeShrinkID);
    addPass(createX86FixupSetCC());
    addPass(createX86OptimizeLEAs());
    addPass(createX86CallFrameOptimization());
    addPass(createX86AvoidStoreForwardingBlocks());
  }

  addPass(createX86SpeculativeLoadHardeningPass());
  addPass(createX86FlagsCopyLoweringPass());
  addPass(createX86DynAllocaExpander());
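  // Configure AMX tile registers before register allocation: the optimizing
  // pre-config pass is used above -O0, the fast variant at -O0.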
  if (getOptLevel() != CodeGenOpt::None)
    addPass(createX86PreTileConfigPass());
  else
    addPass(createX86FastPreTileConfigPass());
}
void X86PassConfig::addMachineSSAOptimization() {
  addPass(createX86DomainReassignmentPass());
  TargetPassConfig::addMachineSSAOptimization();
}
void X86PassConfig::addPostRegAlloc() {
  addPass(createX86LowerTileCopyPass());
  addPass(createX86FloatingPointStackifierPass());
  // When -O0 is enabled, the Load Value Injection Hardening pass will fall back
  // to using the Speculative Execution Side Effect Suppression pass for
  // mitigation. This is to prevent slow downs due to analyses needed by the
  // LVIHardening pass when compiling at -O0.
  if (getOptLevel() != CodeGenOpt::None)
    addPass(createX86LoadValueInjectionLoadHardeningPass());
}
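// Expand pseudo instructions and lower KCFI checks; these hooks run before the
// post-RA scheduler.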
void X86PassConfig::addPreSched2() {
  addPass(createX86ExpandPseudoPass());
  addPass(createX86KCFIPass());
}
void X86PassConfig::addPreEmitPass() {
  if (getOptLevel() != CodeGenOpt::None) {
    addPass(new X86ExecutionDomainFix());
    addPass(createBreakFalseDeps());
  }

  addPass(createX86IndirectBranchTrackingPass());

  addPass(createX86IssueVZeroUpperPass());

  if (getOptLevel() != CodeGenOpt::None) {
    addPass(createX86FixupBWInsts());
    addPass(createX86PadShortFunctions());
    addPass(createX86FixupLEAs());
  }
  addPass(createX86EvexToVexInsts());
  addPass(createX86DiscriminateMemOpsPass());
  addPass(createX86InsertPrefetchPass());
  addPass(createX86InsertX87waitPass());
}
void X86PassConfig::addPreEmitPass2() {
  const Triple &TT = TM->getTargetTriple();
  const MCAsmInfo *MAI = TM->getMCAsmInfo();

  // The X86 Speculative Execution Pass must run after all control
  // flow graph modifying passes. As a result it was listed to run right before
  // the X86 Retpoline Thunks pass. The reason it must run after control flow
  // graph modifications is that the model of LFENCE in LLVM has to be updated
  // (FIXME: https://bugs.llvm.org/show_bug.cgi?id=45167). Currently the
  // placement of this pass was hand checked to ensure that the subsequent
  // passes don't move the code around the LFENCEs in a way that will hurt the
  // correctness of this pass. This placement has been shown to work based on
  // hand inspection of the codegen output.
  addPass(createX86SpeculativeExecutionSideEffectSuppression());
  addPass(createX86IndirectThunksPass());
  addPass(createX86ReturnThunksPass());

  // Insert extra int3 instructions after trailing call instructions to avoid
  // issues in the unwinder.
  if (TT.isOSWindows() && TT.getArch() == Triple::x86_64)
    addPass(createX86AvoidTrailingCallPass());

  // Verify basic block incoming and outgoing cfa offset and register values and
  // correct CFA calculation rule where needed by inserting appropriate CFI
  // instructions.
  if (!TT.isOSDarwin() &&
      (!TT.isOSWindows() ||
       MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI))
    addPass(createCFIInstrInserter());

  if (TT.isOSWindows()) {
    // Identify valid longjmp targets for Windows Control Flow Guard.
    addPass(createCFGuardLongjmpPass());
    // Identify valid eh continuation targets for Windows EHCont Guard.
    addPass(createEHContGuardCatchretPass());
  }

  addPass(createX86LoadValueInjectionRetHardeningPass());

  // Insert pseudo probe annotation for callsite profiling
  addPass(createPseudoProbeInserter());

  // KCFI indirect call checks are lowered to a bundle, and on Darwin platforms,
  // also CALL_RVMARKER.
  addPass(createUnpackMachineBundles([&TT](const MachineFunction &MF) {
    // Only run bundle expansion if the module uses kcfi, or there are relevant
    // ObjC runtime functions present in the module.
    const Function &F = MF.getFunction();
    const Module *M = F.getParent();
    return M->getModuleFlag("kcfi") ||
           (TT.isOSDarwin() &&
            (M->getFunction("objc_retainAutoreleasedReturnValue") ||
             M->getFunction("objc_unsafeClaimAutoreleasedReturnValue")));
  }));
}
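// With the fast register allocator (-O0), rewrite the AMX tile configuration
// immediately after register assignment.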
bool X86PassConfig::addPostFastRegAllocRewrite() {
  addPass(createX86FastTileConfigPass());
  return true;
}

std::unique_ptr<CSEConfigBase> X86PassConfig::getCSEConfig() const {
  return getStandardCSEConfigForOpt(TM->getOptLevel());
}
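// Register allocator filter: restricts the extra greedy allocation run below
// to the AMX tile register class, so tiles are assigned and configured before
// all other registers.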
static bool onlyAllocateTileRegisters(const TargetRegisterInfo &TRI,
                                      const TargetRegisterClass &RC) {
  return static_cast<const X86RegisterInfo &>(TRI).isTileRegisterClass(&RC);
}

bool X86PassConfig::addRegAssignAndRewriteOptimized() {
  // Don't support tile RA when RA is specified by command line "-regalloc".
  if (!isCustomizedRegAlloc() && EnableTileRAPass) {
    // Allocate tile register first.
    addPass(createGreedyRegisterAllocator(onlyAllocateTileRegisters));
    addPass(createX86TileConfigPass());
  }
  return TargetPassConfig::addRegAssignAndRewriteOptimized();
}