X86TargetMachine.cpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617
  1. //===-- X86TargetMachine.cpp - Define TargetMachine for the X86 -----------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the X86 specific subclass of TargetMachine.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "X86TargetMachine.h"
  13. #include "MCTargetDesc/X86MCTargetDesc.h"
  14. #include "TargetInfo/X86TargetInfo.h"
  15. #include "X86.h"
  16. #include "X86CallLowering.h"
  17. #include "X86LegalizerInfo.h"
  18. #include "X86MacroFusion.h"
  19. #include "X86Subtarget.h"
  20. #include "X86TargetObjectFile.h"
  21. #include "X86TargetTransformInfo.h"
  22. #include "llvm/ADT/Optional.h"
  23. #include "llvm/ADT/STLExtras.h"
  24. #include "llvm/ADT/SmallString.h"
  25. #include "llvm/ADT/StringRef.h"
  26. #include "llvm/ADT/Triple.h"
  27. #include "llvm/Analysis/TargetTransformInfo.h"
  28. #include "llvm/CodeGen/ExecutionDomainFix.h"
  29. #include "llvm/CodeGen/GlobalISel/CallLowering.h"
  30. #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
  31. #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
  32. #include "llvm/CodeGen/GlobalISel/Legalizer.h"
  33. #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
  34. #include "llvm/CodeGen/MachineScheduler.h"
  35. #include "llvm/CodeGen/Passes.h"
  36. #include "llvm/CodeGen/TargetPassConfig.h"
  37. #include "llvm/IR/Attributes.h"
  38. #include "llvm/IR/DataLayout.h"
  39. #include "llvm/IR/Function.h"
  40. #include "llvm/MC/MCAsmInfo.h"
  41. #include "llvm/MC/TargetRegistry.h"
  42. #include "llvm/Pass.h"
  43. #include "llvm/Support/CodeGen.h"
  44. #include "llvm/Support/CommandLine.h"
  45. #include "llvm/Support/ErrorHandling.h"
  46. #include "llvm/Target/TargetLoweringObjectFile.h"
  47. #include "llvm/Target/TargetOptions.h"
  48. #include "llvm/Transforms/CFGuard.h"
  49. #include <memory>
  50. #include <string>
  51. using namespace llvm;
  52. static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner",
  53. cl::desc("Enable the machine combiner pass"),
  54. cl::init(true), cl::Hidden);
  55. extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
  56. // Register the target.
  57. RegisterTargetMachine<X86TargetMachine> X(getTheX86_32Target());
  58. RegisterTargetMachine<X86TargetMachine> Y(getTheX86_64Target());
  59. PassRegistry &PR = *PassRegistry::getPassRegistry();
  60. initializeX86LowerAMXIntrinsicsLegacyPassPass(PR);
  61. initializeX86LowerAMXTypeLegacyPassPass(PR);
  62. initializeX86PreAMXConfigPassPass(PR);
  63. initializeGlobalISel(PR);
  64. initializeWinEHStatePassPass(PR);
  65. initializeFixupBWInstPassPass(PR);
  66. initializeEvexToVexInstPassPass(PR);
  67. initializeFixupLEAPassPass(PR);
  68. initializeFPSPass(PR);
  69. initializeX86FixupSetCCPassPass(PR);
  70. initializeX86CallFrameOptimizationPass(PR);
  71. initializeX86CmovConverterPassPass(PR);
  72. initializeX86TileConfigPass(PR);
  73. initializeX86FastTileConfigPass(PR);
  74. initializeX86LowerTileCopyPass(PR);
  75. initializeX86ExpandPseudoPass(PR);
  76. initializeX86ExecutionDomainFixPass(PR);
  77. initializeX86DomainReassignmentPass(PR);
  78. initializeX86AvoidSFBPassPass(PR);
  79. initializeX86AvoidTrailingCallPassPass(PR);
  80. initializeX86SpeculativeLoadHardeningPassPass(PR);
  81. initializeX86SpeculativeExecutionSideEffectSuppressionPass(PR);
  82. initializeX86FlagsCopyLoweringPassPass(PR);
  83. initializeX86LoadValueInjectionLoadHardeningPassPass(PR);
  84. initializeX86LoadValueInjectionRetHardeningPassPass(PR);
  85. initializeX86OptimizeLEAPassPass(PR);
  86. initializeX86PartialReductionPass(PR);
  87. initializePseudoProbeInserterPass(PR);
  88. }
  89. static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
  90. if (TT.isOSBinFormatMachO()) {
  91. if (TT.getArch() == Triple::x86_64)
  92. return std::make_unique<X86_64MachoTargetObjectFile>();
  93. return std::make_unique<TargetLoweringObjectFileMachO>();
  94. }
  95. if (TT.isOSBinFormatCOFF())
  96. return std::make_unique<TargetLoweringObjectFileCOFF>();
  97. return std::make_unique<X86ELFTargetObjectFile>();
  98. }
  99. static std::string computeDataLayout(const Triple &TT) {
  100. // X86 is little endian
  101. std::string Ret = "e";
  102. Ret += DataLayout::getManglingComponent(TT);
  103. // X86 and x32 have 32 bit pointers.
  104. if (!TT.isArch64Bit() || TT.isX32() || TT.isOSNaCl())
  105. Ret += "-p:32:32";
  106. // Address spaces for 32 bit signed, 32 bit unsigned, and 64 bit pointers.
  107. Ret += "-p270:32:32-p271:32:32-p272:64:64";
  108. // Some ABIs align 64 bit integers and doubles to 64 bits, others to 32.
  109. if (TT.isArch64Bit() || TT.isOSWindows() || TT.isOSNaCl())
  110. Ret += "-i64:64";
  111. else if (TT.isOSIAMCU())
  112. Ret += "-i64:32-f64:32";
  113. else
  114. Ret += "-f64:32:64";
  115. // Some ABIs align long double to 128 bits, others to 32.
  116. if (TT.isOSNaCl() || TT.isOSIAMCU())
  117. ; // No f80
  118. else if (TT.isArch64Bit() || TT.isOSDarwin() || TT.isWindowsMSVCEnvironment())
  119. Ret += "-f80:128";
  120. else
  121. Ret += "-f80:32";
  122. if (TT.isOSIAMCU())
  123. Ret += "-f128:32";
  124. // The registers can hold 8, 16, 32 or, in x86-64, 64 bits.
  125. if (TT.isArch64Bit())
  126. Ret += "-n8:16:32:64";
  127. else
  128. Ret += "-n8:16:32";
  129. // The stack is aligned to 32 bits on some ABIs and 128 bits on others.
  130. if ((!TT.isArch64Bit() && TT.isOSWindows()) || TT.isOSIAMCU())
  131. Ret += "-a:0:32-S32";
  132. else
  133. Ret += "-S128";
  134. return Ret;
  135. }
  136. static Reloc::Model getEffectiveRelocModel(const Triple &TT,
  137. bool JIT,
  138. Optional<Reloc::Model> RM) {
  139. bool is64Bit = TT.getArch() == Triple::x86_64;
  140. if (!RM.hasValue()) {
  141. // JIT codegen should use static relocations by default, since it's
  142. // typically executed in process and not relocatable.
  143. if (JIT)
  144. return Reloc::Static;
  145. // Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode.
  146. // Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we
  147. // use static relocation model by default.
  148. if (TT.isOSDarwin()) {
  149. if (is64Bit)
  150. return Reloc::PIC_;
  151. return Reloc::DynamicNoPIC;
  152. }
  153. if (TT.isOSWindows() && is64Bit)
  154. return Reloc::PIC_;
  155. return Reloc::Static;
  156. }
  157. // ELF and X86-64 don't have a distinct DynamicNoPIC model. DynamicNoPIC
  158. // is defined as a model for code which may be used in static or dynamic
  159. // executables but not necessarily a shared library. On X86-32 we just
  160. // compile in -static mode, in x86-64 we use PIC.
  161. if (*RM == Reloc::DynamicNoPIC) {
  162. if (is64Bit)
  163. return Reloc::PIC_;
  164. if (!TT.isOSDarwin())
  165. return Reloc::Static;
  166. }
  167. // If we are on Darwin, disallow static relocation model in X86-64 mode, since
  168. // the Mach-O file format doesn't support it.
  169. if (*RM == Reloc::Static && TT.isOSDarwin() && is64Bit)
  170. return Reloc::PIC_;
  171. return *RM;
  172. }
  173. static CodeModel::Model getEffectiveX86CodeModel(Optional<CodeModel::Model> CM,
  174. bool JIT, bool Is64Bit) {
  175. if (CM) {
  176. if (*CM == CodeModel::Tiny)
  177. report_fatal_error("Target does not support the tiny CodeModel", false);
  178. return *CM;
  179. }
  180. if (JIT)
  181. return Is64Bit ? CodeModel::Large : CodeModel::Small;
  182. return CodeModel::Small;
  183. }
  184. /// Create an X86 target.
  185. ///
  186. X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT,
  187. StringRef CPU, StringRef FS,
  188. const TargetOptions &Options,
  189. Optional<Reloc::Model> RM,
  190. Optional<CodeModel::Model> CM,
  191. CodeGenOpt::Level OL, bool JIT)
  192. : LLVMTargetMachine(
  193. T, computeDataLayout(TT), TT, CPU, FS, Options,
  194. getEffectiveRelocModel(TT, JIT, RM),
  195. getEffectiveX86CodeModel(CM, JIT, TT.getArch() == Triple::x86_64),
  196. OL),
  197. TLOF(createTLOF(getTargetTriple())), IsJIT(JIT) {
  198. // On PS4, the "return address" of a 'noreturn' call must still be within
  199. // the calling function, and TrapUnreachable is an easy way to get that.
  200. if (TT.isPS4() || TT.isOSBinFormatMachO()) {
  201. this->Options.TrapUnreachable = true;
  202. this->Options.NoTrapAfterNoreturn = TT.isOSBinFormatMachO();
  203. }
  204. setMachineOutliner(true);
  205. // x86 supports the debug entry values.
  206. setSupportsDebugEntryValues(true);
  207. initAsmInfo();
  208. }
  209. X86TargetMachine::~X86TargetMachine() = default;
  210. const X86Subtarget *
  211. X86TargetMachine::getSubtargetImpl(const Function &F) const {
  212. Attribute CPUAttr = F.getFnAttribute("target-cpu");
  213. Attribute TuneAttr = F.getFnAttribute("tune-cpu");
  214. Attribute FSAttr = F.getFnAttribute("target-features");
  215. StringRef CPU =
  216. CPUAttr.isValid() ? CPUAttr.getValueAsString() : (StringRef)TargetCPU;
  217. StringRef TuneCPU =
  218. TuneAttr.isValid() ? TuneAttr.getValueAsString() : (StringRef)CPU;
  219. StringRef FS =
  220. FSAttr.isValid() ? FSAttr.getValueAsString() : (StringRef)TargetFS;
  221. SmallString<512> Key;
  222. // The additions here are ordered so that the definitely short strings are
  223. // added first so we won't exceed the small size. We append the
  224. // much longer FS string at the end so that we only heap allocate at most
  225. // one time.
  226. // Extract prefer-vector-width attribute.
  227. unsigned PreferVectorWidthOverride = 0;
  228. Attribute PreferVecWidthAttr = F.getFnAttribute("prefer-vector-width");
  229. if (PreferVecWidthAttr.isValid()) {
  230. StringRef Val = PreferVecWidthAttr.getValueAsString();
  231. unsigned Width;
  232. if (!Val.getAsInteger(0, Width)) {
  233. Key += 'p';
  234. Key += Val;
  235. PreferVectorWidthOverride = Width;
  236. }
  237. }
  238. // Extract min-legal-vector-width attribute.
  239. unsigned RequiredVectorWidth = UINT32_MAX;
  240. Attribute MinLegalVecWidthAttr = F.getFnAttribute("min-legal-vector-width");
  241. if (MinLegalVecWidthAttr.isValid()) {
  242. StringRef Val = MinLegalVecWidthAttr.getValueAsString();
  243. unsigned Width;
  244. if (!Val.getAsInteger(0, Width)) {
  245. Key += 'm';
  246. Key += Val;
  247. RequiredVectorWidth = Width;
  248. }
  249. }
  250. // Add CPU to the Key.
  251. Key += CPU;
  252. // Add tune CPU to the Key.
  253. Key += TuneCPU;
  254. // Keep track of the start of the feature portion of the string.
  255. unsigned FSStart = Key.size();
  256. // FIXME: This is related to the code below to reset the target options,
  257. // we need to know whether or not the soft float flag is set on the
  258. // function before we can generate a subtarget. We also need to use
  259. // it as a key for the subtarget since that can be the only difference
  260. // between two functions.
  261. bool SoftFloat = F.getFnAttribute("use-soft-float").getValueAsBool();
  262. // If the soft float attribute is set on the function turn on the soft float
  263. // subtarget feature.
  264. if (SoftFloat)
  265. Key += FS.empty() ? "+soft-float" : "+soft-float,";
  266. Key += FS;
  267. // We may have added +soft-float to the features so move the StringRef to
  268. // point to the full string in the Key.
  269. FS = Key.substr(FSStart);
  270. auto &I = SubtargetMap[Key];
  271. if (!I) {
  272. // This needs to be done before we create a new subtarget since any
  273. // creation will depend on the TM and the code generation flags on the
  274. // function that reside in TargetOptions.
  275. resetTargetOptions(F);
  276. I = std::make_unique<X86Subtarget>(
  277. TargetTriple, CPU, TuneCPU, FS, *this,
  278. MaybeAlign(F.getParent()->getOverrideStackAlignment()),
  279. PreferVectorWidthOverride, RequiredVectorWidth);
  280. }
  281. return I.get();
  282. }
  283. bool X86TargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
  284. unsigned DestAS) const {
  285. assert(SrcAS != DestAS && "Expected different address spaces!");
  286. if (getPointerSize(SrcAS) != getPointerSize(DestAS))
  287. return false;
  288. return SrcAS < 256 && DestAS < 256;
  289. }
  290. //===----------------------------------------------------------------------===//
  291. // X86 TTI query.
  292. //===----------------------------------------------------------------------===//
  293. TargetTransformInfo
  294. X86TargetMachine::getTargetTransformInfo(const Function &F) {
  295. return TargetTransformInfo(X86TTIImpl(this, F));
  296. }
  297. //===----------------------------------------------------------------------===//
  298. // Pass Pipeline Configuration
  299. //===----------------------------------------------------------------------===//
  300. namespace {
  301. /// X86 Code Generator Pass Configuration Options.
  302. class X86PassConfig : public TargetPassConfig {
  303. public:
  304. X86PassConfig(X86TargetMachine &TM, PassManagerBase &PM)
  305. : TargetPassConfig(TM, PM) {}
  306. X86TargetMachine &getX86TargetMachine() const {
  307. return getTM<X86TargetMachine>();
  308. }
  309. ScheduleDAGInstrs *
  310. createMachineScheduler(MachineSchedContext *C) const override {
  311. ScheduleDAGMILive *DAG = createGenericSchedLive(C);
  312. DAG->addMutation(createX86MacroFusionDAGMutation());
  313. return DAG;
  314. }
  315. ScheduleDAGInstrs *
  316. createPostMachineScheduler(MachineSchedContext *C) const override {
  317. ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
  318. DAG->addMutation(createX86MacroFusionDAGMutation());
  319. return DAG;
  320. }
  321. void addIRPasses() override;
  322. bool addInstSelector() override;
  323. bool addIRTranslator() override;
  324. bool addLegalizeMachineIR() override;
  325. bool addRegBankSelect() override;
  326. bool addGlobalInstructionSelect() override;
  327. bool addILPOpts() override;
  328. bool addPreISel() override;
  329. void addMachineSSAOptimization() override;
  330. void addPreRegAlloc() override;
  331. bool addPostFastRegAllocRewrite() override;
  332. void addPostRegAlloc() override;
  333. void addPreEmitPass() override;
  334. void addPreEmitPass2() override;
  335. void addPreSched2() override;
  336. bool addPreRewrite() override;
  337. std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
  338. };
  339. class X86ExecutionDomainFix : public ExecutionDomainFix {
  340. public:
  341. static char ID;
  342. X86ExecutionDomainFix() : ExecutionDomainFix(ID, X86::VR128XRegClass) {}
  343. StringRef getPassName() const override {
  344. return "X86 Execution Dependency Fix";
  345. }
  346. };
  347. char X86ExecutionDomainFix::ID;
  348. } // end anonymous namespace
  349. INITIALIZE_PASS_BEGIN(X86ExecutionDomainFix, "x86-execution-domain-fix",
  350. "X86 Execution Domain Fix", false, false)
  351. INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis)
  352. INITIALIZE_PASS_END(X86ExecutionDomainFix, "x86-execution-domain-fix",
  353. "X86 Execution Domain Fix", false, false)
  354. TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
  355. return new X86PassConfig(*this, PM);
  356. }
  357. void X86PassConfig::addIRPasses() {
  358. addPass(createAtomicExpandPass());
  359. // We add both pass anyway and when these two passes run, we skip the pass
  360. // based on the option level and option attribute.
  361. addPass(createX86LowerAMXIntrinsicsPass());
  362. addPass(createX86LowerAMXTypePass());
  363. if (TM->getOptLevel() == CodeGenOpt::None)
  364. addPass(createX86PreAMXConfigPass());
  365. TargetPassConfig::addIRPasses();
  366. if (TM->getOptLevel() != CodeGenOpt::None) {
  367. addPass(createInterleavedAccessPass());
  368. addPass(createX86PartialReductionPass());
  369. }
  370. // Add passes that handle indirect branch removal and insertion of a retpoline
  371. // thunk. These will be a no-op unless a function subtarget has the retpoline
  372. // feature enabled.
  373. addPass(createIndirectBrExpandPass());
  374. // Add Control Flow Guard checks.
  375. const Triple &TT = TM->getTargetTriple();
  376. if (TT.isOSWindows()) {
  377. if (TT.getArch() == Triple::x86_64) {
  378. addPass(createCFGuardDispatchPass());
  379. } else {
  380. addPass(createCFGuardCheckPass());
  381. }
  382. }
  383. }
  384. bool X86PassConfig::addInstSelector() {
  385. // Install an instruction selector.
  386. addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
  387. // For ELF, cleanup any local-dynamic TLS accesses.
  388. if (TM->getTargetTriple().isOSBinFormatELF() &&
  389. getOptLevel() != CodeGenOpt::None)
  390. addPass(createCleanupLocalDynamicTLSPass());
  391. addPass(createX86GlobalBaseRegPass());
  392. return false;
  393. }
  394. bool X86PassConfig::addIRTranslator() {
  395. addPass(new IRTranslator(getOptLevel()));
  396. return false;
  397. }
  398. bool X86PassConfig::addLegalizeMachineIR() {
  399. addPass(new Legalizer());
  400. return false;
  401. }
  402. bool X86PassConfig::addRegBankSelect() {
  403. addPass(new RegBankSelect());
  404. return false;
  405. }
  406. bool X86PassConfig::addGlobalInstructionSelect() {
  407. addPass(new InstructionSelect(getOptLevel()));
  408. return false;
  409. }
  410. bool X86PassConfig::addILPOpts() {
  411. addPass(&EarlyIfConverterID);
  412. if (EnableMachineCombinerPass)
  413. addPass(&MachineCombinerID);
  414. addPass(createX86CmovConverterPass());
  415. return true;
  416. }
  417. bool X86PassConfig::addPreISel() {
  418. // Only add this pass for 32-bit x86 Windows.
  419. const Triple &TT = TM->getTargetTriple();
  420. if (TT.isOSWindows() && TT.getArch() == Triple::x86)
  421. addPass(createX86WinEHStatePass());
  422. return true;
  423. }
  424. void X86PassConfig::addPreRegAlloc() {
  425. if (getOptLevel() != CodeGenOpt::None) {
  426. addPass(&LiveRangeShrinkID);
  427. addPass(createX86FixupSetCC());
  428. addPass(createX86OptimizeLEAs());
  429. addPass(createX86CallFrameOptimization());
  430. addPass(createX86AvoidStoreForwardingBlocks());
  431. }
  432. addPass(createX86SpeculativeLoadHardeningPass());
  433. addPass(createX86FlagsCopyLoweringPass());
  434. addPass(createX86DynAllocaExpander());
  435. if (getOptLevel() != CodeGenOpt::None) {
  436. addPass(createX86PreTileConfigPass());
  437. }
  438. }
  439. void X86PassConfig::addMachineSSAOptimization() {
  440. addPass(createX86DomainReassignmentPass());
  441. TargetPassConfig::addMachineSSAOptimization();
  442. }
  443. void X86PassConfig::addPostRegAlloc() {
  444. addPass(createX86LowerTileCopyPass());
  445. addPass(createX86FloatingPointStackifierPass());
  446. // When -O0 is enabled, the Load Value Injection Hardening pass will fall back
  447. // to using the Speculative Execution Side Effect Suppression pass for
  448. // mitigation. This is to prevent slow downs due to
  449. // analyses needed by the LVIHardening pass when compiling at -O0.
  450. if (getOptLevel() != CodeGenOpt::None)
  451. addPass(createX86LoadValueInjectionLoadHardeningPass());
  452. }
  453. void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); }
  454. void X86PassConfig::addPreEmitPass() {
  455. if (getOptLevel() != CodeGenOpt::None) {
  456. addPass(new X86ExecutionDomainFix());
  457. addPass(createBreakFalseDeps());
  458. }
  459. addPass(createX86IndirectBranchTrackingPass());
  460. addPass(createX86IssueVZeroUpperPass());
  461. if (getOptLevel() != CodeGenOpt::None) {
  462. addPass(createX86FixupBWInsts());
  463. addPass(createX86PadShortFunctions());
  464. addPass(createX86FixupLEAs());
  465. }
  466. addPass(createX86EvexToVexInsts());
  467. addPass(createX86DiscriminateMemOpsPass());
  468. addPass(createX86InsertPrefetchPass());
  469. addPass(createX86InsertX87waitPass());
  470. }
  471. void X86PassConfig::addPreEmitPass2() {
  472. const Triple &TT = TM->getTargetTriple();
  473. const MCAsmInfo *MAI = TM->getMCAsmInfo();
  474. // The X86 Speculative Execution Pass must run after all control
  475. // flow graph modifying passes. As a result it was listed to run right before
  476. // the X86 Retpoline Thunks pass. The reason it must run after control flow
  477. // graph modifications is that the model of LFENCE in LLVM has to be updated
  478. // (FIXME: https://bugs.llvm.org/show_bug.cgi?id=45167). Currently the
  479. // placement of this pass was hand checked to ensure that the subsequent
  480. // passes don't move the code around the LFENCEs in a way that will hurt the
  481. // correctness of this pass. This placement has been shown to work based on
  482. // hand inspection of the codegen output.
  483. addPass(createX86SpeculativeExecutionSideEffectSuppression());
  484. addPass(createX86IndirectThunksPass());
  485. // Insert extra int3 instructions after trailing call instructions to avoid
  486. // issues in the unwinder.
  487. if (TT.isOSWindows() && TT.getArch() == Triple::x86_64)
  488. addPass(createX86AvoidTrailingCallPass());
  489. // Verify basic block incoming and outgoing cfa offset and register values and
  490. // correct CFA calculation rule where needed by inserting appropriate CFI
  491. // instructions.
  492. if (!TT.isOSDarwin() &&
  493. (!TT.isOSWindows() ||
  494. MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI))
  495. addPass(createCFIInstrInserter());
  496. if (TT.isOSWindows()) {
  497. // Identify valid longjmp targets for Windows Control Flow Guard.
  498. addPass(createCFGuardLongjmpPass());
  499. // Identify valid eh continuation targets for Windows EHCont Guard.
  500. addPass(createEHContGuardCatchretPass());
  501. }
  502. addPass(createX86LoadValueInjectionRetHardeningPass());
  503. // Insert pseudo probe annotation for callsite profiling
  504. addPass(createPseudoProbeInserter());
  505. // On Darwin platforms, BLR_RVMARKER pseudo instructions are lowered to
  506. // bundles.
  507. if (TT.isOSDarwin())
  508. addPass(createUnpackMachineBundles([](const MachineFunction &MF) {
  509. // Only run bundle expansion if there are relevant ObjC runtime functions
  510. // present in the module.
  511. const Function &F = MF.getFunction();
  512. const Module *M = F.getParent();
  513. return M->getFunction("objc_retainAutoreleasedReturnValue") ||
  514. M->getFunction("objc_unsafeClaimAutoreleasedReturnValue");
  515. }));
  516. }
  517. bool X86PassConfig::addPostFastRegAllocRewrite() {
  518. addPass(createX86FastTileConfigPass());
  519. return true;
  520. }
  521. bool X86PassConfig::addPreRewrite() {
  522. addPass(createX86TileConfigPass());
  523. return true;
  524. }
  525. std::unique_ptr<CSEConfigBase> X86PassConfig::getCSEConfig() const {
  526. return getStandardCSEConfigForOpt(TM->getOptLevel());
  527. }