ARMTargetMachine.cpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586
  1. //===-- ARMTargetMachine.cpp - Define TargetMachine for ARM ---------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. //
  10. //===----------------------------------------------------------------------===//
  11. #include "ARMTargetMachine.h"
  12. #include "ARM.h"
  13. #include "ARMMacroFusion.h"
  14. #include "ARMSubtarget.h"
  15. #include "ARMTargetObjectFile.h"
  16. #include "ARMTargetTransformInfo.h"
  17. #include "MCTargetDesc/ARMMCTargetDesc.h"
  18. #include "TargetInfo/ARMTargetInfo.h"
  19. #include "llvm/ADT/Optional.h"
  20. #include "llvm/ADT/STLExtras.h"
  21. #include "llvm/ADT/StringRef.h"
  22. #include "llvm/ADT/Triple.h"
  23. #include "llvm/Analysis/TargetTransformInfo.h"
  24. #include "llvm/CodeGen/ExecutionDomainFix.h"
  25. #include "llvm/CodeGen/GlobalISel/CallLowering.h"
  26. #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
  27. #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
  28. #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
  29. #include "llvm/CodeGen/GlobalISel/Legalizer.h"
  30. #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
  31. #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
  32. #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
  33. #include "llvm/CodeGen/MachineFunction.h"
  34. #include "llvm/CodeGen/MachineScheduler.h"
  35. #include "llvm/CodeGen/Passes.h"
  36. #include "llvm/CodeGen/TargetPassConfig.h"
  37. #include "llvm/IR/Attributes.h"
  38. #include "llvm/IR/DataLayout.h"
  39. #include "llvm/IR/Function.h"
  40. #include "llvm/MC/TargetRegistry.h"
  41. #include "llvm/Pass.h"
  42. #include "llvm/Support/CodeGen.h"
  43. #include "llvm/Support/CommandLine.h"
  44. #include "llvm/Support/ErrorHandling.h"
  45. #include "llvm/Support/ARMTargetParser.h"
  46. #include "llvm/Support/TargetParser.h"
  47. #include "llvm/Target/TargetLoweringObjectFile.h"
  48. #include "llvm/Target/TargetOptions.h"
  49. #include "llvm/Transforms/CFGuard.h"
  50. #include "llvm/Transforms/IPO.h"
  51. #include "llvm/Transforms/Scalar.h"
  52. #include <cassert>
  53. #include <memory>
  54. #include <string>
using namespace llvm;

// Developer (cl::Hidden) command-line switches for the ARM backend.

// Escape hatch for the Cortex-A15 S->D register-access optimizer pass
// scheduled in ARMPassConfig::addPreRegAlloc.
static cl::opt<bool>
DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden,
                         cl::desc("Inhibit optimization of S->D register accesses on A15"),
                         cl::init(false));

// Controls the extra SimplifyCFG run scheduled after atomic expansion in
// ARMPassConfig::addIRPasses.
static cl::opt<bool>
EnableAtomicTidy("arm-atomic-cfg-tidy", cl::Hidden,
                 cl::desc("Run SimplifyCFG after expanding atomic operations"
                          " to make use of cmpxchg flow-based information"),
                 cl::init(true));

// Master switch for both the pre- and post-register-allocation ARM
// load/store optimization passes.
static cl::opt<bool>
EnableARMLoadStoreOpt("arm-load-store-opt", cl::Hidden,
                      cl::desc("Enable ARM load/store optimization pass"),
                      cl::init(true));

// FIXME: Unify control over GlobalMerge.
// Tri-state so addPreISel can distinguish "explicitly enabled" from the
// default behavior (enabled only when optimizing).
static cl::opt<cl::boolOrDefault>
EnableGlobalMerge("arm-global-merge", cl::Hidden,
                  cl::desc("Enable the global merge pass"));

namespace llvm {

// Declared here so LLVMInitializeARMTarget below can register the
// ARMExecutionDomainFix pass, which is defined later in this file.
void initializeARMExecutionDomainFixPass(PassRegistry&);

}
// Entry point invoked by the target-registry machinery: registers the
// target-machine factories for all four ARM flavours (ARM/Thumb crossed with
// little/big endian) and initializes the ARM-specific machine passes with
// the global pass registry.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() {
  // Register the target. Thumb triples reuse the same TargetMachine classes
  // as the corresponding ARM ones; only endianness selects the class.
  RegisterTargetMachine<ARMLETargetMachine> X(getTheARMLETarget());
  RegisterTargetMachine<ARMLETargetMachine> A(getTheThumbLETarget());
  RegisterTargetMachine<ARMBETargetMachine> Y(getTheARMBETarget());
  RegisterTargetMachine<ARMBETargetMachine> B(getTheThumbBETarget());

  // Register the backend passes so the pass manager knows their names and
  // analysis dependencies before any pipeline is built.
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeGlobalISel(Registry);
  initializeARMLoadStoreOptPass(Registry);
  initializeARMPreAllocLoadStoreOptPass(Registry);
  initializeARMParallelDSPPass(Registry);
  initializeARMBranchTargetsPass(Registry);
  initializeARMConstantIslandsPass(Registry);
  initializeARMExecutionDomainFixPass(Registry);
  initializeARMExpandPseudoPass(Registry);
  initializeThumb2SizeReducePass(Registry);
  initializeMVEVPTBlockPass(Registry);
  initializeMVETPAndVPTOptimisationsPass(Registry);
  initializeMVETailPredicationPass(Registry);
  initializeARMLowOverheadLoopsPass(Registry);
  initializeARMBlockPlacementPass(Registry);
  initializeMVEGatherScatterLoweringPass(Registry);
  initializeARMSLSHardeningPass(Registry);
  initializeMVELaneInterleavingPass(Registry);
}
  101. static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
  102. if (TT.isOSBinFormatMachO())
  103. return std::make_unique<TargetLoweringObjectFileMachO>();
  104. if (TT.isOSWindows())
  105. return std::make_unique<TargetLoweringObjectFileCOFF>();
  106. return std::make_unique<ARMElfTargetObjectFile>();
  107. }
  108. static ARMBaseTargetMachine::ARMABI
  109. computeTargetABI(const Triple &TT, StringRef CPU,
  110. const TargetOptions &Options) {
  111. StringRef ABIName = Options.MCOptions.getABIName();
  112. if (ABIName.empty())
  113. ABIName = ARM::computeDefaultTargetABI(TT, CPU);
  114. if (ABIName == "aapcs16")
  115. return ARMBaseTargetMachine::ARM_ABI_AAPCS16;
  116. else if (ABIName.startswith("aapcs"))
  117. return ARMBaseTargetMachine::ARM_ABI_AAPCS;
  118. else if (ABIName.startswith("apcs"))
  119. return ARMBaseTargetMachine::ARM_ABI_APCS;
  120. llvm_unreachable("Unhandled/unknown ABI Name!");
  121. return ARMBaseTargetMachine::ARM_ABI_UNKNOWN;
  122. }
  123. static std::string computeDataLayout(const Triple &TT, StringRef CPU,
  124. const TargetOptions &Options,
  125. bool isLittle) {
  126. auto ABI = computeTargetABI(TT, CPU, Options);
  127. std::string Ret;
  128. if (isLittle)
  129. // Little endian.
  130. Ret += "e";
  131. else
  132. // Big endian.
  133. Ret += "E";
  134. Ret += DataLayout::getManglingComponent(TT);
  135. // Pointers are 32 bits and aligned to 32 bits.
  136. Ret += "-p:32:32";
  137. // Function pointers are aligned to 8 bits (because the LSB stores the
  138. // ARM/Thumb state).
  139. Ret += "-Fi8";
  140. // ABIs other than APCS have 64 bit integers with natural alignment.
  141. if (ABI != ARMBaseTargetMachine::ARM_ABI_APCS)
  142. Ret += "-i64:64";
  143. // We have 64 bits floats. The APCS ABI requires them to be aligned to 32
  144. // bits, others to 64 bits. We always try to align to 64 bits.
  145. if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS)
  146. Ret += "-f64:32:64";
  147. // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others
  148. // to 64. We always ty to give them natural alignment.
  149. if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS)
  150. Ret += "-v64:32:64-v128:32:128";
  151. else if (ABI != ARMBaseTargetMachine::ARM_ABI_AAPCS16)
  152. Ret += "-v128:64:128";
  153. // Try to align aggregates to 32 bits (the default is 64 bits, which has no
  154. // particular hardware support on 32-bit ARM).
  155. Ret += "-a:0:32";
  156. // Integer registers are 32 bits.
  157. Ret += "-n32";
  158. // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit
  159. // aligned everywhere else.
  160. if (TT.isOSNaCl() || ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16)
  161. Ret += "-S128";
  162. else if (ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS)
  163. Ret += "-S64";
  164. else
  165. Ret += "-S32";
  166. return Ret;
  167. }
  168. static Reloc::Model getEffectiveRelocModel(const Triple &TT,
  169. Optional<Reloc::Model> RM) {
  170. if (!RM.hasValue())
  171. // Default relocation model on Darwin is PIC.
  172. return TT.isOSBinFormatMachO() ? Reloc::PIC_ : Reloc::Static;
  173. if (*RM == Reloc::ROPI || *RM == Reloc::RWPI || *RM == Reloc::ROPI_RWPI)
  174. assert(TT.isOSBinFormatELF() &&
  175. "ROPI/RWPI currently only supported for ELF");
  176. // DynamicNoPIC is only used on darwin.
  177. if (*RM == Reloc::DynamicNoPIC && !TT.isOSDarwin())
  178. return Reloc::Static;
  179. return *RM;
  180. }
/// Create an ARM architecture model.
///
/// Computes the data layout, relocation/code model and ABI from the triple,
/// CPU and options, then fixes up option defaults (float ABI, EABI version,
/// Mach-O trap behavior) that depend on the target.
ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
                                           StringRef CPU, StringRef FS,
                                           const TargetOptions &Options,
                                           Optional<Reloc::Model> RM,
                                           Optional<CodeModel::Model> CM,
                                           CodeGenOpt::Level OL, bool isLittle)
    : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
                        CPU, FS, Options, getEffectiveRelocModel(TT, RM),
                        getEffectiveCodeModel(CM, CodeModel::Small), OL),
      TargetABI(computeTargetABI(TT, CPU, Options)),
      TLOF(createTLOF(getTargetTriple())), isLittle(isLittle) {
  // Default to triple-appropriate float ABI
  if (Options.FloatABIType == FloatABI::Default) {
    if (isTargetHardFloat())
      this->Options.FloatABIType = FloatABI::Hard;
    else
      this->Options.FloatABIType = FloatABI::Soft;
  }

  // Default to triple-appropriate EABI
  if (Options.EABIVersion == EABI::Default ||
      Options.EABIVersion == EABI::Unknown) {
    // musl is compatible with glibc with regard to EABI version
    if ((TargetTriple.getEnvironment() == Triple::GNUEABI ||
         TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
         TargetTriple.getEnvironment() == Triple::MuslEABI ||
         TargetTriple.getEnvironment() == Triple::MuslEABIHF) &&
        !(TargetTriple.isOSWindows() || TargetTriple.isOSDarwin()))
      this->Options.EABIVersion = EABI::GNU;
    else
      this->Options.EABIVersion = EABI::EABI5;
  }

  // On Mach-O, emit a trap for 'unreachable', but not directly after
  // no-return calls.
  if (TT.isOSBinFormatMachO()) {
    this->Options.TrapUnreachable = true;
    this->Options.NoTrapAfterNoreturn = true;
  }

  // ARM supports the debug entry values.
  setSupportsDebugEntryValues(true);

  initAsmInfo();

  // ARM supports the MachineOutliner.
  setMachineOutliner(true);
  setSupportsDefaultOutlining(true);
}
// Out-of-line (defaulted) definition anchors the class in this translation
// unit.
ARMBaseTargetMachine::~ARMBaseTargetMachine() = default;
/// Return (creating and caching on first use) the subtarget matching
/// function F's target-cpu / target-features / use-soft-float attributes and
/// minsize-ness. Subtargets are memoized in SubtargetMap, keyed on the
/// resulting CPU+features(+minsize) configuration.
const ARMSubtarget *
ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
  Attribute CPUAttr = F.getFnAttribute("target-cpu");
  Attribute FSAttr = F.getFnAttribute("target-features");

  // Function attributes override the module-level TargetCPU/TargetFS.
  std::string CPU =
      CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
  std::string FS =
      FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;

  // FIXME: This is related to the code below to reset the target options,
  // we need to know whether or not the soft float flag is set on the
  // function before we can generate a subtarget. We also need to use
  // it as a key for the subtarget since that can be the only difference
  // between two functions.
  bool SoftFloat = F.getFnAttribute("use-soft-float").getValueAsBool();
  // If the soft float attribute is set on the function turn on the soft float
  // subtarget feature.
  if (SoftFloat)
    FS += FS.empty() ? "+soft-float" : ",+soft-float";

  // Use the optminsize to identify the subtarget, but don't use it in the
  // feature string.
  std::string Key = CPU + FS;
  if (F.hasMinSize())
    Key += "+minsize";

  auto &I = SubtargetMap[Key];
  if (!I) {
    // This needs to be done before we create a new subtarget since any
    // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
    I = std::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle,
                                       F.hasMinSize());

    // A subtarget without ARM-mode support cannot run non-Thumb code;
    // diagnose instead of miscompiling.
    if (!I->isThumb() && !I->hasARMOps())
      F.getContext().emitError("Function '" + F.getName() + "' uses ARM "
          "instructions, but the target does not support ARM mode execution.");
  }

  return I.get();
}
// Build a TargetTransformInfo wrapper around the ARM TTI implementation for
// the given function, used by IR-level cost-model queries.
TargetTransformInfo
ARMBaseTargetMachine::getTargetTransformInfo(const Function &F) {
  return TargetTransformInfo(ARMTTIImpl(this, F));
}
/// Little-endian ARM/Thumb target machine: delegates to the base constructor
/// with isLittle = true. The JIT flag is accepted for interface parity with
/// other targets but is not used here.
ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT,
                                       StringRef CPU, StringRef FS,
                                       const TargetOptions &Options,
                                       Optional<Reloc::Model> RM,
                                       Optional<CodeModel::Model> CM,
                                       CodeGenOpt::Level OL, bool JIT)
    : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
/// Big-endian ARM/Thumb target machine: delegates to the base constructor
/// with isLittle = false. The JIT flag is accepted for interface parity with
/// other targets but is not used here.
ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT,
                                       StringRef CPU, StringRef FS,
                                       const TargetOptions &Options,
                                       Optional<Reloc::Model> RM,
                                       Optional<CodeModel::Model> CM,
                                       CodeGenOpt::Level OL, bool JIT)
    : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
namespace {

/// ARM Code Generator Pass Configuration Options.
///
/// Customizes the generic TargetPassConfig pipeline via the overridden hooks
/// below; the hook bodies are defined later in this file.
class ARMPassConfig : public TargetPassConfig {
public:
  ARMPassConfig(ARMBaseTargetMachine &TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {}

  ARMBaseTargetMachine &getARMTargetMachine() const {
    return getTM<ARMBaseTargetMachine>();
  }

  // Pre-RA scheduler: the generic live-interval scheduler, plus the ARM
  // macro-fusion DAG mutation when the subtarget reports fusion support.
  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override {
    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
    // add DAG Mutations here.
    const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>();
    if (ST.hasFusion())
      DAG->addMutation(createARMMacroFusionDAGMutation());
    return DAG;
  }

  // Post-RA scheduler: same macro-fusion treatment as the pre-RA scheduler.
  ScheduleDAGInstrs *
  createPostMachineScheduler(MachineSchedContext *C) const override {
    ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
    // add DAG Mutations here.
    const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>();
    if (ST.hasFusion())
      DAG->addMutation(createARMMacroFusionDAGMutation());
    return DAG;
  }

  void addIRPasses() override;
  void addCodeGenPrepare() override;
  bool addPreISel() override;
  bool addInstSelector() override;
  bool addIRTranslator() override;
  bool addLegalizeMachineIR() override;
  bool addRegBankSelect() override;
  bool addGlobalInstructionSelect() override;
  void addPreRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
  void addPreEmitPass2() override;

  std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
};

/// ARM flavour of the generic execution-domain fix pass, instantiated over
/// the DPR register class.
class ARMExecutionDomainFix : public ExecutionDomainFix {
public:
  static char ID;
  ARMExecutionDomainFix() : ExecutionDomainFix(ID, ARM::DPRRegClass) {}
  StringRef getPassName() const override {
    return "ARM Execution Domain Fix";
  }
};
// Pass identification: the address of ID is the unique pass identifier.
char ARMExecutionDomainFix::ID;

} // end anonymous namespace
// Register ARMExecutionDomainFix with the pass registry, declaring its
// dependency on ReachingDefAnalysis.
INITIALIZE_PASS_BEGIN(ARMExecutionDomainFix, "arm-execution-domain-fix",
                      "ARM Execution Domain Fix", false, false)
INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis)
INITIALIZE_PASS_END(ARMExecutionDomainFix, "arm-execution-domain-fix",
                    "ARM Execution Domain Fix", false, false)
// Create the ARM-specific codegen pass pipeline configuration.
TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new ARMPassConfig(*this, PM);
}
// Use the standard GlobalISel CSE configuration for the current opt level.
std::unique_ptr<CSEConfigBase> ARMPassConfig::getCSEConfig() const {
  return getStandardCSEConfigForOpt(TM->getOptLevel());
}
/// IR-level passes run before the generic codegen IR pipeline. Pass order
/// here is significant.
void ARMPassConfig::addIRPasses() {
  // With a single-threaded model, atomics can be lowered to plain
  // operations; otherwise expand them to target-specific sequences.
  if (TM->Options.ThreadModel == ThreadModel::Single)
    addPass(createLowerAtomicPass());
  else
    addPass(createAtomicExpandPass());

  // Cmpxchg instructions are often used with a subsequent comparison to
  // determine whether it succeeded. We can exploit existing control-flow in
  // ldrex/strex loops to simplify this, but it needs tidying up.
  if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
    addPass(createCFGSimplificationPass(
        SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true),
        [this](const Function &F) {
          // Only worthwhile on subtargets with data barriers and outside
          // Thumb1-only mode.
          const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F);
          return ST.hasAnyDataBarrier() && !ST.isThumb1Only();
        }));

  // Lower MVE gather/scatter and lane-interleaving patterns before the
  // generic IR passes run.
  addPass(createMVEGatherScatterLoweringPass());
  addPass(createMVELaneInterleavingPass());

  TargetPassConfig::addIRPasses();

  // Run the parallel DSP pass.
  if (getOptLevel() == CodeGenOpt::Aggressive)
    addPass(createARMParallelDSPPass());

  // Match interleaved memory accesses to ldN/stN intrinsics.
  if (TM->getOptLevel() != CodeGenOpt::None)
    addPass(createInterleavedAccessPass());

  // Add Control Flow Guard checks.
  if (TM->getTargetTriple().isOSWindows())
    addPass(createCFGuardCheckPass());
}
  371. void ARMPassConfig::addCodeGenPrepare() {
  372. if (getOptLevel() != CodeGenOpt::None)
  373. addPass(createTypePromotionPass());
  374. TargetPassConfig::addCodeGenPrepare();
  375. }
/// Passes run immediately before instruction selection. Returns false to
/// indicate the standard ISel pipeline should still run.
bool ARMPassConfig::addPreISel() {
  // GlobalMerge runs by default when optimizing; -arm-global-merge can force
  // it on or off explicitly.
  if ((TM->getOptLevel() != CodeGenOpt::None &&
       EnableGlobalMerge == cl::BOU_UNSET) ||
      EnableGlobalMerge == cl::BOU_TRUE) {
    // FIXME: This is using the thumb1 only constant value for
    // maximal global offset for merging globals. We may want
    // to look into using the old value for non-thumb1 code of
    // 4095 based on the TargetMachine, but this starts to become
    // tricky when doing code gen per function.
    bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
                               (EnableGlobalMerge == cl::BOU_UNSET);

    // Merging of extern globals is enabled by default on non-Mach-O as we
    // expect it to be generally either beneficial or harmless. On Mach-O it
    // is disabled as we emit the .subsections_via_symbols directive which
    // means that merging extern globals is not safe.
    bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO();
    addPass(createGlobalMergePass(TM, 127, OnlyOptimizeForSize,
                                  MergeExternalByDefault));
  }

  if (TM->getOptLevel() != CodeGenOpt::None) {
    // Convert eligible loops to hardware loops, then try to enable MVE tail
    // predication on them.
    addPass(createHardwareLoopsPass());
    addPass(createMVETailPredicationPass());
    // FIXME: IR passes can delete address-taken basic blocks, deleting
    // corresponding blockaddresses. ARMConstantPoolConstant holds references to
    // address-taken basic blocks which can be invalidated if the function
    // containing the blockaddress has already been codegen'd and the basic
    // block is removed. Work around this by forcing all IR passes to run before
    // any ISel takes place. We should have a more principled way of handling
    // this. See D99707 for more details.
    addPass(createBarrierNoopPass());
  }

  return false;
}
// Install the ARM SelectionDAG instruction selector.
bool ARMPassConfig::addInstSelector() {
  addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
  return false;
}
// GlobalISel step 1: translate LLVM IR to generic machine IR.
bool ARMPassConfig::addIRTranslator() {
  addPass(new IRTranslator(getOptLevel()));
  return false;
}
// GlobalISel step 2: legalize generic machine IR for this target.
bool ARMPassConfig::addLegalizeMachineIR() {
  addPass(new Legalizer());
  return false;
}
// GlobalISel step 3: assign virtual registers to register banks.
bool ARMPassConfig::addRegBankSelect() {
  addPass(new RegBankSelect());
  return false;
}
// GlobalISel step 4: select target instructions.
bool ARMPassConfig::addGlobalInstructionSelect() {
  addPass(new InstructionSelect(getOptLevel()));
  return false;
}
  429. void ARMPassConfig::addPreRegAlloc() {
  430. if (getOptLevel() != CodeGenOpt::None) {
  431. addPass(createMVETPAndVPTOptimisationsPass());
  432. addPass(createMLxExpansionPass());
  433. if (EnableARMLoadStoreOpt)
  434. addPass(createARMLoadStoreOptimizationPass(/* pre-register alloc */ true));
  435. if (!DisableA15SDOptimization)
  436. addPass(createA15SDOptimizerPass());
  437. }
  438. }
/// Passes run after register allocation, around the second scheduling pass.
/// The relative ordering here is significant.
void ARMPassConfig::addPreSched2() {
  if (getOptLevel() != CodeGenOpt::None) {
    // Post-RA load/store optimization, then fix execution domains on DPR
    // registers and break false dependencies that may remain.
    if (EnableARMLoadStoreOpt)
      addPass(createARMLoadStoreOptimizationPass());

    addPass(new ARMExecutionDomainFix());
    addPass(createBreakFalseDeps());
  }

  // Expand some pseudo instructions into multiple instructions to allow
  // proper scheduling.
  addPass(createARMExpandPseudoPass());

  if (getOptLevel() != CodeGenOpt::None) {
    // When optimising for size, always run the Thumb2SizeReduction pass before
    // IfConversion. Otherwise, check whether IT blocks are restricted
    // (e.g. in v8, IfConversion depends on Thumb instruction widths)
    addPass(createThumb2SizeReductionPass([this](const Function &F) {
      return this->TM->getSubtarget<ARMSubtarget>(F).hasMinSize() ||
             this->TM->getSubtarget<ARMSubtarget>(F).restrictIT();
    }));

    // If-conversion is only applied outside Thumb1-only mode.
    addPass(createIfConverter([](const MachineFunction &MF) {
      return !MF.getSubtarget<ARMSubtarget>().isThumb1Only();
    }));
  }
  addPass(createThumb2ITBlockPass());

  // Add both scheduling passes to give the subtarget an opportunity to pick
  // between them.
  if (getOptLevel() != CodeGenOpt::None) {
    addPass(&PostMachineSchedulerID);
    addPass(&PostRASchedulerID);
  }

  // Form MVE VPT blocks, then add indirect-thunk and straight-line
  // speculation hardening passes.
  addPass(createMVEVPTBlockPass());
  addPass(createARMIndirectThunks());
  addPass(createARMSLSHardeningPass());
}
/// Late machine passes run shortly before code emission.
void ARMPassConfig::addPreEmitPass() {
  // Narrow Thumb2 instructions to their 16-bit forms where possible.
  addPass(createThumb2SizeReductionPass());

  // Constant island pass work on unbundled instructions.
  addPass(createUnpackMachineBundles([](const MachineFunction &MF) {
    return MF.getSubtarget<ARMSubtarget>().isThumb2();
  }));

  // Don't optimize barriers or block placement at -O0.
  if (getOptLevel() != CodeGenOpt::None) {
    addPass(createARMBlockPlacementPass());
    addPass(createARMOptimizeBarriersPass());
  }
}
/// Very last passes before emission: branch-target enforcement, constant
/// islands, low-overhead-loop finalization, and Windows CFG fixups.
void ARMPassConfig::addPreEmitPass2() {
  addPass(createARMBranchTargetsPass());
  addPass(createARMConstantIslandPass());
  addPass(createARMLowOverheadLoopsPass());

  if (TM->getTargetTriple().isOSWindows()) {
    // Identify valid longjmp targets for Windows Control Flow Guard.
    addPass(createCFGuardLongjmpPass());
    // Identify valid eh continuation targets for Windows EHCont Guard.
    addPass(createEHContGuardCatchretPass());
  }
}