AArch64TargetMachine.cpp

//===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//
//===----------------------------------------------------------------------===//

#include "AArch64TargetMachine.h"
#include "AArch64.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64MacroFusion.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetObjectFile.h"
#include "AArch64TargetTransformInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "TargetInfo/AArch64TargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/CSEConfigBase.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/CFGuard.h"
#include "llvm/Transforms/Scalar.h"
#include <memory>
#include <string>

using namespace llvm;

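// Command-line flags (all cl::Hidden developer options) that gate individual
// AArch64 passes; each can be toggled independently when debugging codegen.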
static cl::opt<bool> EnableCCMP(
    "aarch64-enable-ccmp", cl::desc("Enable the CCMP formation pass"),
    cl::init(true), cl::Hidden);

static cl::opt<bool> EnableCondBrTuning(
    "aarch64-enable-cond-br-tune",
    cl::desc("Enable the conditional branch tuning pass"), cl::init(true),
    cl::Hidden);

static cl::opt<bool> EnableMCR(
    "aarch64-enable-mcr", cl::desc("Enable the machine combiner pass"),
    cl::init(true), cl::Hidden);

static cl::opt<bool> EnableStPairSuppress(
    "aarch64-enable-stp-suppress", cl::desc("Suppress STP for AArch64"),
    cl::init(true), cl::Hidden);

static cl::opt<bool> EnableAdvSIMDScalar(
    "aarch64-enable-simd-scalar",
    cl::desc("Enable use of AdvSIMD scalar integer instructions"),
    cl::init(false), cl::Hidden);

static cl::opt<bool> EnablePromoteConstant(
    "aarch64-enable-promote-const",
    cl::desc("Enable the promote constant pass"), cl::init(true), cl::Hidden);

static cl::opt<bool> EnableCollectLOH(
    "aarch64-enable-collect-loh",
    cl::desc("Enable the pass that emits the linker optimization hints (LOH)"),
    cl::init(true), cl::Hidden);

static cl::opt<bool> EnableDeadRegisterElimination(
    "aarch64-enable-dead-defs", cl::Hidden,
    cl::desc("Enable the pass that removes dead definitions and replaces "
             "stores to them with stores to the zero register"),
    cl::init(true));

static cl::opt<bool> EnableRedundantCopyElimination(
    "aarch64-enable-copyelim",
    cl::desc("Enable the redundant copy elimination pass"), cl::init(true),
    cl::Hidden);

static cl::opt<bool> EnableLoadStoreOpt(
    "aarch64-enable-ldst-opt",
    cl::desc("Enable the load/store pair optimization pass"), cl::init(true),
    cl::Hidden);

static cl::opt<bool> EnableAtomicTidy(
    "aarch64-enable-atomic-cfg-tidy", cl::Hidden,
    cl::desc("Run SimplifyCFG after expanding atomic operations"
             " to make use of cmpxchg flow-based information"),
    cl::init(true));

static cl::opt<bool> EnableEarlyIfConversion(
    "aarch64-enable-early-ifcvt", cl::Hidden,
    cl::desc("Run early if-conversion"), cl::init(true));

static cl::opt<bool> EnableCondOpt(
    "aarch64-enable-condopt", cl::desc("Enable the condition optimizer pass"),
    cl::init(true), cl::Hidden);

static cl::opt<bool> EnableGEPOpt(
    "aarch64-enable-gep-opt", cl::Hidden,
    cl::desc("Enable optimizations on complex GEPs"), cl::init(false));

static cl::opt<bool> BranchRelaxation(
    "aarch64-enable-branch-relax", cl::Hidden, cl::init(true),
    cl::desc("Relax out of range conditional branches"));

static cl::opt<bool> EnableCompressJumpTables(
    "aarch64-enable-compress-jump-tables", cl::Hidden, cl::init(true),
    cl::desc("Use smallest entry possible for jump tables"));

// FIXME: Unify control over GlobalMerge.
static cl::opt<cl::boolOrDefault>
    EnableGlobalMerge("aarch64-enable-global-merge", cl::Hidden,
                      cl::desc("Enable the global merge pass"));

static cl::opt<bool> EnableLoopDataPrefetch(
    "aarch64-enable-loop-data-prefetch", cl::Hidden,
    cl::desc("Enable the loop data prefetch pass"), cl::init(true));

static cl::opt<int> EnableGlobalISelAtO(
    "aarch64-enable-global-isel-at-O", cl::Hidden,
    cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"),
    cl::init(0));
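// With cl::init(0), GlobalISel is used only at -O0 by default; see the check
// against getOptLevel() in the AArch64TargetMachine constructor below.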

static cl::opt<bool> EnableSVEIntrinsicOpts(
    "aarch64-enable-sve-intrinsic-opts", cl::Hidden,
    cl::desc("Enable SVE intrinsic opts"), cl::init(true));

static cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix",
                                         cl::init(true), cl::Hidden);

static cl::opt<bool> EnableBranchTargets(
    "aarch64-enable-branch-targets", cl::Hidden,
    cl::desc("Enable the AArch64 branch target pass"), cl::init(true));

static cl::opt<unsigned> SVEVectorBitsMaxOpt(
    "aarch64-sve-vector-bits-max",
    cl::desc("Assume SVE vector registers are at most this big, "
             "with zero meaning no maximum size is assumed."),
    cl::init(0), cl::Hidden);

static cl::opt<unsigned> SVEVectorBitsMinOpt(
    "aarch64-sve-vector-bits-min",
    cl::desc("Assume SVE vector registers are at least this big, "
             "with zero meaning no minimum size is assumed."),
    cl::init(0), cl::Hidden);

extern cl::opt<bool> EnableHomogeneousPrologEpilog;

static cl::opt<bool> EnableGISelLoadStoreOptPreLegal(
    "aarch64-enable-gisel-ldst-prelegal",
    cl::desc("Enable GlobalISel's pre-legalizer load/store optimization pass"),
    cl::init(true), cl::Hidden);

static cl::opt<bool> EnableGISelLoadStoreOptPostLegal(
    "aarch64-enable-gisel-ldst-postlegal",
    cl::desc("Enable GlobalISel's post-legalizer load/store optimization pass"),
    cl::init(false), cl::Hidden);

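// Initializing the passes up front registers them with the PassRegistry, so
// that tools can refer to them by name (e.g. via -stop-after/-print-before)
// and their analysis dependencies resolve correctly.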
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
  // Register the target.
  RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
  RegisterTargetMachine<AArch64beTargetMachine> Y(getTheAArch64beTarget());
  RegisterTargetMachine<AArch64leTargetMachine> Z(getTheARM64Target());
  RegisterTargetMachine<AArch64leTargetMachine> W(getTheARM64_32Target());
  RegisterTargetMachine<AArch64leTargetMachine> V(getTheAArch64_32Target());
  auto PR = PassRegistry::getPassRegistry();
  initializeGlobalISel(*PR);
  initializeAArch64A53Fix835769Pass(*PR);
  initializeAArch64A57FPLoadBalancingPass(*PR);
  initializeAArch64AdvSIMDScalarPass(*PR);
  initializeAArch64BranchTargetsPass(*PR);
  initializeAArch64CollectLOHPass(*PR);
  initializeAArch64CompressJumpTablesPass(*PR);
  initializeAArch64ConditionalComparesPass(*PR);
  initializeAArch64ConditionOptimizerPass(*PR);
  initializeAArch64DeadRegisterDefinitionsPass(*PR);
  initializeAArch64ExpandPseudoPass(*PR);
  initializeAArch64LoadStoreOptPass(*PR);
  initializeAArch64MIPeepholeOptPass(*PR);
  initializeAArch64SIMDInstrOptPass(*PR);
  initializeAArch64O0PreLegalizerCombinerPass(*PR);
  initializeAArch64PreLegalizerCombinerPass(*PR);
  initializeAArch64PostLegalizerCombinerPass(*PR);
  initializeAArch64PostLegalizerLoweringPass(*PR);
  initializeAArch64PostSelectOptimizePass(*PR);
  initializeAArch64PromoteConstantPass(*PR);
  initializeAArch64RedundantCopyEliminationPass(*PR);
  initializeAArch64StorePairSuppressPass(*PR);
  initializeFalkorHWPFFixPass(*PR);
  initializeFalkorMarkStridedAccessesLegacyPass(*PR);
  initializeLDTLSCleanupPass(*PR);
  initializeSVEIntrinsicOptsPass(*PR);
  initializeAArch64SpeculationHardeningPass(*PR);
  initializeAArch64SLSHardeningPass(*PR);
  initializeAArch64StackTaggingPass(*PR);
  initializeAArch64StackTaggingPreRAPass(*PR);
  initializeAArch64LowerHomogeneousPrologEpilogPass(*PR);
}

//===----------------------------------------------------------------------===//
// AArch64 Lowering public interface.
//===----------------------------------------------------------------------===//

static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
  if (TT.isOSBinFormatMachO())
    return std::make_unique<AArch64_MachoTargetObjectFile>();
  if (TT.isOSBinFormatCOFF())
    return std::make_unique<AArch64_COFFTargetObjectFile>();
  return std::make_unique<AArch64_ELFTargetObjectFile>();
}

// Helper function to build a DataLayout string
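// (see https://llvm.org/docs/LangRef.html#data-layout). The main tokens:
// "e"/"E" select little-/big-endian; "m:o"/"m:e"/"m:w" select Mach-O/ELF/COFF
// name mangling; "p:32:32" gives 32-bit pointers; "iN:A" sets the alignment
// of iN; "n32:64" lists the native integer widths; "S128" is the natural
// stack alignment in bits.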
static std::string computeDataLayout(const Triple &TT,
                                     const MCTargetOptions &Options,
                                     bool LittleEndian) {
  if (TT.isOSBinFormatMachO()) {
    if (TT.getArch() == Triple::aarch64_32)
      return "e-m:o-p:32:32-i64:64-i128:128-n32:64-S128";
    return "e-m:o-i64:64-i128:128-n32:64-S128";
  }
  if (TT.isOSBinFormatCOFF())
    return "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128";
  std::string Endian = LittleEndian ? "e" : "E";
  std::string Ptr32 = TT.getEnvironment() == Triple::GNUILP32 ? "-p:32:32" : "";
  return Endian + "-m:e" + Ptr32 +
         "-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
}

static StringRef computeDefaultCPU(const Triple &TT, StringRef CPU) {
  if (CPU.empty() && TT.isArm64e())
    return "apple-a12";
  return CPU;
}

static Reloc::Model getEffectiveRelocModel(const Triple &TT,
                                           Optional<Reloc::Model> RM) {
  // AArch64 Darwin and Windows are always PIC.
  if (TT.isOSDarwin() || TT.isOSWindows())
    return Reloc::PIC_;
  // On ELF platforms the default static relocation model has a smart enough
  // linker to cope with referencing external symbols defined in a shared
  // library. Hence DynamicNoPIC doesn't need to be promoted to PIC.
  if (!RM.hasValue() || *RM == Reloc::DynamicNoPIC)
    return Reloc::Static;
  return *RM;
}

static CodeModel::Model
getEffectiveAArch64CodeModel(const Triple &TT, Optional<CodeModel::Model> CM,
                             bool JIT) {
  if (CM) {
    if (*CM != CodeModel::Small && *CM != CodeModel::Tiny &&
        *CM != CodeModel::Large) {
      report_fatal_error(
          "Only small, tiny and large code models are allowed on AArch64");
    } else if (*CM == CodeModel::Tiny && !TT.isOSBinFormatELF())
      report_fatal_error("tiny code model is only supported on ELF");
    return *CM;
  }
  // The default MCJIT memory managers make no guarantees about where they can
  // find an executable page; JITed code needs to be able to refer to globals
  // no matter how far away they are.
  // Use CodeModel::Small for Windows ARM64 in JIT mode: with the large code
  // model LLVM generates a sequence of 4 MOV instructions for addresses, and
  // Windows doesn't support relocating such long-branch sequences.
  if (JIT && !TT.isOSWindows())
    return CodeModel::Large;
  return CodeModel::Small;
}

/// Create an AArch64 architecture model.
///
AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
                                           StringRef CPU, StringRef FS,
                                           const TargetOptions &Options,
                                           Optional<Reloc::Model> RM,
                                           Optional<CodeModel::Model> CM,
                                           CodeGenOpt::Level OL, bool JIT,
                                           bool LittleEndian)
    : LLVMTargetMachine(T,
                        computeDataLayout(TT, Options.MCOptions, LittleEndian),
                        TT, computeDefaultCPU(TT, CPU), FS, Options,
                        getEffectiveRelocModel(TT, RM),
                        getEffectiveAArch64CodeModel(TT, CM, JIT), OL),
      TLOF(createTLOF(getTargetTriple())), isLittle(LittleEndian) {
  initAsmInfo();

  if (TT.isOSBinFormatMachO()) {
    this->Options.TrapUnreachable = true;
    this->Options.NoTrapAfterNoreturn = true;
  }

  if (getMCAsmInfo()->usesWindowsCFI()) {
    // Unwinding can get confused if the last instruction in an
    // exception-handling region (function, funclet, try block, etc.)
    // is a call.
    //
    // FIXME: We could elide the trap if the next instruction would be in
    // the same region anyway.
    this->Options.TrapUnreachable = true;
  }

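  // TLSSize is a bit width: e.g. 24 permits TLS offsets up to 2^24 = 16MiB,
  // and 32 permits up to 4GiB (matching the clamps below).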
  if (this->Options.TLSSize == 0) // default
    this->Options.TLSSize = 24;
  if ((getCodeModel() == CodeModel::Small ||
       getCodeModel() == CodeModel::Kernel) &&
      this->Options.TLSSize > 32)
    // for the small (and kernel) code model, the maximum TLS size is 4GiB
    this->Options.TLSSize = 32;
  else if (getCodeModel() == CodeModel::Tiny && this->Options.TLSSize > 24)
    // for the tiny code model, the maximum TLS size is 1MiB (< 16MiB)
    this->Options.TLSSize = 24;

  // Enable GlobalISel at or below EnableGlobalISelAtO, unless this is
  // MachO/CodeModel::Large, which GlobalISel does not support.
  if (getOptLevel() <= EnableGlobalISelAtO &&
      TT.getArch() != Triple::aarch64_32 &&
      TT.getEnvironment() != Triple::GNUILP32 &&
      !(getCodeModel() == CodeModel::Large && TT.isOSBinFormatMachO())) {
    setGlobalISel(true);
    setGlobalISelAbort(GlobalISelAbortMode::Disable);
  }

  // AArch64 supports the MachineOutliner.
  setMachineOutliner(true);

  // AArch64 supports default outlining behaviour.
  setSupportsDefaultOutlining(true);

  // AArch64 supports the debug entry values.
  setSupportsDebugEntryValues(true);
}

AArch64TargetMachine::~AArch64TargetMachine() = default;

const AArch64Subtarget *
AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
  Attribute CPUAttr = F.getFnAttribute("target-cpu");
  Attribute TuneAttr = F.getFnAttribute("tune-cpu");
  Attribute FSAttr = F.getFnAttribute("target-features");

  std::string CPU =
      CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
  std::string TuneCPU =
      TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU;
  std::string FS =
      FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;

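  // Subtargets are cached by a key built from the SVE size bounds, CPU,
  // tuning CPU, and feature string, so functions with identical attributes
  // share a single AArch64Subtarget instance.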
  SmallString<512> Key;

  unsigned MinSVEVectorSize = 0;
  unsigned MaxSVEVectorSize = 0;
  Attribute VScaleRangeAttr = F.getFnAttribute(Attribute::VScaleRange);
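  // vscale_range bounds count 128-bit granules, so multiply by 128 to get
  // vector widths in bits; a missing maximum is represented as 0.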
  if (VScaleRangeAttr.isValid()) {
    Optional<unsigned> VScaleMax = VScaleRangeAttr.getVScaleRangeMax();
    MinSVEVectorSize = VScaleRangeAttr.getVScaleRangeMin() * 128;
    MaxSVEVectorSize = VScaleMax ? VScaleMax.getValue() * 128 : 0;
  } else {
    MinSVEVectorSize = SVEVectorBitsMinOpt;
    MaxSVEVectorSize = SVEVectorBitsMaxOpt;
  }

  assert(MinSVEVectorSize % 128 == 0 &&
         "SVE requires vector length in multiples of 128!");
  assert(MaxSVEVectorSize % 128 == 0 &&
         "SVE requires vector length in multiples of 128!");
  assert((MaxSVEVectorSize >= MinSVEVectorSize || MaxSVEVectorSize == 0) &&
         "Minimum SVE vector size should not be larger than its maximum!");

  // Sanitize user input when asserts are disabled.
  if (MaxSVEVectorSize == 0)
    MinSVEVectorSize = (MinSVEVectorSize / 128) * 128;
  else {
    MinSVEVectorSize =
        (std::min(MinSVEVectorSize, MaxSVEVectorSize) / 128) * 128;
    MaxSVEVectorSize =
        (std::max(MinSVEVectorSize, MaxSVEVectorSize) / 128) * 128;
  }

  Key += "SVEMin";
  Key += std::to_string(MinSVEVectorSize);
  Key += "SVEMax";
  Key += std::to_string(MaxSVEVectorSize);
  Key += CPU;
  Key += TuneCPU;
  Key += FS;

  auto &I = SubtargetMap[Key];
  if (!I) {
    // This needs to be done before we create a new subtarget since any
    // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
    I = std::make_unique<AArch64Subtarget>(TargetTriple, CPU, TuneCPU, FS,
                                           *this, isLittle, MinSVEVectorSize,
                                           MaxSVEVectorSize);
  }
  return I.get();
}

void AArch64leTargetMachine::anchor() {}

AArch64leTargetMachine::AArch64leTargetMachine(
    const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
    const TargetOptions &Options, Optional<Reloc::Model> RM,
    Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
    : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, true) {}

void AArch64beTargetMachine::anchor() {}

AArch64beTargetMachine::AArch64beTargetMachine(
    const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
    const TargetOptions &Options, Optional<Reloc::Model> RM,
    Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
    : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, false) {}

namespace {

/// AArch64 Code Generator Pass Configuration Options.
class AArch64PassConfig : public TargetPassConfig {
public:
  AArch64PassConfig(AArch64TargetMachine &TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {
    if (TM.getOptLevel() != CodeGenOpt::None)
      substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
  }

  AArch64TargetMachine &getAArch64TargetMachine() const {
    return getTM<AArch64TargetMachine>();
  }

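  // The pre-RA scheduler is extended with DAG mutations that cluster adjacent
  // loads and stores and, when the subtarget supports instruction fusion,
  // keep fusable instruction pairs together.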
  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override {
    const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
    DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
    if (ST.hasFusion())
      DAG->addMutation(createAArch64MacroFusionDAGMutation());
    return DAG;
  }

  ScheduleDAGInstrs *
  createPostMachineScheduler(MachineSchedContext *C) const override {
    const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
    if (ST.hasFusion()) {
      // Run macro fusion again after RA, since by then literals have been
      // expanded from pseudos (see addPreSched2()).
      ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
      DAG->addMutation(createAArch64MacroFusionDAGMutation());
      return DAG;
    }
    return nullptr;
  }

  void addIRPasses() override;
  bool addPreISel() override;
  void addCodeGenPrepare() override;
  bool addInstSelector() override;
  bool addIRTranslator() override;
  void addPreLegalizeMachineIR() override;
  bool addLegalizeMachineIR() override;
  void addPreRegBankSelect() override;
  bool addRegBankSelect() override;
  void addPreGlobalInstructionSelect() override;
  bool addGlobalInstructionSelect() override;
  void addMachineSSAOptimization() override;
  bool addILPOpts() override;
  void addPreRegAlloc() override;
  void addPostRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
  void addPreEmitPass2() override;

  std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
};

} // end anonymous namespace

TargetTransformInfo
AArch64TargetMachine::getTargetTransformInfo(const Function &F) {
  return TargetTransformInfo(AArch64TTIImpl(this, F));
}

TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
  return new AArch64PassConfig(*this, PM);
}

std::unique_ptr<CSEConfigBase> AArch64PassConfig::getCSEConfig() const {
  return getStandardCSEConfigForOpt(TM->getOptLevel());
}

void AArch64PassConfig::addIRPasses() {
  // Always expand atomic operations; we don't deal with atomicrmw or cmpxchg
  // ourselves.
  addPass(createAtomicExpandPass());

  // Expand any SVE vector library calls that we can't code generate directly.
  if (EnableSVEIntrinsicOpts && TM->getOptLevel() == CodeGenOpt::Aggressive)
    addPass(createSVEIntrinsicOptsPass());

  // Cmpxchg instructions are often used with a subsequent comparison to
  // determine whether it succeeded. We can exploit existing control-flow in
  // ldrex/strex loops to simplify this, but it needs tidying up.
  if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
    addPass(createCFGSimplificationPass(SimplifyCFGOptions()
                                            .forwardSwitchCondToPhi(true)
                                            .convertSwitchRangeToICmp(true)
                                            .convertSwitchToLookupTable(true)
                                            .needCanonicalLoops(false)
                                            .hoistCommonInsts(true)
                                            .sinkCommonInsts(true)));

  // Run LoopDataPrefetch
  //
  // Run this before LSR to remove the multiplies involved in computing the
  // pointer values N iterations ahead.
  if (TM->getOptLevel() != CodeGenOpt::None) {
    if (EnableLoopDataPrefetch)
      addPass(createLoopDataPrefetchPass());
    if (EnableFalkorHWPFFix)
      addPass(createFalkorMarkStridedAccessesPass());
  }

  TargetPassConfig::addIRPasses();

  addPass(createAArch64StackTaggingPass(
      /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None));

  // Match interleaved memory accesses to ldN/stN intrinsics.
  if (TM->getOptLevel() != CodeGenOpt::None) {
    addPass(createInterleavedLoadCombinePass());
    addPass(createInterleavedAccessPass());
  }

  if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
    // Call the SeparateConstOffsetFromGEP pass to extract constants within
    // indices and lower a GEP with multiple indices to either arithmetic
    // operations or multiple GEPs with a single index.
    addPass(createSeparateConstOffsetFromGEPPass(true));
    // Call EarlyCSE pass to find and remove subexpressions in the lowered
    // result.
    addPass(createEarlyCSEPass());
    // Do loop invariant code motion in case part of the lowered result is
    // invariant.
    addPass(createLICMPass());
  }

  // Add Control Flow Guard checks.
  if (TM->getTargetTriple().isOSWindows())
    addPass(createCFGuardCheckPass());
}

// Pass Pipeline Configuration
bool AArch64PassConfig::addPreISel() {
  // Run promote constant before global merge, so that the promoted constants
  // get a chance to be merged.
  if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant)
    addPass(createAArch64PromoteConstantPass());
  // FIXME: On AArch64, this depends on the type.
  // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(),
  // and the offset has to be a multiple of the related size in bytes.
  if ((TM->getOptLevel() != CodeGenOpt::None &&
       EnableGlobalMerge == cl::BOU_UNSET) ||
      EnableGlobalMerge == cl::BOU_TRUE) {
    bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
                               (EnableGlobalMerge == cl::BOU_UNSET);

    // Merging of extern globals is enabled by default on non-Mach-O as we
    // expect it to be generally either beneficial or harmless. On Mach-O it
    // is disabled as we emit the .subsections_via_symbols directive which
    // means that merging extern globals is not safe.
    bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO();

    // FIXME: extern global merging is only enabled when we optimise for size
    // because there are some regressions with it also enabled for performance.
    if (!OnlyOptimizeForSize)
      MergeExternalByDefault = false;

    addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize,
                                  MergeExternalByDefault));
  }

  return false;
}

void AArch64PassConfig::addCodeGenPrepare() {
  if (getOptLevel() != CodeGenOpt::None)
    addPass(createTypePromotionPass());
  TargetPassConfig::addCodeGenPrepare();
}

bool AArch64PassConfig::addInstSelector() {
  addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));

  // For ELF, cleanup any local-dynamic TLS accesses (i.e. combine as many
  // references to _TLS_MODULE_BASE_ as possible).
  if (TM->getTargetTriple().isOSBinFormatELF() &&
      getOptLevel() != CodeGenOpt::None)
    addPass(createAArch64CleanupLocalDynamicTLSPass());

  return false;
}

bool AArch64PassConfig::addIRTranslator() {
  addPass(new IRTranslator(getOptLevel()));
  return false;
}

void AArch64PassConfig::addPreLegalizeMachineIR() {
  if (getOptLevel() == CodeGenOpt::None)
    addPass(createAArch64O0PreLegalizerCombiner());
  else {
    addPass(createAArch64PreLegalizerCombiner());
    if (EnableGISelLoadStoreOptPreLegal)
      addPass(new LoadStoreOpt());
  }
}

bool AArch64PassConfig::addLegalizeMachineIR() {
  addPass(new Legalizer());
  return false;
}

void AArch64PassConfig::addPreRegBankSelect() {
  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
  if (!IsOptNone) {
    addPass(createAArch64PostLegalizerCombiner(IsOptNone));
    if (EnableGISelLoadStoreOptPostLegal)
      addPass(new LoadStoreOpt());
  }
  addPass(createAArch64PostLegalizerLowering());
}

bool AArch64PassConfig::addRegBankSelect() {
  addPass(new RegBankSelect());
  return false;
}

void AArch64PassConfig::addPreGlobalInstructionSelect() {
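  // Localizer moves cheap-to-rematerialize values (mostly constants) closer
  // to their uses, keeping live ranges short going into selection.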
  addPass(new Localizer());
}

bool AArch64PassConfig::addGlobalInstructionSelect() {
  addPass(new InstructionSelect(getOptLevel()));
  if (getOptLevel() != CodeGenOpt::None)
    addPass(createAArch64PostSelectOptimize());
  return false;
}

void AArch64PassConfig::addMachineSSAOptimization() {
  // Run default MachineSSAOptimization first.
  TargetPassConfig::addMachineSSAOptimization();

  if (TM->getOptLevel() != CodeGenOpt::None)
    addPass(createAArch64MIPeepholeOptPass());
}

bool AArch64PassConfig::addILPOpts() {
  if (EnableCondOpt)
    addPass(createAArch64ConditionOptimizerPass());
  if (EnableCCMP)
    addPass(createAArch64ConditionalCompares());
  if (EnableMCR)
    addPass(&MachineCombinerID);
  if (EnableCondBrTuning)
    addPass(createAArch64CondBrTuning());
  if (EnableEarlyIfConversion)
    addPass(&EarlyIfConverterID);
  if (EnableStPairSuppress)
    addPass(createAArch64StorePairSuppressPass());
  addPass(createAArch64SIMDInstrOptPass());
  if (TM->getOptLevel() != CodeGenOpt::None)
    addPass(createAArch64StackTaggingPreRAPass());
  return true;
}

void AArch64PassConfig::addPreRegAlloc() {
  // Change dead register definitions to refer to the zero register.
  if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
    addPass(createAArch64DeadRegisterDefinitions());

  // Use AdvSIMD scalar instructions whenever profitable.
  if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) {
    addPass(createAArch64AdvSIMDScalar());
    // The AdvSIMD pass may produce copies that can be rewritten to
    // be register coalescer friendly.
    addPass(&PeepholeOptimizerID);
  }
}

void AArch64PassConfig::addPostRegAlloc() {
  // Remove redundant copy instructions.
  if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
    addPass(createAArch64RedundantCopyEliminationPass());

  if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc())
    // Improve performance for some FP/SIMD code for A57.
    addPass(createAArch64A57FPLoadBalancing());
}

void AArch64PassConfig::addPreSched2() {
  // Lower homogeneous frame instructions.
  if (EnableHomogeneousPrologEpilog)
    addPass(createAArch64LowerHomogeneousPrologEpilogPass());
  // Expand some pseudo instructions to allow proper scheduling.
  addPass(createAArch64ExpandPseudoPass());
  // Use load/store pair instructions when possible.
  if (TM->getOptLevel() != CodeGenOpt::None) {
    if (EnableLoadStoreOpt)
      addPass(createAArch64LoadStoreOptimizationPass());
  }

  // The AArch64SpeculationHardeningPass destroys dominator tree and natural
  // loop info, which is needed for the FalkorHWPFFixPass and also later on.
  // Therefore, run the AArch64SpeculationHardeningPass before the
  // FalkorHWPFFixPass to avoid recomputing dominator tree and natural loop
  // info.
  addPass(createAArch64SpeculationHardeningPass());

  addPass(createAArch64IndirectThunks());
  addPass(createAArch64SLSHardeningPass());

  if (TM->getOptLevel() != CodeGenOpt::None) {
    if (EnableFalkorHWPFFix)
      addPass(createFalkorHWPFFixPass());
  }
}

void AArch64PassConfig::addPreEmitPass() {
  // Machine Block Placement might have created new opportunities when run
  // at O3, where the Tail Duplication Threshold is set to 4 instructions.
  // Run the load/store optimizer once more.
  if (TM->getOptLevel() >= CodeGenOpt::Aggressive && EnableLoadStoreOpt)
    addPass(createAArch64LoadStoreOptimizationPass());

  addPass(createAArch64A53Fix835769());

  if (EnableBranchTargets)
    addPass(createAArch64BranchTargetsPass());

  // Relax conditional branch instructions if they're otherwise out of
  // range of their destination.
  if (BranchRelaxation)
    addPass(&BranchRelaxationPassID);

  if (TM->getTargetTriple().isOSWindows()) {
    // Identify valid longjmp targets for Windows Control Flow Guard.
    addPass(createCFGuardLongjmpPass());
    // Identify valid eh continuation targets for Windows EHCont Guard.
    addPass(createEHContGuardCatchretPass());
  }

  if (TM->getOptLevel() != CodeGenOpt::None && EnableCompressJumpTables)
    addPass(createAArch64CompressJumpTablesPass());

  if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
      TM->getTargetTriple().isOSBinFormatMachO())
    addPass(createAArch64CollectLOHPass());
}

void AArch64PassConfig::addPreEmitPass2() {
  // SVE bundles move prefixes with destructive operations. BLR_RVMARKER pseudo
  // instructions are lowered to bundles as well.
  addPass(createUnpackMachineBundles(nullptr));
}

yaml::MachineFunctionInfo *
AArch64TargetMachine::createDefaultFuncInfoYAML() const {
  return new yaml::AArch64FunctionInfo();
}

yaml::MachineFunctionInfo *
AArch64TargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const {
  const auto *MFI = MF.getInfo<AArch64FunctionInfo>();
  return new yaml::AArch64FunctionInfo(*MFI);
}

bool AArch64TargetMachine::parseMachineFunctionInfo(
    const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS,
    SMDiagnostic &Error, SMRange &SourceRange) const {
  const auto &YamlMFI =
      reinterpret_cast<const yaml::AArch64FunctionInfo &>(MFI);
  MachineFunction &MF = PFS.MF;
  MF.getInfo<AArch64FunctionInfo>()->initializeBaseYamlFields(YamlMFI);
  return false;
}
  697. }