//===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific subclass of TargetMachine and its
// code generation pass pipeline.
//
//===----------------------------------------------------------------------===//
#include "AArch64TargetMachine.h"
#include "AArch64.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64MachineScheduler.h"
#include "AArch64MacroFusion.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetObjectFile.h"
#include "AArch64TargetTransformInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "TargetInfo/AArch64TargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/CFIFixup.h"
#include "llvm/CodeGen/CSEConfigBase.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/CFGuard.h"
#include "llvm/Transforms/Scalar.h"
#include <memory>
#include <optional>
#include <string>

using namespace llvm;

static cl::opt<bool> EnableCCMP("aarch64-enable-ccmp",
                                cl::desc("Enable the CCMP formation pass"),
                                cl::init(true), cl::Hidden);

static cl::opt<bool>
    EnableCondBrTuning("aarch64-enable-cond-br-tune",
                       cl::desc("Enable the conditional branch tuning pass"),
                       cl::init(true), cl::Hidden);

static cl::opt<bool> EnableAArch64CopyPropagation(
    "aarch64-enable-copy-propagation",
    cl::desc("Enable the copy propagation with AArch64 copy instr"),
    cl::init(true), cl::Hidden);

static cl::opt<bool> EnableMCR("aarch64-enable-mcr",
                               cl::desc("Enable the machine combiner pass"),
                               cl::init(true), cl::Hidden);

static cl::opt<bool> EnableStPairSuppress("aarch64-enable-stp-suppress",
                                          cl::desc("Suppress STP for AArch64"),
                                          cl::init(true), cl::Hidden);

static cl::opt<bool> EnableAdvSIMDScalar(
    "aarch64-enable-simd-scalar",
    cl::desc("Enable use of AdvSIMD scalar integer instructions"),
    cl::init(false), cl::Hidden);

static cl::opt<bool>
    EnablePromoteConstant("aarch64-enable-promote-const",
                          cl::desc("Enable the promote constant pass"),
                          cl::init(true), cl::Hidden);

static cl::opt<bool> EnableCollectLOH(
    "aarch64-enable-collect-loh",
    cl::desc("Enable the pass that emits the linker optimization hints (LOH)"),
    cl::init(true), cl::Hidden);

static cl::opt<bool>
    EnableDeadRegisterElimination("aarch64-enable-dead-defs", cl::Hidden,
                                  cl::desc("Enable the pass that removes dead"
                                           " definitions and replaces stores"
                                           " to them with stores to the zero"
                                           " register"),
                                  cl::init(true));

static cl::opt<bool> EnableRedundantCopyElimination(
    "aarch64-enable-copyelim",
    cl::desc("Enable the redundant copy elimination pass"), cl::init(true),
    cl::Hidden);

static cl::opt<bool> EnableLoadStoreOpt("aarch64-enable-ldst-opt",
                                        cl::desc("Enable the load/store pair"
                                                 " optimization pass"),
                                        cl::init(true), cl::Hidden);

static cl::opt<bool> EnableAtomicTidy(
    "aarch64-enable-atomic-cfg-tidy", cl::Hidden,
    cl::desc("Run SimplifyCFG after expanding atomic operations"
             " to make use of cmpxchg flow-based information"),
    cl::init(true));

static cl::opt<bool>
    EnableEarlyIfConversion("aarch64-enable-early-ifcvt", cl::Hidden,
                            cl::desc("Run early if-conversion"),
                            cl::init(true));

static cl::opt<bool>
    EnableCondOpt("aarch64-enable-condopt",
                  cl::desc("Enable the condition optimizer pass"),
                  cl::init(true), cl::Hidden);

static cl::opt<bool>
    EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden,
                 cl::desc("Enable optimizations on complex GEPs"),
                 cl::init(false));

static cl::opt<bool>
    EnableSelectOpt("aarch64-select-opt", cl::Hidden,
                    cl::desc("Enable select to branch optimizations"),
                    cl::init(true));

static cl::opt<bool>
    BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true),
                     cl::desc("Relax out of range conditional branches"));

static cl::opt<bool> EnableCompressJumpTables(
    "aarch64-enable-compress-jump-tables", cl::Hidden, cl::init(true),
    cl::desc("Use smallest entry possible for jump tables"));

// FIXME: Unify control over GlobalMerge.
static cl::opt<cl::boolOrDefault>
    EnableGlobalMerge("aarch64-enable-global-merge", cl::Hidden,
                      cl::desc("Enable the global merge pass"));

static cl::opt<bool>
    EnableLoopDataPrefetch("aarch64-enable-loop-data-prefetch", cl::Hidden,
                           cl::desc("Enable the loop data prefetch pass"),
                           cl::init(true));

static cl::opt<int> EnableGlobalISelAtO(
    "aarch64-enable-global-isel-at-O", cl::Hidden,
    cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"),
    cl::init(0));

static cl::opt<bool>
    EnableSVEIntrinsicOpts("aarch64-enable-sve-intrinsic-opts", cl::Hidden,
                           cl::desc("Enable SVE intrinsic opts"),
                           cl::init(true));

static cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix",
                                         cl::init(true), cl::Hidden);

static cl::opt<bool>
    EnableBranchTargets("aarch64-enable-branch-targets", cl::Hidden,
                        cl::desc("Enable the AArch64 branch target pass"),
                        cl::init(true));

static cl::opt<unsigned> SVEVectorBitsMaxOpt(
    "aarch64-sve-vector-bits-max",
    cl::desc("Assume SVE vector registers are at most this big, "
             "with zero meaning no maximum size is assumed."),
    cl::init(0), cl::Hidden);

static cl::opt<unsigned> SVEVectorBitsMinOpt(
    "aarch64-sve-vector-bits-min",
    cl::desc("Assume SVE vector registers are at least this big, "
             "with zero meaning no minimum size is assumed."),
    cl::init(0), cl::Hidden);
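
// For example (hypothetical invocation): a fixed 256-bit SVE vector length
// can be assumed by passing both of the options above to llc, e.g.
//   llc -mtriple=aarch64 -mattr=+sve \
//       -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256 ...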

extern cl::opt<bool> EnableHomogeneousPrologEpilog;

static cl::opt<bool> EnableGISelLoadStoreOptPreLegal(
    "aarch64-enable-gisel-ldst-prelegal",
    cl::desc("Enable GlobalISel's pre-legalizer load/store optimization pass"),
    cl::init(true), cl::Hidden);

static cl::opt<bool> EnableGISelLoadStoreOptPostLegal(
    "aarch64-enable-gisel-ldst-postlegal",
    cl::desc("Enable GlobalISel's post-legalizer load/store optimization pass"),
    cl::init(false), cl::Hidden);
  167. extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
  168. // Register the target.
  169. RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
  170. RegisterTargetMachine<AArch64beTargetMachine> Y(getTheAArch64beTarget());
  171. RegisterTargetMachine<AArch64leTargetMachine> Z(getTheARM64Target());
  172. RegisterTargetMachine<AArch64leTargetMachine> W(getTheARM64_32Target());
  173. RegisterTargetMachine<AArch64leTargetMachine> V(getTheAArch64_32Target());
  174. auto PR = PassRegistry::getPassRegistry();
  175. initializeGlobalISel(*PR);
  176. initializeAArch64A53Fix835769Pass(*PR);
  177. initializeAArch64A57FPLoadBalancingPass(*PR);
  178. initializeAArch64AdvSIMDScalarPass(*PR);
  179. initializeAArch64BranchTargetsPass(*PR);
  180. initializeAArch64CollectLOHPass(*PR);
  181. initializeAArch64CompressJumpTablesPass(*PR);
  182. initializeAArch64ConditionalComparesPass(*PR);
  183. initializeAArch64ConditionOptimizerPass(*PR);
  184. initializeAArch64DeadRegisterDefinitionsPass(*PR);
  185. initializeAArch64ExpandPseudoPass(*PR);
  186. initializeAArch64KCFIPass(*PR);
  187. initializeAArch64LoadStoreOptPass(*PR);
  188. initializeAArch64MIPeepholeOptPass(*PR);
  189. initializeAArch64SIMDInstrOptPass(*PR);
  190. initializeAArch64O0PreLegalizerCombinerPass(*PR);
  191. initializeAArch64PreLegalizerCombinerPass(*PR);
  192. initializeAArch64PostLegalizerCombinerPass(*PR);
  193. initializeAArch64PostLegalizerLoweringPass(*PR);
  194. initializeAArch64PostSelectOptimizePass(*PR);
  195. initializeAArch64PromoteConstantPass(*PR);
  196. initializeAArch64RedundantCopyEliminationPass(*PR);
  197. initializeAArch64StorePairSuppressPass(*PR);
  198. initializeFalkorHWPFFixPass(*PR);
  199. initializeFalkorMarkStridedAccessesLegacyPass(*PR);
  200. initializeLDTLSCleanupPass(*PR);
  201. initializeSMEABIPass(*PR);
  202. initializeSVEIntrinsicOptsPass(*PR);
  203. initializeAArch64SpeculationHardeningPass(*PR);
  204. initializeAArch64SLSHardeningPass(*PR);
  205. initializeAArch64StackTaggingPass(*PR);
  206. initializeAArch64StackTaggingPreRAPass(*PR);
  207. initializeAArch64LowerHomogeneousPrologEpilogPass(*PR);
  208. initializeAArch64DAGToDAGISelPass(*PR);
  209. }

//===----------------------------------------------------------------------===//
// AArch64 Lowering public interface.
//===----------------------------------------------------------------------===//
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
  if (TT.isOSBinFormatMachO())
    return std::make_unique<AArch64_MachoTargetObjectFile>();
  if (TT.isOSBinFormatCOFF())
    return std::make_unique<AArch64_COFFTargetObjectFile>();

  return std::make_unique<AArch64_ELFTargetObjectFile>();
}

// Helper function to build a DataLayout string
static std::string computeDataLayout(const Triple &TT,
                                     const MCTargetOptions &Options,
                                     bool LittleEndian) {
  if (TT.isOSBinFormatMachO()) {
    if (TT.getArch() == Triple::aarch64_32)
      return "e-m:o-p:32:32-i64:64-i128:128-n32:64-S128";
    return "e-m:o-i64:64-i128:128-n32:64-S128";
  }
  if (TT.isOSBinFormatCOFF())
    return "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128";
  std::string Endian = LittleEndian ? "e" : "E";
  std::string Ptr32 = TT.getEnvironment() == Triple::GNUILP32 ? "-p:32:32" : "";
  return Endian + "-m:e" + Ptr32 +
         "-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
}
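
// Worked examples of the rules above: a little-endian ELF triple such as
// aarch64-linux-gnu yields
//   "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
// a big-endian one swaps the leading "e" for "E", and a GNUILP32 environment
// additionally inserts "-p:32:32" for its 32-bit pointers.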

static StringRef computeDefaultCPU(const Triple &TT, StringRef CPU) {
  if (CPU.empty() && TT.isArm64e())
    return "apple-a12";
  return CPU;
}

static Reloc::Model getEffectiveRelocModel(const Triple &TT,
                                           std::optional<Reloc::Model> RM) {
  // AArch64 Darwin and Windows are always PIC.
  if (TT.isOSDarwin() || TT.isOSWindows())
    return Reloc::PIC_;
  // On ELF platforms the default static relocation model has a smart enough
  // linker to cope with referencing external symbols defined in a shared
  // library. Hence DynamicNoPIC doesn't need to be promoted to PIC.
  if (!RM || *RM == Reloc::DynamicNoPIC)
    return Reloc::Static;
  return *RM;
}
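
// As a consequence of the rule above (hypothetical invocations), on
// aarch64-linux-gnu both of
//   llc -mtriple=aarch64-linux-gnu file.ll
//   llc -mtriple=aarch64-linux-gnu -relocation-model=dynamic-no-pic file.ll
// end up using the static relocation model.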

static CodeModel::Model
getEffectiveAArch64CodeModel(const Triple &TT,
                             std::optional<CodeModel::Model> CM, bool JIT) {
  if (CM) {
    if (*CM != CodeModel::Small && *CM != CodeModel::Tiny &&
        *CM != CodeModel::Large) {
      report_fatal_error(
          "Only small, tiny and large code models are allowed on AArch64");
    } else if (*CM == CodeModel::Tiny && !TT.isOSBinFormatELF())
      report_fatal_error("tiny code model is only supported on ELF");
    return *CM;
  }
  // The default MCJIT memory managers make no guarantees about where they can
  // find an executable page; JITed code needs to be able to refer to globals
  // no matter how far away they are.
  // We should set CodeModel::Small for Windows ARM64 in JIT mode, since with
  // the large code model LLVM generates a 4-MOV sequence to materialize
  // addresses, and Windows doesn't support relocating such a sequence.
  if (JIT && !TT.isOSWindows())
    return CodeModel::Large;
  return CodeModel::Small;
}
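
// For example, a hypothetical invocation such as
//   llc -mtriple=aarch64 -code-model=kernel file.ll
// aborts with the fatal error above, and -code-model=tiny is accepted only
// for ELF triples.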

/// Create an AArch64 architecture model.
///
AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
                                           StringRef CPU, StringRef FS,
                                           const TargetOptions &Options,
                                           std::optional<Reloc::Model> RM,
                                           std::optional<CodeModel::Model> CM,
                                           CodeGenOpt::Level OL, bool JIT,
                                           bool LittleEndian)
    : LLVMTargetMachine(T,
                        computeDataLayout(TT, Options.MCOptions, LittleEndian),
                        TT, computeDefaultCPU(TT, CPU), FS, Options,
                        getEffectiveRelocModel(TT, RM),
                        getEffectiveAArch64CodeModel(TT, CM, JIT), OL),
      TLOF(createTLOF(getTargetTriple())), isLittle(LittleEndian) {
  initAsmInfo();

  if (TT.isOSBinFormatMachO()) {
    this->Options.TrapUnreachable = true;
    this->Options.NoTrapAfterNoreturn = true;
  }

  if (getMCAsmInfo()->usesWindowsCFI()) {
    // Unwinding can get confused if the last instruction in an
    // exception-handling region (function, funclet, try block, etc.)
    // is a call.
    //
    // FIXME: We could elide the trap if the next instruction would be in
    // the same region anyway.
    this->Options.TrapUnreachable = true;
  }

  if (this->Options.TLSSize == 0) // default
    this->Options.TLSSize = 24;
  if ((getCodeModel() == CodeModel::Small ||
       getCodeModel() == CodeModel::Kernel) &&
      this->Options.TLSSize > 32)
    // for the small (and kernel) code model, the maximum TLS size is 4GiB
    this->Options.TLSSize = 32;
  else if (getCodeModel() == CodeModel::Tiny && this->Options.TLSSize > 24)
    // for the tiny code model, the maximum TLS size is 1MiB (< 16MiB)
    this->Options.TLSSize = 24;
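
  // Note: TLSSize counts the bits available for TLS offsets, so the clamps
  // above correspond to 2^32 bytes (4GiB) and 2^24 bytes (16MiB) of
  // addressable TLS data. Front ends typically control this with an option
  // in the spirit of -mtls-size=<bits>.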

  // Enable GlobalISel at or below EnableGlobalISelAtO, unless this is
  // MachO/CodeModel::Large, which GlobalISel does not support.
  if (getOptLevel() <= EnableGlobalISelAtO &&
      TT.getArch() != Triple::aarch64_32 &&
      TT.getEnvironment() != Triple::GNUILP32 &&
      !(getCodeModel() == CodeModel::Large && TT.isOSBinFormatMachO())) {
    setGlobalISel(true);
    setGlobalISelAbort(GlobalISelAbortMode::Disable);
  }
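
  // For example (hypothetical invocations): GlobalISel can be kept on at
  // higher optimization levels with
  //   llc -O2 -aarch64-enable-global-isel-at-O=2 ...
  // or switched off entirely with -aarch64-enable-global-isel-at-O=-1.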

  // AArch64 supports the MachineOutliner.
  setMachineOutliner(true);

  // AArch64 supports default outlining behaviour.
  setSupportsDefaultOutlining(true);

  // AArch64 supports the debug entry values.
  setSupportsDebugEntryValues(true);

  // AArch64 supports fixing up the DWARF unwind information.
  if (!getMCAsmInfo()->usesWindowsCFI())
    setCFIFixup(true);
}

AArch64TargetMachine::~AArch64TargetMachine() = default;

const AArch64Subtarget *
AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
  Attribute CPUAttr = F.getFnAttribute("target-cpu");
  Attribute TuneAttr = F.getFnAttribute("tune-cpu");
  Attribute FSAttr = F.getFnAttribute("target-features");

  StringRef CPU = CPUAttr.isValid() ? CPUAttr.getValueAsString() : TargetCPU;
  StringRef TuneCPU = TuneAttr.isValid() ? TuneAttr.getValueAsString() : CPU;
  StringRef FS = FSAttr.isValid() ? FSAttr.getValueAsString() : TargetFS;

  bool StreamingSVEModeDisabled =
      !F.hasFnAttribute("aarch64_pstate_sm_enabled") &&
      !F.hasFnAttribute("aarch64_pstate_sm_compatible") &&
      !F.hasFnAttribute("aarch64_pstate_sm_body");

  unsigned MinSVEVectorSize = 0;
  unsigned MaxSVEVectorSize = 0;
  Attribute VScaleRangeAttr = F.getFnAttribute(Attribute::VScaleRange);
  if (VScaleRangeAttr.isValid()) {
    std::optional<unsigned> VScaleMax = VScaleRangeAttr.getVScaleRangeMax();
    MinSVEVectorSize = VScaleRangeAttr.getVScaleRangeMin() * 128;
    MaxSVEVectorSize = VScaleMax ? *VScaleMax * 128 : 0;
  } else {
    MinSVEVectorSize = SVEVectorBitsMinOpt;
    MaxSVEVectorSize = SVEVectorBitsMaxOpt;
  }

  assert(MinSVEVectorSize % 128 == 0 &&
         "SVE requires vector length in multiples of 128!");
  assert(MaxSVEVectorSize % 128 == 0 &&
         "SVE requires vector length in multiples of 128!");
  assert((MaxSVEVectorSize >= MinSVEVectorSize || MaxSVEVectorSize == 0) &&
         "Minimum SVE vector size should not be larger than its maximum!");

  // Sanitize user input in case of no asserts
  if (MaxSVEVectorSize == 0)
    MinSVEVectorSize = (MinSVEVectorSize / 128) * 128;
  else {
    MinSVEVectorSize =
        (std::min(MinSVEVectorSize, MaxSVEVectorSize) / 128) * 128;
    MaxSVEVectorSize =
        (std::max(MinSVEVectorSize, MaxSVEVectorSize) / 128) * 128;
  }
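
  // Worked example of the logic above: vscale_range(2,4) on a function gives
  // MinSVEVectorSize=256 and MaxSVEVectorSize=512, while a no-asserts build
  // passed -aarch64-sve-vector-bits-min=200 with no maximum rounds the
  // minimum down to 128.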

  SmallString<512> Key;
  raw_svector_ostream(Key) << "SVEMin" << MinSVEVectorSize << "SVEMax"
                           << MaxSVEVectorSize << "StreamingSVEModeDisabled="
                           << StreamingSVEModeDisabled << CPU << TuneCPU << FS;
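  // The resulting key looks like, e.g.,
  //   "SVEMin256SVEMax0StreamingSVEModeDisabled=1" + CPU + TuneCPU + FS
  // so any function whose attributes change one of these fields gets its own
  // cached subtarget below.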
  auto &I = SubtargetMap[Key];
  if (!I) {
    // This needs to be done before we create a new subtarget since any
    // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
    I = std::make_unique<AArch64Subtarget>(
        TargetTriple, CPU, TuneCPU, FS, *this, isLittle, MinSVEVectorSize,
        MaxSVEVectorSize, StreamingSVEModeDisabled);
  }
  return I.get();
}

void AArch64leTargetMachine::anchor() {}

AArch64leTargetMachine::AArch64leTargetMachine(
    const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
    const TargetOptions &Options, std::optional<Reloc::Model> RM,
    std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
    : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, true) {}

void AArch64beTargetMachine::anchor() {}

AArch64beTargetMachine::AArch64beTargetMachine(
    const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
    const TargetOptions &Options, std::optional<Reloc::Model> RM,
    std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
    : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, false) {}

namespace {

/// AArch64 Code Generator Pass Configuration Options.
class AArch64PassConfig : public TargetPassConfig {
public:
  AArch64PassConfig(AArch64TargetMachine &TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {
    if (TM.getOptLevel() != CodeGenOpt::None)
      substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
  }

  AArch64TargetMachine &getAArch64TargetMachine() const {
    return getTM<AArch64TargetMachine>();
  }

  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override {
    const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
    DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
    if (ST.hasFusion())
      DAG->addMutation(createAArch64MacroFusionDAGMutation());
    return DAG;
  }

  ScheduleDAGInstrs *
  createPostMachineScheduler(MachineSchedContext *C) const override {
    const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
    ScheduleDAGMI *DAG =
        new ScheduleDAGMI(C, std::make_unique<AArch64PostRASchedStrategy>(C),
                          /*RemoveKillFlags=*/true);
    if (ST.hasFusion())
      // Run macro fusion after RA again, since literals are only expanded
      // from pseudos by then (see addPreSched2()).
      DAG->addMutation(createAArch64MacroFusionDAGMutation());
    return DAG;
  }

  void addIRPasses() override;
  bool addPreISel() override;
  void addCodeGenPrepare() override;
  bool addInstSelector() override;
  bool addIRTranslator() override;
  void addPreLegalizeMachineIR() override;
  bool addLegalizeMachineIR() override;
  void addPreRegBankSelect() override;
  bool addRegBankSelect() override;
  void addPreGlobalInstructionSelect() override;
  bool addGlobalInstructionSelect() override;
  void addMachineSSAOptimization() override;
  bool addILPOpts() override;
  void addPreRegAlloc() override;
  void addPostRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
  void addPreEmitPass2() override;

  std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
};

} // end anonymous namespace

TargetTransformInfo
AArch64TargetMachine::getTargetTransformInfo(const Function &F) const {
  return TargetTransformInfo(AArch64TTIImpl(this, F));
}

TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
  return new AArch64PassConfig(*this, PM);
}

std::unique_ptr<CSEConfigBase> AArch64PassConfig::getCSEConfig() const {
  return getStandardCSEConfigForOpt(TM->getOptLevel());
}

void AArch64PassConfig::addIRPasses() {
  // Always expand atomic operations; we don't deal with atomicrmw or cmpxchg
  // ourselves.
  addPass(createAtomicExpandPass());

  // Expand any SVE vector library calls that we can't code generate directly.
  if (EnableSVEIntrinsicOpts && TM->getOptLevel() == CodeGenOpt::Aggressive)
    addPass(createSVEIntrinsicOptsPass());

  // Cmpxchg instructions are often used with a subsequent comparison to
  // determine whether it succeeded. We can exploit existing control-flow in
  // ldxr/stxr loops to simplify this, but it needs tidying up.
  if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
    addPass(createCFGSimplificationPass(SimplifyCFGOptions()
                                            .forwardSwitchCondToPhi(true)
                                            .convertSwitchRangeToICmp(true)
                                            .convertSwitchToLookupTable(true)
                                            .needCanonicalLoops(false)
                                            .hoistCommonInsts(true)
                                            .sinkCommonInsts(true)));

  // Run LoopDataPrefetch
  //
  // Run this before LSR to remove the multiplies involved in computing the
  // pointer values N iterations ahead.
  if (TM->getOptLevel() != CodeGenOpt::None) {
    if (EnableLoopDataPrefetch)
      addPass(createLoopDataPrefetchPass());
    if (EnableFalkorHWPFFix)
      addPass(createFalkorMarkStridedAccessesPass());
  }

  if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
    // Call SeparateConstOffsetFromGEP pass to extract constants within indices
    // and lower a GEP with multiple indices to either arithmetic operations or
    // multiple GEPs with a single index.
    addPass(createSeparateConstOffsetFromGEPPass(true));
    // Call EarlyCSE pass to find and remove subexpressions in the lowered
    // result.
    addPass(createEarlyCSEPass());
    // Do loop invariant code motion in case part of the lowered result is
    // invariant.
    addPass(createLICMPass());
  }

  TargetPassConfig::addIRPasses();

  if (getOptLevel() == CodeGenOpt::Aggressive && EnableSelectOpt)
    addPass(createSelectOptimizePass());

  addPass(createAArch64StackTaggingPass(
      /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None));

  // Match complex arithmetic patterns
  if (TM->getOptLevel() >= CodeGenOpt::Default)
    addPass(createComplexDeinterleavingPass(TM));

  // Match interleaved memory accesses to ldN/stN intrinsics.
  if (TM->getOptLevel() != CodeGenOpt::None) {
    addPass(createInterleavedLoadCombinePass());
    addPass(createInterleavedAccessPass());
  }

  // Expand any functions marked with SME attributes which require special
  // changes for the calling convention or that require the lazy-saving
  // mechanism specified in the SME ABI.
  addPass(createSMEABIPass());

  // Add Control Flow Guard checks.
  if (TM->getTargetTriple().isOSWindows())
    addPass(createCFGuardCheckPass());

  if (TM->Options.JMCInstrument)
    addPass(createJMCInstrumenterPass());
}

// Pass Pipeline Configuration
bool AArch64PassConfig::addPreISel() {
  // Run promote constant before global merge, so that the promoted constants
  // get a chance to be merged.
  if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant)
    addPass(createAArch64PromoteConstantPass());
  // FIXME: On AArch64, this depends on the type.
  // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(),
  // and the offset has to be a multiple of the related size in bytes.
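  // For example, an i64 load/store reaches unsigned offsets up to
  // 4095 * 8 = 32760 bytes, but only in multiples of 8; the flat 4095 passed
  // below is therefore the conservative, type-independent bound.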
  if ((TM->getOptLevel() != CodeGenOpt::None &&
       EnableGlobalMerge == cl::BOU_UNSET) ||
      EnableGlobalMerge == cl::BOU_TRUE) {
    bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
                               (EnableGlobalMerge == cl::BOU_UNSET);

    // Merging of extern globals is enabled by default on non-Mach-O as we
    // expect it to be generally either beneficial or harmless. On Mach-O it
    // is disabled as we emit the .subsections_via_symbols directive which
    // means that merging extern globals is not safe.
    bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO();

    // FIXME: extern global merging is only enabled when we optimise for size
    // because there are some regressions with it also enabled for performance.
    if (!OnlyOptimizeForSize)
      MergeExternalByDefault = false;

    addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize,
                                  MergeExternalByDefault));
  }

  return false;
}

void AArch64PassConfig::addCodeGenPrepare() {
  if (getOptLevel() != CodeGenOpt::None)
    addPass(createTypePromotionLegacyPass());
  TargetPassConfig::addCodeGenPrepare();
}

bool AArch64PassConfig::addInstSelector() {
  addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));

  // For ELF, clean up any local-dynamic TLS accesses (i.e. combine as many
  // references to _TLS_MODULE_BASE_ as possible).
  if (TM->getTargetTriple().isOSBinFormatELF() &&
      getOptLevel() != CodeGenOpt::None)
    addPass(createAArch64CleanupLocalDynamicTLSPass());

  return false;
}

bool AArch64PassConfig::addIRTranslator() {
  addPass(new IRTranslator(getOptLevel()));
  return false;
}

void AArch64PassConfig::addPreLegalizeMachineIR() {
  if (getOptLevel() == CodeGenOpt::None)
    addPass(createAArch64O0PreLegalizerCombiner());
  else {
    addPass(createAArch64PreLegalizerCombiner());
    if (EnableGISelLoadStoreOptPreLegal)
      addPass(new LoadStoreOpt());
  }
}

bool AArch64PassConfig::addLegalizeMachineIR() {
  addPass(new Legalizer());
  return false;
}

void AArch64PassConfig::addPreRegBankSelect() {
  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
  if (!IsOptNone) {
    addPass(createAArch64PostLegalizerCombiner(IsOptNone));
    if (EnableGISelLoadStoreOptPostLegal)
      addPass(new LoadStoreOpt());
  }
  addPass(createAArch64PostLegalizerLowering());
}

bool AArch64PassConfig::addRegBankSelect() {
  addPass(new RegBankSelect());
  return false;
}

void AArch64PassConfig::addPreGlobalInstructionSelect() {
  addPass(new Localizer());
}

bool AArch64PassConfig::addGlobalInstructionSelect() {
  addPass(new InstructionSelect(getOptLevel()));
  if (getOptLevel() != CodeGenOpt::None)
    addPass(createAArch64PostSelectOptimize());
  return false;
}

void AArch64PassConfig::addMachineSSAOptimization() {
  // Run default MachineSSAOptimization first.
  TargetPassConfig::addMachineSSAOptimization();

  if (TM->getOptLevel() != CodeGenOpt::None)
    addPass(createAArch64MIPeepholeOptPass());
}

bool AArch64PassConfig::addILPOpts() {
  if (EnableCondOpt)
    addPass(createAArch64ConditionOptimizerPass());
  if (EnableCCMP)
    addPass(createAArch64ConditionalCompares());
  if (EnableMCR)
    addPass(&MachineCombinerID);
  if (EnableCondBrTuning)
    addPass(createAArch64CondBrTuning());
  if (EnableEarlyIfConversion)
    addPass(&EarlyIfConverterID);
  if (EnableStPairSuppress)
    addPass(createAArch64StorePairSuppressPass());
  addPass(createAArch64SIMDInstrOptPass());
  if (TM->getOptLevel() != CodeGenOpt::None)
    addPass(createAArch64StackTaggingPreRAPass());
  return true;
}

void AArch64PassConfig::addPreRegAlloc() {
  // Change dead register definitions to refer to the zero register.
  if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
    addPass(createAArch64DeadRegisterDefinitions());

  // Use AdvSIMD scalar instructions whenever profitable.
  if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) {
    addPass(createAArch64AdvSIMDScalar());
    // The AdvSIMD pass may produce copies that can be rewritten to
    // be register coalescer friendly.
    addPass(&PeepholeOptimizerID);
  }
}

void AArch64PassConfig::addPostRegAlloc() {
  // Remove redundant copy instructions.
  if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
    addPass(createAArch64RedundantCopyEliminationPass());

  if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc())
    // Improve performance for some FP/SIMD code for A57.
    addPass(createAArch64A57FPLoadBalancing());
}

void AArch64PassConfig::addPreSched2() {
  // Lower homogeneous frame instructions.
  if (EnableHomogeneousPrologEpilog)
    addPass(createAArch64LowerHomogeneousPrologEpilogPass());
  // Expand some pseudo instructions to allow proper scheduling.
  addPass(createAArch64ExpandPseudoPass());
  // Use load/store pair instructions when possible.
  if (TM->getOptLevel() != CodeGenOpt::None) {
    if (EnableLoadStoreOpt)
      addPass(createAArch64LoadStoreOptimizationPass());
  }
  // Emit KCFI checks for indirect calls.
  addPass(createAArch64KCFIPass());

  // The AArch64SpeculationHardeningPass destroys dominator tree and natural
  // loop info, which is needed for the FalkorHWPFFixPass and also later on.
  // Therefore, run the AArch64SpeculationHardeningPass before the
  // FalkorHWPFFixPass to avoid recomputing dominator tree and natural loop
  // info.
  addPass(createAArch64SpeculationHardeningPass());

  addPass(createAArch64IndirectThunks());
  addPass(createAArch64SLSHardeningPass());

  if (TM->getOptLevel() != CodeGenOpt::None) {
    if (EnableFalkorHWPFFix)
      addPass(createFalkorHWPFFixPass());
  }
}

void AArch64PassConfig::addPreEmitPass() {
  // Machine Block Placement might have created new opportunities when run
  // at O3, where the Tail Duplication Threshold is set to 4 instructions.
  // Run the load/store optimizer once more.
  if (TM->getOptLevel() >= CodeGenOpt::Aggressive && EnableLoadStoreOpt)
    addPass(createAArch64LoadStoreOptimizationPass());

  if (TM->getOptLevel() >= CodeGenOpt::Aggressive &&
      EnableAArch64CopyPropagation)
    addPass(createMachineCopyPropagationPass(true));

  addPass(createAArch64A53Fix835769());

  if (EnableBranchTargets)
    addPass(createAArch64BranchTargetsPass());

  // Relax conditional branch instructions if they're otherwise out of
  // range of their destination.
  if (BranchRelaxation)
    addPass(&BranchRelaxationPassID);

  if (TM->getTargetTriple().isOSWindows()) {
    // Identify valid longjmp targets for Windows Control Flow Guard.
    addPass(createCFGuardLongjmpPass());
    // Identify valid eh continuation targets for Windows EHCont Guard.
    addPass(createEHContGuardCatchretPass());
  }

  if (TM->getOptLevel() != CodeGenOpt::None && EnableCompressJumpTables)
    addPass(createAArch64CompressJumpTablesPass());

  if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
      TM->getTargetTriple().isOSBinFormatMachO())
    addPass(createAArch64CollectLOHPass());
}

void AArch64PassConfig::addPreEmitPass2() {
  // SVE bundles move prefixes with destructive operations. BLR_RVMARKER pseudo
  // instructions are lowered to bundles as well.
  addPass(createUnpackMachineBundles(nullptr));
}

MachineFunctionInfo *AArch64TargetMachine::createMachineFunctionInfo(
    BumpPtrAllocator &Allocator, const Function &F,
    const TargetSubtargetInfo *STI) const {
  return AArch64FunctionInfo::create<AArch64FunctionInfo>(
      Allocator, F, static_cast<const AArch64Subtarget *>(STI));
}

yaml::MachineFunctionInfo *
AArch64TargetMachine::createDefaultFuncInfoYAML() const {
  return new yaml::AArch64FunctionInfo();
}

yaml::MachineFunctionInfo *
AArch64TargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const {
  const auto *MFI = MF.getInfo<AArch64FunctionInfo>();
  return new yaml::AArch64FunctionInfo(*MFI);
}

bool AArch64TargetMachine::parseMachineFunctionInfo(
    const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS,
    SMDiagnostic &Error, SMRange &SourceRange) const {
  const auto &YamlMFI = static_cast<const yaml::AArch64FunctionInfo &>(MFI);
  MachineFunction &MF = PFS.MF;
  MF.getInfo<AArch64FunctionInfo>()->initializeBaseYamlFields(YamlMFI);
  return false;
}