AArch64LowerHomogeneousPrologEpilog.cpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614
  1. //===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains a pass that lowers homogeneous prolog/epilog instructions.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "AArch64InstrInfo.h"
  13. #include "AArch64Subtarget.h"
  14. #include "MCTargetDesc/AArch64InstPrinter.h"
  15. #include "Utils/AArch64BaseInfo.h"
  16. #include "llvm/CodeGen/MachineBasicBlock.h"
  17. #include "llvm/CodeGen/MachineFunction.h"
  18. #include "llvm/CodeGen/MachineFunctionPass.h"
  19. #include "llvm/CodeGen/MachineInstr.h"
  20. #include "llvm/CodeGen/MachineInstrBuilder.h"
  21. #include "llvm/CodeGen/MachineModuleInfo.h"
  22. #include "llvm/CodeGen/MachineOperand.h"
  23. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  24. #include "llvm/IR/DebugLoc.h"
  25. #include "llvm/IR/IRBuilder.h"
  26. #include "llvm/Pass.h"
  27. #include "llvm/Support/raw_ostream.h"
  28. #include <optional>
  29. #include <sstream>
  30. using namespace llvm;
  31. #define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME \
  32. "AArch64 homogeneous prolog/epilog lowering pass"
  33. cl::opt<int> FrameHelperSizeThreshold(
  34. "frame-helper-size-threshold", cl::init(2), cl::Hidden,
  35. cl::desc("The minimum number of instructions that are outlined in a frame "
  36. "helper (default = 2)"));
  37. namespace {
  38. class AArch64LowerHomogeneousPE {
  39. public:
  40. const AArch64InstrInfo *TII;
  41. AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI)
  42. : M(M), MMI(MMI) {}
  43. bool run();
  44. bool runOnMachineFunction(MachineFunction &Fn);
  45. private:
  46. Module *M;
  47. MachineModuleInfo *MMI;
  48. bool runOnMBB(MachineBasicBlock &MBB);
  49. bool runOnMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
  50. MachineBasicBlock::iterator &NextMBBI);
  51. /// Lower a HOM_Prolog pseudo instruction into a helper call
  52. /// or a sequence of homogeneous stores.
  53. /// When a a fp setup follows, it can be optimized.
  54. bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
  55. MachineBasicBlock::iterator &NextMBBI);
  56. /// Lower a HOM_Epilog pseudo instruction into a helper call
  57. /// or a sequence of homogeneous loads.
  58. /// When a return follow, it can be optimized.
  59. bool lowerEpilog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
  60. MachineBasicBlock::iterator &NextMBBI);
  61. };
  62. class AArch64LowerHomogeneousPrologEpilog : public ModulePass {
  63. public:
  64. static char ID;
  65. AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) {
  66. initializeAArch64LowerHomogeneousPrologEpilogPass(
  67. *PassRegistry::getPassRegistry());
  68. }
  69. void getAnalysisUsage(AnalysisUsage &AU) const override {
  70. AU.addRequired<MachineModuleInfoWrapperPass>();
  71. AU.addPreserved<MachineModuleInfoWrapperPass>();
  72. AU.setPreservesAll();
  73. ModulePass::getAnalysisUsage(AU);
  74. }
  75. bool runOnModule(Module &M) override;
  76. StringRef getPassName() const override {
  77. return AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME;
  78. }
  79. };
  80. } // end anonymous namespace
  81. char AArch64LowerHomogeneousPrologEpilog::ID = 0;
  82. INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog,
  83. "aarch64-lower-homogeneous-prolog-epilog",
  84. AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false)
  85. bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) {
  86. if (skipModule(M))
  87. return false;
  88. MachineModuleInfo *MMI =
  89. &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
  90. return AArch64LowerHomogeneousPE(&M, MMI).run();
  91. }
  92. bool AArch64LowerHomogeneousPE::run() {
  93. bool Changed = false;
  94. for (auto &F : *M) {
  95. if (F.empty())
  96. continue;
  97. MachineFunction *MF = MMI->getMachineFunction(F);
  98. if (!MF)
  99. continue;
  100. Changed |= runOnMachineFunction(*MF);
  101. }
  102. return Changed;
  103. }
  /// Kind of outlined frame helper.
  enum FrameHelperType {
    /// Stores callee-saved registers, then returns through LR.
    Prolog,
    /// Like Prolog, and additionally sets FP to SP plus an offset.
    PrologFrame,
    /// Reloads callee-saved registers and returns through X16.
    Epilog,
    /// Reloads callee-saved registers and returns through LR.
    EpilogTail
  };
  105. /// Return a frame helper name with the given CSRs and the helper type.
  106. /// For instance, a prolog helper that saves x19 and x20 is named as
  107. /// OUTLINED_FUNCTION_PROLOG_x19x20.
  108. static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs,
  109. FrameHelperType Type, unsigned FpOffset) {
  110. std::ostringstream RegStream;
  111. switch (Type) {
  112. case FrameHelperType::Prolog:
  113. RegStream << "OUTLINED_FUNCTION_PROLOG_";
  114. break;
  115. case FrameHelperType::PrologFrame:
  116. RegStream << "OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset << "_";
  117. break;
  118. case FrameHelperType::Epilog:
  119. RegStream << "OUTLINED_FUNCTION_EPILOG_";
  120. break;
  121. case FrameHelperType::EpilogTail:
  122. RegStream << "OUTLINED_FUNCTION_EPILOG_TAIL_";
  123. break;
  124. }
  125. for (auto Reg : Regs)
  126. RegStream << AArch64InstPrinter::getRegisterName(Reg);
  127. return RegStream.str();
  128. }
  129. /// Create a Function for the unique frame helper with the given name.
  130. /// Return a newly created MachineFunction with an empty MachineBasicBlock.
  131. static MachineFunction &createFrameHelperMachineFunction(Module *M,
  132. MachineModuleInfo *MMI,
  133. StringRef Name) {
  134. LLVMContext &C = M->getContext();
  135. Function *F = M->getFunction(Name);
  136. assert(F == nullptr && "Function has been created before");
  137. F = Function::Create(FunctionType::get(Type::getVoidTy(C), false),
  138. Function::ExternalLinkage, Name, M);
  139. assert(F && "Function was null!");
  140. // Use ODR linkage to avoid duplication.
  141. F->setLinkage(GlobalValue::LinkOnceODRLinkage);
  142. F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  143. // Set no-opt/minsize, so we don't insert padding between outlined
  144. // functions.
  145. F->addFnAttr(Attribute::OptimizeNone);
  146. F->addFnAttr(Attribute::NoInline);
  147. F->addFnAttr(Attribute::MinSize);
  148. F->addFnAttr(Attribute::Naked);
  149. MachineFunction &MF = MMI->getOrCreateMachineFunction(*F);
  150. // Remove unnecessary register liveness and set NoVRegs.
  151. MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
  152. MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA);
  153. MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
  154. MF.getRegInfo().freezeReservedRegs(MF);
  155. // Create entry block.
  156. BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
  157. IRBuilder<> Builder(EntryBB);
  158. Builder.CreateRetVoid();
  159. // Insert the new block into the function.
  160. MachineBasicBlock *MBB = MF.CreateMachineBasicBlock();
  161. MF.insert(MF.begin(), MBB);
  162. return MF;
  163. }
  164. /// Emit a store-pair instruction for frame-setup.
  165. static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
  166. MachineBasicBlock::iterator Pos,
  167. const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
  168. int Offset, bool IsPreDec) {
  169. bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
  170. assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
  171. unsigned Opc;
  172. if (IsPreDec)
  173. Opc = IsFloat ? AArch64::STPDpre : AArch64::STPXpre;
  174. else
  175. Opc = IsFloat ? AArch64::STPDi : AArch64::STPXi;
  176. MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
  177. if (IsPreDec)
  178. MIB.addDef(AArch64::SP);
  179. MIB.addReg(Reg2)
  180. .addReg(Reg1)
  181. .addReg(AArch64::SP)
  182. .addImm(Offset)
  183. .setMIFlag(MachineInstr::FrameSetup);
  184. }
  185. /// Emit a load-pair instruction for frame-destroy.
  186. static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
  187. MachineBasicBlock::iterator Pos,
  188. const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
  189. int Offset, bool IsPostDec) {
  190. bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
  191. assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
  192. unsigned Opc;
  193. if (IsPostDec)
  194. Opc = IsFloat ? AArch64::LDPDpost : AArch64::LDPXpost;
  195. else
  196. Opc = IsFloat ? AArch64::LDPDi : AArch64::LDPXi;
  197. MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
  198. if (IsPostDec)
  199. MIB.addDef(AArch64::SP);
  200. MIB.addReg(Reg2, getDefRegState(true))
  201. .addReg(Reg1, getDefRegState(true))
  202. .addReg(AArch64::SP)
  203. .addImm(Offset)
  204. .setMIFlag(MachineInstr::FrameDestroy);
  205. }
  206. /// Return a unique function if a helper can be formed with the given Regs
  207. /// and frame type.
  208. /// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22:
  209. /// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller
  210. /// stp x20, x19, [sp, #16]
  211. /// ret
  212. ///
  213. /// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22:
  214. /// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller
  215. /// stp x20, x19, [sp, #16]
  216. /// add fp, sp, #32
  217. /// ret
  218. ///
  219. /// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22:
  220. /// mov x16, x30
  221. /// ldp x29, x30, [sp, #32]
  222. /// ldp x20, x19, [sp, #16]
  223. /// ldp x22, x21, [sp], #48
  224. /// ret x16
  225. ///
  226. /// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22:
  227. /// ldp x29, x30, [sp, #32]
  228. /// ldp x20, x19, [sp, #16]
  229. /// ldp x22, x21, [sp], #48
  230. /// ret
  231. /// @param M module
  232. /// @param MMI machine module info
  233. /// @param Regs callee save regs that the helper will handle
  234. /// @param Type frame helper type
  235. /// @return a helper function
  236. static Function *getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI,
  237. SmallVectorImpl<unsigned> &Regs,
  238. FrameHelperType Type,
  239. unsigned FpOffset = 0) {
  240. assert(Regs.size() >= 2);
  241. auto Name = getFrameHelperName(Regs, Type, FpOffset);
  242. auto *F = M->getFunction(Name);
  243. if (F)
  244. return F;
  245. auto &MF = createFrameHelperMachineFunction(M, MMI, Name);
  246. MachineBasicBlock &MBB = *MF.begin();
  247. const TargetSubtargetInfo &STI = MF.getSubtarget();
  248. const TargetInstrInfo &TII = *STI.getInstrInfo();
  249. int Size = (int)Regs.size();
  250. switch (Type) {
  251. case FrameHelperType::Prolog:
  252. case FrameHelperType::PrologFrame: {
  253. // Compute the remaining SP adjust beyond FP/LR.
  254. auto LRIdx = std::distance(Regs.begin(), llvm::find(Regs, AArch64::LR));
  255. // If the register stored to the lowest address is not LR, we must subtract
  256. // more from SP here.
  257. if (LRIdx != Size - 2) {
  258. assert(Regs[Size - 2] != AArch64::LR);
  259. emitStore(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1],
  260. LRIdx - Size + 2, true);
  261. }
  262. // Store CSRs in the reverse order.
  263. for (int I = Size - 3; I >= 0; I -= 2) {
  264. // FP/LR has been stored at call-site.
  265. if (Regs[I - 1] == AArch64::LR)
  266. continue;
  267. emitStore(MF, MBB, MBB.end(), TII, Regs[I - 1], Regs[I], Size - I - 1,
  268. false);
  269. }
  270. if (Type == FrameHelperType::PrologFrame)
  271. BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ADDXri))
  272. .addDef(AArch64::FP)
  273. .addUse(AArch64::SP)
  274. .addImm(FpOffset)
  275. .addImm(0)
  276. .setMIFlag(MachineInstr::FrameSetup);
  277. BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET))
  278. .addReg(AArch64::LR);
  279. break;
  280. }
  281. case FrameHelperType::Epilog:
  282. case FrameHelperType::EpilogTail:
  283. if (Type == FrameHelperType::Epilog)
  284. // Stash LR to X16
  285. BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ORRXrs))
  286. .addDef(AArch64::X16)
  287. .addReg(AArch64::XZR)
  288. .addUse(AArch64::LR)
  289. .addImm(0);
  290. for (int I = 0; I < Size - 2; I += 2)
  291. emitLoad(MF, MBB, MBB.end(), TII, Regs[I], Regs[I + 1], Size - I - 2,
  292. false);
  293. // Restore the last CSR with post-increment of SP.
  294. emitLoad(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], Size,
  295. true);
  296. BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET))
  297. .addReg(Type == FrameHelperType::Epilog ? AArch64::X16 : AArch64::LR);
  298. break;
  299. }
  300. return M->getFunction(Name);
  301. }
  302. /// This function checks if a frame helper should be used for
  303. /// HOM_Prolog/HOM_Epilog pseudo instruction expansion.
  304. /// @param MBB machine basic block
  305. /// @param NextMBBI next instruction following HOM_Prolog/HOM_Epilog
  306. /// @param Regs callee save registers that are saved or restored.
  307. /// @param Type frame helper type
  308. /// @return True if a use of helper is qualified.
  309. static bool shouldUseFrameHelper(MachineBasicBlock &MBB,
  310. MachineBasicBlock::iterator &NextMBBI,
  311. SmallVectorImpl<unsigned> &Regs,
  312. FrameHelperType Type) {
  313. const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
  314. auto RegCount = Regs.size();
  315. assert(RegCount > 0 && (RegCount % 2 == 0));
  316. // # of instructions that will be outlined.
  317. int InstCount = RegCount / 2;
  318. // Do not use a helper call when not saving LR.
  319. if (!llvm::is_contained(Regs, AArch64::LR))
  320. return false;
  321. switch (Type) {
  322. case FrameHelperType::Prolog:
  323. // Prolog helper cannot save FP/LR.
  324. InstCount--;
  325. break;
  326. case FrameHelperType::PrologFrame: {
  327. // Effecitvely no change in InstCount since FpAdjusment is included.
  328. break;
  329. }
  330. case FrameHelperType::Epilog:
  331. // Bail-out if X16 is live across the epilog helper because it is used in
  332. // the helper to handle X30.
  333. for (auto NextMI = NextMBBI; NextMI != MBB.end(); NextMI++) {
  334. if (NextMI->readsRegister(AArch64::W16, TRI))
  335. return false;
  336. }
  337. // Epilog may not be in the last block. Check the liveness in successors.
  338. for (const MachineBasicBlock *SuccMBB : MBB.successors()) {
  339. if (SuccMBB->isLiveIn(AArch64::W16) || SuccMBB->isLiveIn(AArch64::X16))
  340. return false;
  341. }
  342. // No change in InstCount for the regular epilog case.
  343. break;
  344. case FrameHelperType::EpilogTail: {
  345. // EpilogTail helper includes the caller's return.
  346. if (NextMBBI == MBB.end())
  347. return false;
  348. if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR)
  349. return false;
  350. InstCount++;
  351. break;
  352. }
  353. }
  354. return InstCount >= FrameHelperSizeThreshold;
  355. }
  356. /// Lower a HOM_Epilog pseudo instruction into a helper call while
  357. /// creating the helper on demand. Or emit a sequence of loads in place when not
  358. /// using a helper call.
  359. ///
  360. /// 1. With a helper including ret
  361. /// HOM_Epilog x30, x29, x19, x20, x21, x22 ; MBBI
  362. /// ret ; NextMBBI
  363. /// =>
  364. /// b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22
  365. /// ... ; NextMBBI
  366. ///
  367. /// 2. With a helper
  368. /// HOM_Epilog x30, x29, x19, x20, x21, x22
  369. /// =>
  370. /// bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22
  371. ///
  372. /// 3. Without a helper
  373. /// HOM_Epilog x30, x29, x19, x20, x21, x22
  374. /// =>
  375. /// ldp x29, x30, [sp, #32]
  376. /// ldp x20, x19, [sp, #16]
  377. /// ldp x22, x21, [sp], #48
  378. bool AArch64LowerHomogeneousPE::lowerEpilog(
  379. MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
  380. MachineBasicBlock::iterator &NextMBBI) {
  381. auto &MF = *MBB.getParent();
  382. MachineInstr &MI = *MBBI;
  383. DebugLoc DL = MI.getDebugLoc();
  384. SmallVector<unsigned, 8> Regs;
  385. for (auto &MO : MI.operands())
  386. if (MO.isReg())
  387. Regs.push_back(MO.getReg());
  388. int Size = (int)Regs.size();
  389. if (Size == 0)
  390. return false;
  391. // Registers are in pair.
  392. assert(Size % 2 == 0);
  393. assert(MI.getOpcode() == AArch64::HOM_Epilog);
  394. auto Return = NextMBBI;
  395. if (shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::EpilogTail)) {
  396. // When MBB ends with a return, emit a tail-call to the epilog helper
  397. auto *EpilogTailHelper =
  398. getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::EpilogTail);
  399. BuildMI(MBB, MBBI, DL, TII->get(AArch64::TCRETURNdi))
  400. .addGlobalAddress(EpilogTailHelper)
  401. .addImm(0)
  402. .setMIFlag(MachineInstr::FrameDestroy)
  403. .copyImplicitOps(MI)
  404. .copyImplicitOps(*Return);
  405. NextMBBI = std::next(Return);
  406. Return->removeFromParent();
  407. } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs,
  408. FrameHelperType::Epilog)) {
  409. // The default epilog helper case.
  410. auto *EpilogHelper =
  411. getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Epilog);
  412. BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
  413. .addGlobalAddress(EpilogHelper)
  414. .setMIFlag(MachineInstr::FrameDestroy)
  415. .copyImplicitOps(MI);
  416. } else {
  417. // Fall back to no-helper.
  418. for (int I = 0; I < Size - 2; I += 2)
  419. emitLoad(MF, MBB, MBBI, *TII, Regs[I], Regs[I + 1], Size - I - 2, false);
  420. // Restore the last CSR with post-increment of SP.
  421. emitLoad(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], Size, true);
  422. }
  423. MBBI->removeFromParent();
  424. return true;
  425. }
  426. /// Lower a HOM_Prolog pseudo instruction into a helper call while
  427. /// creating the helper on demand. Or emit a sequence of stores in place when
  428. /// not using a helper call.
  429. ///
  430. /// 1. With a helper including frame-setup
  431. /// HOM_Prolog x30, x29, x19, x20, x21, x22, 32
  432. /// =>
  433. /// stp x29, x30, [sp, #-16]!
  434. /// bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22
  435. ///
  436. /// 2. With a helper
  437. /// HOM_Prolog x30, x29, x19, x20, x21, x22
  438. /// =>
  439. /// stp x29, x30, [sp, #-16]!
  440. /// bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22
  441. ///
  442. /// 3. Without a helper
  443. /// HOM_Prolog x30, x29, x19, x20, x21, x22
  444. /// =>
  445. /// stp x22, x21, [sp, #-48]!
  446. /// stp x20, x19, [sp, #16]
  447. /// stp x29, x30, [sp, #32]
  448. bool AArch64LowerHomogeneousPE::lowerProlog(
  449. MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
  450. MachineBasicBlock::iterator &NextMBBI) {
  451. auto &MF = *MBB.getParent();
  452. MachineInstr &MI = *MBBI;
  453. DebugLoc DL = MI.getDebugLoc();
  454. SmallVector<unsigned, 8> Regs;
  455. int LRIdx = 0;
  456. std::optional<int> FpOffset;
  457. for (auto &MO : MI.operands()) {
  458. if (MO.isReg()) {
  459. if (MO.getReg() == AArch64::LR)
  460. LRIdx = Regs.size();
  461. Regs.push_back(MO.getReg());
  462. } else if (MO.isImm()) {
  463. FpOffset = MO.getImm();
  464. }
  465. }
  466. int Size = (int)Regs.size();
  467. if (Size == 0)
  468. return false;
  469. // Allow compact unwind case only for oww.
  470. assert(Size % 2 == 0);
  471. assert(MI.getOpcode() == AArch64::HOM_Prolog);
  472. if (FpOffset &&
  473. shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::PrologFrame)) {
  474. // FP/LR is stored at the top of stack before the prolog helper call.
  475. emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true);
  476. auto *PrologFrameHelper = getOrCreateFrameHelper(
  477. M, MMI, Regs, FrameHelperType::PrologFrame, *FpOffset);
  478. BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
  479. .addGlobalAddress(PrologFrameHelper)
  480. .setMIFlag(MachineInstr::FrameSetup)
  481. .copyImplicitOps(MI)
  482. .addReg(AArch64::FP, RegState::Implicit | RegState::Define)
  483. .addReg(AArch64::SP, RegState::Implicit);
  484. } else if (!FpOffset && shouldUseFrameHelper(MBB, NextMBBI, Regs,
  485. FrameHelperType::Prolog)) {
  486. // FP/LR is stored at the top of stack before the prolog helper call.
  487. emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true);
  488. auto *PrologHelper =
  489. getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Prolog);
  490. BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
  491. .addGlobalAddress(PrologHelper)
  492. .setMIFlag(MachineInstr::FrameSetup)
  493. .copyImplicitOps(MI);
  494. } else {
  495. // Fall back to no-helper.
  496. emitStore(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], -Size, true);
  497. for (int I = Size - 3; I >= 0; I -= 2)
  498. emitStore(MF, MBB, MBBI, *TII, Regs[I - 1], Regs[I], Size - I - 1, false);
  499. if (FpOffset) {
  500. BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri))
  501. .addDef(AArch64::FP)
  502. .addUse(AArch64::SP)
  503. .addImm(*FpOffset)
  504. .addImm(0)
  505. .setMIFlag(MachineInstr::FrameSetup);
  506. }
  507. }
  508. MBBI->removeFromParent();
  509. return true;
  510. }
  511. /// Process each machine instruction
  512. /// @param MBB machine basic block
  513. /// @param MBBI current instruction iterator
  514. /// @param NextMBBI next instruction iterator which can be updated
  515. /// @return True when IR is changed.
  516. bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB,
  517. MachineBasicBlock::iterator MBBI,
  518. MachineBasicBlock::iterator &NextMBBI) {
  519. MachineInstr &MI = *MBBI;
  520. unsigned Opcode = MI.getOpcode();
  521. switch (Opcode) {
  522. default:
  523. break;
  524. case AArch64::HOM_Prolog:
  525. return lowerProlog(MBB, MBBI, NextMBBI);
  526. case AArch64::HOM_Epilog:
  527. return lowerEpilog(MBB, MBBI, NextMBBI);
  528. }
  529. return false;
  530. }
  531. bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) {
  532. bool Modified = false;
  533. MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  534. while (MBBI != E) {
  535. MachineBasicBlock::iterator NMBBI = std::next(MBBI);
  536. Modified |= runOnMI(MBB, MBBI, NMBBI);
  537. MBBI = NMBBI;
  538. }
  539. return Modified;
  540. }
  541. bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) {
  542. TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  543. bool Modified = false;
  544. for (auto &MBB : MF)
  545. Modified |= runOnMBB(MBB);
  546. return Modified;
  547. }
  548. ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() {
  549. return new AArch64LowerHomogeneousPrologEpilog();
  550. }