AArch64LowerHomogeneousPrologEpilog.cpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614
  1. //===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains a pass that lowers homogeneous prolog/epilog instructions.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "AArch64InstrInfo.h"
  13. #include "AArch64Subtarget.h"
  14. #include "MCTargetDesc/AArch64InstPrinter.h"
  15. #include "Utils/AArch64BaseInfo.h"
  16. #include "llvm/CodeGen/MachineBasicBlock.h"
  17. #include "llvm/CodeGen/MachineFunction.h"
  18. #include "llvm/CodeGen/MachineFunctionPass.h"
  19. #include "llvm/CodeGen/MachineInstr.h"
  20. #include "llvm/CodeGen/MachineInstrBuilder.h"
  21. #include "llvm/CodeGen/MachineModuleInfo.h"
  22. #include "llvm/CodeGen/MachineOperand.h"
  23. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  24. #include "llvm/IR/DebugLoc.h"
  25. #include "llvm/IR/IRBuilder.h"
  26. #include "llvm/Pass.h"
  27. #include "llvm/Support/raw_ostream.h"
  28. #include <sstream>
  29. using namespace llvm;
  30. #define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME \
  31. "AArch64 homogeneous prolog/epilog lowering pass"
  32. cl::opt<int> FrameHelperSizeThreshold(
  33. "frame-helper-size-threshold", cl::init(2), cl::Hidden,
  34. cl::desc("The minimum number of instructions that are outlined in a frame "
  35. "helper (default = 2)"));
  36. namespace {
  37. class AArch64LowerHomogeneousPE {
  38. public:
  39. const AArch64InstrInfo *TII;
  40. AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI)
  41. : M(M), MMI(MMI) {}
  42. bool run();
  43. bool runOnMachineFunction(MachineFunction &Fn);
  44. private:
  45. Module *M;
  46. MachineModuleInfo *MMI;
  47. bool runOnMBB(MachineBasicBlock &MBB);
  48. bool runOnMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
  49. MachineBasicBlock::iterator &NextMBBI);
  50. /// Lower a HOM_Prolog pseudo instruction into a helper call
  51. /// or a sequence of homogeneous stores.
  52. /// When a a fp setup follows, it can be optimized.
  53. bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
  54. MachineBasicBlock::iterator &NextMBBI);
  55. /// Lower a HOM_Epilog pseudo instruction into a helper call
  56. /// or a sequence of homogeneous loads.
  57. /// When a return follow, it can be optimized.
  58. bool lowerEpilog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
  59. MachineBasicBlock::iterator &NextMBBI);
  60. };
  61. class AArch64LowerHomogeneousPrologEpilog : public ModulePass {
  62. public:
  63. static char ID;
  64. AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) {
  65. initializeAArch64LowerHomogeneousPrologEpilogPass(
  66. *PassRegistry::getPassRegistry());
  67. }
  68. void getAnalysisUsage(AnalysisUsage &AU) const override {
  69. AU.addRequired<MachineModuleInfoWrapperPass>();
  70. AU.addPreserved<MachineModuleInfoWrapperPass>();
  71. AU.setPreservesAll();
  72. ModulePass::getAnalysisUsage(AU);
  73. }
  74. bool runOnModule(Module &M) override;
  75. StringRef getPassName() const override {
  76. return AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME;
  77. }
  78. };
  79. } // end anonymous namespace
  80. char AArch64LowerHomogeneousPrologEpilog::ID = 0;
  81. INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog,
  82. "aarch64-lower-homogeneous-prolog-epilog",
  83. AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false)
  84. bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) {
  85. if (skipModule(M))
  86. return false;
  87. MachineModuleInfo *MMI =
  88. &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
  89. return AArch64LowerHomogeneousPE(&M, MMI).run();
  90. }
  91. bool AArch64LowerHomogeneousPE::run() {
  92. bool Changed = false;
  93. for (auto &F : *M) {
  94. if (F.empty())
  95. continue;
  96. MachineFunction *MF = MMI->getMachineFunction(F);
  97. if (!MF)
  98. continue;
  99. Changed |= runOnMachineFunction(*MF);
  100. }
  101. return Changed;
  102. }
  103. enum FrameHelperType { Prolog, PrologFrame, Epilog, EpilogTail };
  104. /// Return a frame helper name with the given CSRs and the helper type.
  105. /// For instance, a prolog helper that saves x19 and x20 is named as
  106. /// OUTLINED_FUNCTION_PROLOG_x19x20.
  107. static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs,
  108. FrameHelperType Type, unsigned FpOffset) {
  109. std::ostringstream RegStream;
  110. switch (Type) {
  111. case FrameHelperType::Prolog:
  112. RegStream << "OUTLINED_FUNCTION_PROLOG_";
  113. break;
  114. case FrameHelperType::PrologFrame:
  115. RegStream << "OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset << "_";
  116. break;
  117. case FrameHelperType::Epilog:
  118. RegStream << "OUTLINED_FUNCTION_EPILOG_";
  119. break;
  120. case FrameHelperType::EpilogTail:
  121. RegStream << "OUTLINED_FUNCTION_EPILOG_TAIL_";
  122. break;
  123. }
  124. for (auto Reg : Regs)
  125. RegStream << AArch64InstPrinter::getRegisterName(Reg);
  126. return RegStream.str();
  127. }
  128. /// Create a Function for the unique frame helper with the given name.
  129. /// Return a newly created MachineFunction with an empty MachineBasicBlock.
  130. static MachineFunction &createFrameHelperMachineFunction(Module *M,
  131. MachineModuleInfo *MMI,
  132. StringRef Name) {
  133. LLVMContext &C = M->getContext();
  134. Function *F = M->getFunction(Name);
  135. assert(F == nullptr && "Function has been created before");
  136. F = Function::Create(FunctionType::get(Type::getVoidTy(C), false),
  137. Function::ExternalLinkage, Name, M);
  138. assert(F && "Function was null!");
  139. // Use ODR linkage to avoid duplication.
  140. F->setLinkage(GlobalValue::LinkOnceODRLinkage);
  141. F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  142. // Set no-opt/minsize, so we don't insert padding between outlined
  143. // functions.
  144. F->addFnAttr(Attribute::OptimizeNone);
  145. F->addFnAttr(Attribute::NoInline);
  146. F->addFnAttr(Attribute::MinSize);
  147. F->addFnAttr(Attribute::Naked);
  148. MachineFunction &MF = MMI->getOrCreateMachineFunction(*F);
  149. // Remove unnecessary register liveness and set NoVRegs.
  150. MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
  151. MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA);
  152. MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
  153. MF.getRegInfo().freezeReservedRegs(MF);
  154. // Create entry block.
  155. BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
  156. IRBuilder<> Builder(EntryBB);
  157. Builder.CreateRetVoid();
  158. // Insert the new block into the function.
  159. MachineBasicBlock *MBB = MF.CreateMachineBasicBlock();
  160. MF.insert(MF.begin(), MBB);
  161. return MF;
  162. }
  163. /// Emit a store-pair instruction for frame-setup.
  164. static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
  165. MachineBasicBlock::iterator Pos,
  166. const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
  167. int Offset, bool IsPreDec) {
  168. bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
  169. assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
  170. unsigned Opc;
  171. if (IsPreDec)
  172. Opc = IsFloat ? AArch64::STPDpre : AArch64::STPXpre;
  173. else
  174. Opc = IsFloat ? AArch64::STPDi : AArch64::STPXi;
  175. MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
  176. if (IsPreDec)
  177. MIB.addDef(AArch64::SP);
  178. MIB.addReg(Reg2)
  179. .addReg(Reg1)
  180. .addReg(AArch64::SP)
  181. .addImm(Offset)
  182. .setMIFlag(MachineInstr::FrameSetup);
  183. }
  184. /// Emit a load-pair instruction for frame-destroy.
  185. static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
  186. MachineBasicBlock::iterator Pos,
  187. const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
  188. int Offset, bool IsPostDec) {
  189. bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
  190. assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
  191. unsigned Opc;
  192. if (IsPostDec)
  193. Opc = IsFloat ? AArch64::LDPDpost : AArch64::LDPXpost;
  194. else
  195. Opc = IsFloat ? AArch64::LDPDi : AArch64::LDPXi;
  196. MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
  197. if (IsPostDec)
  198. MIB.addDef(AArch64::SP);
  199. MIB.addReg(Reg2, getDefRegState(true))
  200. .addReg(Reg1, getDefRegState(true))
  201. .addReg(AArch64::SP)
  202. .addImm(Offset)
  203. .setMIFlag(MachineInstr::FrameDestroy);
  204. }
  205. /// Return a unique function if a helper can be formed with the given Regs
  206. /// and frame type.
  207. /// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22:
  208. /// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller
  209. /// stp x20, x19, [sp, #16]
  210. /// ret
  211. ///
  212. /// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22:
  213. /// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller
  214. /// stp x20, x19, [sp, #16]
  215. /// add fp, sp, #32
  216. /// ret
  217. ///
  218. /// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22:
  219. /// mov x16, x30
  220. /// ldp x29, x30, [sp, #32]
  221. /// ldp x20, x19, [sp, #16]
  222. /// ldp x22, x21, [sp], #48
  223. /// ret x16
  224. ///
  225. /// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22:
  226. /// ldp x29, x30, [sp, #32]
  227. /// ldp x20, x19, [sp, #16]
  228. /// ldp x22, x21, [sp], #48
  229. /// ret
  230. /// @param M module
  231. /// @param MMI machine module info
  232. /// @param Regs callee save regs that the helper will handle
  233. /// @param Type frame helper type
  234. /// @return a helper function
  235. static Function *getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI,
  236. SmallVectorImpl<unsigned> &Regs,
  237. FrameHelperType Type,
  238. unsigned FpOffset = 0) {
  239. assert(Regs.size() >= 2);
  240. auto Name = getFrameHelperName(Regs, Type, FpOffset);
  241. auto *F = M->getFunction(Name);
  242. if (F)
  243. return F;
  244. auto &MF = createFrameHelperMachineFunction(M, MMI, Name);
  245. MachineBasicBlock &MBB = *MF.begin();
  246. const TargetSubtargetInfo &STI = MF.getSubtarget();
  247. const TargetInstrInfo &TII = *STI.getInstrInfo();
  248. int Size = (int)Regs.size();
  249. switch (Type) {
  250. case FrameHelperType::Prolog:
  251. case FrameHelperType::PrologFrame: {
  252. // Compute the remaining SP adjust beyond FP/LR.
  253. auto LRIdx = std::distance(
  254. Regs.begin(), std::find(Regs.begin(), Regs.end(), AArch64::LR));
  255. // If the register stored to the lowest address is not LR, we must subtract
  256. // more from SP here.
  257. if (LRIdx != Size - 2) {
  258. assert(Regs[Size - 2] != AArch64::LR);
  259. emitStore(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1],
  260. LRIdx - Size + 2, true);
  261. }
  262. // Store CSRs in the reverse order.
  263. for (int I = Size - 3; I >= 0; I -= 2) {
  264. // FP/LR has been stored at call-site.
  265. if (Regs[I - 1] == AArch64::LR)
  266. continue;
  267. emitStore(MF, MBB, MBB.end(), TII, Regs[I - 1], Regs[I], Size - I - 1,
  268. false);
  269. }
  270. if (Type == FrameHelperType::PrologFrame)
  271. BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ADDXri))
  272. .addDef(AArch64::FP)
  273. .addUse(AArch64::SP)
  274. .addImm(FpOffset)
  275. .addImm(0)
  276. .setMIFlag(MachineInstr::FrameSetup);
  277. BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET))
  278. .addReg(AArch64::LR);
  279. break;
  280. }
  281. case FrameHelperType::Epilog:
  282. case FrameHelperType::EpilogTail:
  283. if (Type == FrameHelperType::Epilog)
  284. // Stash LR to X16
  285. BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ORRXrs))
  286. .addDef(AArch64::X16)
  287. .addReg(AArch64::XZR)
  288. .addUse(AArch64::LR)
  289. .addImm(0);
  290. for (int I = 0; I < Size - 2; I += 2)
  291. emitLoad(MF, MBB, MBB.end(), TII, Regs[I], Regs[I + 1], Size - I - 2,
  292. false);
  293. // Restore the last CSR with post-increment of SP.
  294. emitLoad(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], Size,
  295. true);
  296. BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET))
  297. .addReg(Type == FrameHelperType::Epilog ? AArch64::X16 : AArch64::LR);
  298. break;
  299. }
  300. return M->getFunction(Name);
  301. }
  302. /// This function checks if a frame helper should be used for
  303. /// HOM_Prolog/HOM_Epilog pseudo instruction expansion.
  304. /// @param MBB machine basic block
  305. /// @param NextMBBI next instruction following HOM_Prolog/HOM_Epilog
  306. /// @param Regs callee save registers that are saved or restored.
  307. /// @param Type frame helper type
  308. /// @return True if a use of helper is qualified.
  309. static bool shouldUseFrameHelper(MachineBasicBlock &MBB,
  310. MachineBasicBlock::iterator &NextMBBI,
  311. SmallVectorImpl<unsigned> &Regs,
  312. FrameHelperType Type) {
  313. const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
  314. auto RegCount = Regs.size();
  315. assert(RegCount > 0 && (RegCount % 2 == 0));
  316. // # of instructions that will be outlined.
  317. int InstCount = RegCount / 2;
  318. // Do not use a helper call when not saving LR.
  319. if (!llvm::is_contained(Regs, AArch64::LR))
  320. return false;
  321. switch (Type) {
  322. case FrameHelperType::Prolog:
  323. // Prolog helper cannot save FP/LR.
  324. InstCount--;
  325. break;
  326. case FrameHelperType::PrologFrame: {
  327. // Effecitvely no change in InstCount since FpAdjusment is included.
  328. break;
  329. }
  330. case FrameHelperType::Epilog:
  331. // Bail-out if X16 is live across the epilog helper because it is used in
  332. // the helper to handle X30.
  333. for (auto NextMI = NextMBBI; NextMI != MBB.end(); NextMI++) {
  334. if (NextMI->readsRegister(AArch64::W16, TRI))
  335. return false;
  336. }
  337. // Epilog may not be in the last block. Check the liveness in successors.
  338. for (const MachineBasicBlock *SuccMBB : MBB.successors()) {
  339. if (SuccMBB->isLiveIn(AArch64::W16) || SuccMBB->isLiveIn(AArch64::X16))
  340. return false;
  341. }
  342. // No change in InstCount for the regular epilog case.
  343. break;
  344. case FrameHelperType::EpilogTail: {
  345. // EpilogTail helper includes the caller's return.
  346. if (NextMBBI == MBB.end())
  347. return false;
  348. if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR)
  349. return false;
  350. InstCount++;
  351. break;
  352. }
  353. }
  354. return InstCount >= FrameHelperSizeThreshold;
  355. }
  356. /// Lower a HOM_Epilog pseudo instruction into a helper call while
  357. /// creating the helper on demand. Or emit a sequence of loads in place when not
  358. /// using a helper call.
  359. ///
  360. /// 1. With a helper including ret
  361. /// HOM_Epilog x30, x29, x19, x20, x21, x22 ; MBBI
  362. /// ret ; NextMBBI
  363. /// =>
  364. /// b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22
  365. /// ... ; NextMBBI
  366. ///
  367. /// 2. With a helper
  368. /// HOM_Epilog x30, x29, x19, x20, x21, x22
  369. /// =>
  370. /// bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22
  371. ///
  372. /// 3. Without a helper
  373. /// HOM_Epilog x30, x29, x19, x20, x21, x22
  374. /// =>
  375. /// ldp x29, x30, [sp, #32]
  376. /// ldp x20, x19, [sp, #16]
  377. /// ldp x22, x21, [sp], #48
  378. bool AArch64LowerHomogeneousPE::lowerEpilog(
  379. MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
  380. MachineBasicBlock::iterator &NextMBBI) {
  381. auto &MF = *MBB.getParent();
  382. MachineInstr &MI = *MBBI;
  383. DebugLoc DL = MI.getDebugLoc();
  384. SmallVector<unsigned, 8> Regs;
  385. for (auto &MO : MI.operands())
  386. if (MO.isReg())
  387. Regs.push_back(MO.getReg());
  388. int Size = (int)Regs.size();
  389. if (Size == 0)
  390. return false;
  391. // Registers are in pair.
  392. assert(Size % 2 == 0);
  393. assert(MI.getOpcode() == AArch64::HOM_Epilog);
  394. auto Return = NextMBBI;
  395. if (shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::EpilogTail)) {
  396. // When MBB ends with a return, emit a tail-call to the epilog helper
  397. auto *EpilogTailHelper =
  398. getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::EpilogTail);
  399. BuildMI(MBB, MBBI, DL, TII->get(AArch64::TCRETURNdi))
  400. .addGlobalAddress(EpilogTailHelper)
  401. .addImm(0)
  402. .setMIFlag(MachineInstr::FrameDestroy)
  403. .copyImplicitOps(MI)
  404. .copyImplicitOps(*Return);
  405. NextMBBI = std::next(Return);
  406. Return->removeFromParent();
  407. } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs,
  408. FrameHelperType::Epilog)) {
  409. // The default epilog helper case.
  410. auto *EpilogHelper =
  411. getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Epilog);
  412. BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
  413. .addGlobalAddress(EpilogHelper)
  414. .setMIFlag(MachineInstr::FrameDestroy)
  415. .copyImplicitOps(MI);
  416. } else {
  417. // Fall back to no-helper.
  418. for (int I = 0; I < Size - 2; I += 2)
  419. emitLoad(MF, MBB, MBBI, *TII, Regs[I], Regs[I + 1], Size - I - 2, false);
  420. // Restore the last CSR with post-increment of SP.
  421. emitLoad(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], Size, true);
  422. }
  423. MBBI->removeFromParent();
  424. return true;
  425. }
  426. /// Lower a HOM_Prolog pseudo instruction into a helper call while
  427. /// creating the helper on demand. Or emit a sequence of stores in place when
  428. /// not using a helper call.
  429. ///
  430. /// 1. With a helper including frame-setup
  431. /// HOM_Prolog x30, x29, x19, x20, x21, x22, 32
  432. /// =>
  433. /// stp x29, x30, [sp, #-16]!
  434. /// bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22
  435. ///
  436. /// 2. With a helper
  437. /// HOM_Prolog x30, x29, x19, x20, x21, x22
  438. /// =>
  439. /// stp x29, x30, [sp, #-16]!
  440. /// bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22
  441. ///
  442. /// 3. Without a helper
  443. /// HOM_Prolog x30, x29, x19, x20, x21, x22
  444. /// =>
  445. /// stp x22, x21, [sp, #-48]!
  446. /// stp x20, x19, [sp, #16]
  447. /// stp x29, x30, [sp, #32]
  448. bool AArch64LowerHomogeneousPE::lowerProlog(
  449. MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
  450. MachineBasicBlock::iterator &NextMBBI) {
  451. auto &MF = *MBB.getParent();
  452. MachineInstr &MI = *MBBI;
  453. DebugLoc DL = MI.getDebugLoc();
  454. SmallVector<unsigned, 8> Regs;
  455. int LRIdx = 0;
  456. Optional<int> FpOffset;
  457. for (auto &MO : MI.operands()) {
  458. if (MO.isReg()) {
  459. if (MO.getReg() == AArch64::LR)
  460. LRIdx = Regs.size();
  461. Regs.push_back(MO.getReg());
  462. } else if (MO.isImm()) {
  463. FpOffset = MO.getImm();
  464. }
  465. }
  466. int Size = (int)Regs.size();
  467. if (Size == 0)
  468. return false;
  469. // Allow compact unwind case only for oww.
  470. assert(Size % 2 == 0);
  471. assert(MI.getOpcode() == AArch64::HOM_Prolog);
  472. if (FpOffset &&
  473. shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::PrologFrame)) {
  474. // FP/LR is stored at the top of stack before the prolog helper call.
  475. emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true);
  476. auto *PrologFrameHelper = getOrCreateFrameHelper(
  477. M, MMI, Regs, FrameHelperType::PrologFrame, *FpOffset);
  478. BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
  479. .addGlobalAddress(PrologFrameHelper)
  480. .setMIFlag(MachineInstr::FrameSetup)
  481. .copyImplicitOps(MI)
  482. .addReg(AArch64::FP, RegState::Implicit | RegState::Define)
  483. .addReg(AArch64::SP, RegState::Implicit);
  484. } else if (!FpOffset && shouldUseFrameHelper(MBB, NextMBBI, Regs,
  485. FrameHelperType::Prolog)) {
  486. // FP/LR is stored at the top of stack before the prolog helper call.
  487. emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true);
  488. auto *PrologHelper =
  489. getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Prolog);
  490. BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
  491. .addGlobalAddress(PrologHelper)
  492. .setMIFlag(MachineInstr::FrameSetup)
  493. .copyImplicitOps(MI);
  494. } else {
  495. // Fall back to no-helper.
  496. emitStore(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], -Size, true);
  497. for (int I = Size - 3; I >= 0; I -= 2)
  498. emitStore(MF, MBB, MBBI, *TII, Regs[I - 1], Regs[I], Size - I - 1, false);
  499. if (FpOffset) {
  500. BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri))
  501. .addDef(AArch64::FP)
  502. .addUse(AArch64::SP)
  503. .addImm(*FpOffset)
  504. .addImm(0)
  505. .setMIFlag(MachineInstr::FrameSetup);
  506. }
  507. }
  508. MBBI->removeFromParent();
  509. return true;
  510. }
  511. /// Process each machine instruction
  512. /// @param MBB machine basic block
  513. /// @param MBBI current instruction iterator
  514. /// @param NextMBBI next instruction iterator which can be updated
  515. /// @return True when IR is changed.
  516. bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB,
  517. MachineBasicBlock::iterator MBBI,
  518. MachineBasicBlock::iterator &NextMBBI) {
  519. MachineInstr &MI = *MBBI;
  520. unsigned Opcode = MI.getOpcode();
  521. switch (Opcode) {
  522. default:
  523. break;
  524. case AArch64::HOM_Prolog:
  525. return lowerProlog(MBB, MBBI, NextMBBI);
  526. case AArch64::HOM_Epilog:
  527. return lowerEpilog(MBB, MBBI, NextMBBI);
  528. }
  529. return false;
  530. }
  531. bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) {
  532. bool Modified = false;
  533. MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  534. while (MBBI != E) {
  535. MachineBasicBlock::iterator NMBBI = std::next(MBBI);
  536. Modified |= runOnMI(MBB, MBBI, NMBBI);
  537. MBBI = NMBBI;
  538. }
  539. return Modified;
  540. }
  541. bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) {
  542. TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  543. bool Modified = false;
  544. for (auto &MBB : MF)
  545. Modified |= runOnMBB(MBB);
  546. return Modified;
  547. }
  548. ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() {
  549. return new AArch64LowerHomogeneousPrologEpilog();
  550. }