AArch64CollectLOH.cpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597
  1. //===---------- AArch64CollectLOH.cpp - AArch64 collect LOH pass --*- C++ -*-=//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains a pass that collects the Linker Optimization Hints (LOH).
  10. // This pass should be run at the very end of the compilation flow, just before
  11. // assembly printer.
  12. // To be useful for the linker, the LOH must be printed into the assembly file.
  13. //
  14. // A LOH describes a sequence of instructions that may be optimized by the
  15. // linker.
  16. // This same sequence cannot be optimized by the compiler because some of
  17. // the information will be known at link time.
  18. // For instance, consider the following sequence:
  19. // L1: adrp xA, sym@PAGE
  20. // L2: add xB, xA, sym@PAGEOFF
  21. // L3: ldr xC, [xB, #imm]
  22. // This sequence can be turned into:
  23. // A literal load if sym@PAGE + sym@PAGEOFF + #imm - address(L3) is < 1MB:
  24. // L3: ldr xC, sym+#imm
  25. // It may also be turned into either of the following more efficient
  26. // code sequences:
  27. // - If sym@PAGEOFF + #imm fits the encoding space of L3.
  28. // L1: adrp xA, sym@PAGE
  29. // L3: ldr xC, [xB, sym@PAGEOFF + #imm]
  30. // - If sym@PAGE + sym@PAGEOFF - address(L1) < 1MB:
  31. // L1: adr xA, sym
  32. // L3: ldr xC, [xB, #imm]
  33. //
  34. // To be valid a LOH must meet all the requirements needed by all the related
  35. // possible linker transformations.
  36. // For instance, using the running example, the constraints to emit
  37. // ".loh AdrpAddLdr" are:
  38. // - L1, L2, and L3 instructions are of the expected type, i.e.,
  39. // respectively ADRP, ADD (immediate), and LD.
  40. // - The result of L1 is used only by L2.
  41. // - The register argument (xA) used in the ADD instruction is defined
  42. // only by L1.
  43. // - The result of L2 is used only by L3.
  44. // - The base address (xB) in L3 is defined only by L2.
  45. // - The ADRP in L1 and the ADD in L2 must reference the same symbol using
  46. // @PAGE/@PAGEOFF with no additional constants
  47. //
  48. // Currently supported LOHs are:
  49. // * So called non-ADRP-related:
  50. // - .loh AdrpAddLdr L1, L2, L3:
  51. // L1: adrp xA, sym@PAGE
  52. // L2: add xB, xA, sym@PAGEOFF
  53. // L3: ldr xC, [xB, #imm]
  54. // - .loh AdrpLdrGotLdr L1, L2, L3:
  55. // L1: adrp xA, sym@GOTPAGE
  56. // L2: ldr xB, [xA, sym@GOTPAGEOFF]
  57. // L3: ldr xC, [xB, #imm]
  58. // - .loh AdrpLdr L1, L3:
  59. // L1: adrp xA, sym@PAGE
  60. // L3: ldr xC, [xA, sym@PAGEOFF]
  61. // - .loh AdrpAddStr L1, L2, L3:
  62. // L1: adrp xA, sym@PAGE
  63. // L2: add xB, xA, sym@PAGEOFF
  64. // L3: str xC, [xB, #imm]
  65. // - .loh AdrpLdrGotStr L1, L2, L3:
  66. // L1: adrp xA, sym@GOTPAGE
  67. // L2: ldr xB, [xA, sym@GOTPAGEOFF]
  68. // L3: str xC, [xB, #imm]
  69. // - .loh AdrpAdd L1, L2:
  70. // L1: adrp xA, sym@PAGE
  71. // L2: add xB, xA, sym@PAGEOFF
  72. // For all these LOHs, L1, L2, L3 form a simple chain:
  73. // L1 result is used only by L2 and L2 result by L3.
  74. // L3 LOH-related argument is defined only by L2 and L2 LOH-related argument
  75. // by L1.
  76. // All these LOHs aim at using more efficient load/store patterns by folding
  77. // some instructions used to compute the address directly into the load/store.
  78. //
  79. // * So called ADRP-related:
  80. // - .loh AdrpAdrp L2, L1:
  81. // L2: ADRP xA, sym1@PAGE
  82. // L1: ADRP xA, sym2@PAGE
  83. // L2 dominates L1 and xA is not redefined between L2 and L1.
  84. // This LOH aims at getting rid of redundant ADRP instructions.
  85. //
  86. // The overall design for emitting the LOHs is:
  87. // 1. AArch64CollectLOH (this pass) records the LOHs in the AArch64FunctionInfo.
  88. // 2. AArch64AsmPrinter reads the LOHs from AArch64FunctionInfo and it:
  89. // 1. Associates them a label.
  90. // 2. Emits them in a MCStreamer (EmitLOHDirective).
  91. // - The MCMachOStreamer records them into the MCAssembler.
  92. // - The MCAsmStreamer prints them.
  93. // - Other MCStreamers ignore them.
  94. // 3. Closes the MCStreamer:
  95. // - The MachObjectWriter gets them from the MCAssembler and writes
  96. // them in the object file.
  97. // - Other ObjectWriters ignore them.
  98. //===----------------------------------------------------------------------===//
  99. #include "AArch64.h"
  100. #include "AArch64InstrInfo.h"
  101. #include "AArch64MachineFunctionInfo.h"
  102. #include "llvm/ADT/BitVector.h"
  103. #include "llvm/ADT/DenseMap.h"
  104. #include "llvm/ADT/MapVector.h"
  105. #include "llvm/ADT/SmallSet.h"
  106. #include "llvm/ADT/SmallVector.h"
  107. #include "llvm/ADT/Statistic.h"
  108. #include "llvm/CodeGen/MachineBasicBlock.h"
  109. #include "llvm/CodeGen/MachineFunctionPass.h"
  110. #include "llvm/CodeGen/MachineInstr.h"
  111. #include "llvm/CodeGen/TargetRegisterInfo.h"
  112. #include "llvm/Support/Debug.h"
  113. #include "llvm/Support/ErrorHandling.h"
  114. #include "llvm/Support/raw_ostream.h"
  115. #include "llvm/Target/TargetMachine.h"
  116. using namespace llvm;
  117. #define DEBUG_TYPE "aarch64-collect-loh"
  // Pass statistics. Each counter below is incremented in handleADRP() at the
  // point where the corresponding LOH directive is recorded.
  // Incremented once per MCLOH_AdrpAdrp directive.
  118. STATISTIC(NumADRPSimpleCandidate,
  119. "Number of simplifiable ADRP dominate by another");
  // Incremented once per MCLOH_AdrpAddStr directive.
  120. STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD");
  // Incremented once per MCLOH_AdrpLdrGotStr directive.
  121. STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR");
  // Incremented once per MCLOH_AdrpAddLdr directive.
  122. STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD");
  // Incremented once per MCLOH_AdrpLdrGotLdr directive.
  123. STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR");
  // Incremented once per MCLOH_AdrpLdr directive.
  124. STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP");
  // Incremented once per MCLOH_AdrpAdd directive.
  125. STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD");
  126. #define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)"
  127. namespace {
  128. struct AArch64CollectLOH : public MachineFunctionPass {
  129. static char ID;
  130. AArch64CollectLOH() : MachineFunctionPass(ID) {}
  131. bool runOnMachineFunction(MachineFunction &MF) override;
  132. MachineFunctionProperties getRequiredProperties() const override {
  133. return MachineFunctionProperties().set(
  134. MachineFunctionProperties::Property::NoVRegs);
  135. }
  136. StringRef getPassName() const override { return AARCH64_COLLECT_LOH_NAME; }
  137. void getAnalysisUsage(AnalysisUsage &AU) const override {
  138. MachineFunctionPass::getAnalysisUsage(AU);
  139. AU.setPreservesAll();
  140. }
  141. };
  142. char AArch64CollectLOH::ID = 0;
  143. } // end anonymous namespace.
  144. INITIALIZE_PASS(AArch64CollectLOH, "aarch64-collect-loh",
  145. AARCH64_COLLECT_LOH_NAME, false, false)
  146. static bool canAddBePartOfLOH(const MachineInstr &MI) {
  147. // Check immediate to see if the immediate is an address.
  148. switch (MI.getOperand(2).getType()) {
  149. default:
  150. return false;
  151. case MachineOperand::MO_GlobalAddress:
  152. case MachineOperand::MO_JumpTableIndex:
  153. case MachineOperand::MO_ConstantPoolIndex:
  154. case MachineOperand::MO_BlockAddress:
  155. return true;
  156. }
  157. }
  158. /// Answer the following question: Can Def be one of the definition
  159. /// involved in a part of a LOH?
  160. static bool canDefBePartOfLOH(const MachineInstr &MI) {
  161. // Accept ADRP, ADDLow and LOADGot.
  162. switch (MI.getOpcode()) {
  163. default:
  164. return false;
  165. case AArch64::ADRP:
  166. return true;
  167. case AArch64::ADDXri:
  168. return canAddBePartOfLOH(MI);
  169. case AArch64::LDRXui:
  170. case AArch64::LDRWui:
  171. // Check immediate to see if the immediate is an address.
  172. switch (MI.getOperand(2).getType()) {
  173. default:
  174. return false;
  175. case MachineOperand::MO_GlobalAddress:
  176. return MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT;
  177. }
  178. }
  179. }
  180. /// Check whether the given instruction can the end of a LOH chain involving a
  181. /// store.
  182. static bool isCandidateStore(const MachineInstr &MI, const MachineOperand &MO) {
  183. switch (MI.getOpcode()) {
  184. default:
  185. return false;
  186. case AArch64::STRBBui:
  187. case AArch64::STRHHui:
  188. case AArch64::STRBui:
  189. case AArch64::STRHui:
  190. case AArch64::STRWui:
  191. case AArch64::STRXui:
  192. case AArch64::STRSui:
  193. case AArch64::STRDui:
  194. case AArch64::STRQui:
  195. // We can only optimize the index operand.
  196. // In case we have str xA, [xA, #imm], this is two different uses
  197. // of xA and we cannot fold, otherwise the xA stored may be wrong,
  198. // even if #imm == 0.
  199. return MI.getOperandNo(&MO) == 1 &&
  200. MI.getOperand(0).getReg() != MI.getOperand(1).getReg();
  201. }
  202. }
  203. /// Check whether the given instruction can be the end of a LOH chain
  204. /// involving a load.
  205. static bool isCandidateLoad(const MachineInstr &MI) {
  206. switch (MI.getOpcode()) {
  207. default:
  208. return false;
  209. case AArch64::LDRSBWui:
  210. case AArch64::LDRSBXui:
  211. case AArch64::LDRSHWui:
  212. case AArch64::LDRSHXui:
  213. case AArch64::LDRSWui:
  214. case AArch64::LDRBui:
  215. case AArch64::LDRHui:
  216. case AArch64::LDRWui:
  217. case AArch64::LDRXui:
  218. case AArch64::LDRSui:
  219. case AArch64::LDRDui:
  220. case AArch64::LDRQui:
  221. return !(MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT);
  222. }
  223. }
  224. /// Check whether the given instruction can load a litteral.
  225. static bool supportLoadFromLiteral(const MachineInstr &MI) {
  226. switch (MI.getOpcode()) {
  227. default:
  228. return false;
  229. case AArch64::LDRSWui:
  230. case AArch64::LDRWui:
  231. case AArch64::LDRXui:
  232. case AArch64::LDRSui:
  233. case AArch64::LDRDui:
  234. case AArch64::LDRQui:
  235. return true;
  236. }
  237. }
  238. /// Number of GPR registers traked by mapRegToGPRIndex()
  239. static const unsigned N_GPR_REGS = 31;
  240. /// Map register number to index from 0-30.
  241. static int mapRegToGPRIndex(MCPhysReg Reg) {
  242. static_assert(AArch64::X28 - AArch64::X0 + 3 == N_GPR_REGS, "Number of GPRs");
  243. static_assert(AArch64::W30 - AArch64::W0 + 1 == N_GPR_REGS, "Number of GPRs");
  244. if (AArch64::X0 <= Reg && Reg <= AArch64::X28)
  245. return Reg - AArch64::X0;
  246. if (AArch64::W0 <= Reg && Reg <= AArch64::W30)
  247. return Reg - AArch64::W0;
  248. // TableGen gives "FP" and "LR" an index not adjacent to X28 so we have to
  249. // handle them as special cases.
  250. if (Reg == AArch64::FP)
  251. return 29;
  252. if (Reg == AArch64::LR)
  253. return 30;
  254. return -1;
  255. }
  256. /// State tracked per register.
  257. /// The main algorithm walks backwards over a basic block maintaining this
  258. /// data structure for each tracked general purpose register.
  // The whole array of LOHInfo is zeroed with memset at the start of every
  // basic block, so an all-zero object is the initial state.
  259. struct LOHInfo {
  260. MCLOHType Type : 8; ///< "Best" type of LOH possible so far.
  261. bool IsCandidate : 1; ///< Possible LOH candidate.
  262. bool OneUser : 1; ///< Found at least one user (also set for live-outs).
  263. bool MultiUsers : 1; ///< Found multiple users.
  264. const MachineInstr *MI0; ///< First instruction recorded for the LOH (set by handleUse).
  265. const MachineInstr *MI1; ///< Second instruction involved in the LOH
  266. /// (if any); set by handleMiddleInst.
  267. const MachineInstr *LastADRP; ///< Last ADRP seen writing this register; reset on clobber.
  268. };
  269. /// Update state \p Info given \p MI uses the tracked register.
  270. static void handleUse(const MachineInstr &MI, const MachineOperand &MO,
  271. LOHInfo &Info) {
  272. // We have multiple uses if we already found one before.
  273. if (Info.MultiUsers || Info.OneUser) {
  274. Info.IsCandidate = false;
  275. Info.MultiUsers = true;
  276. return;
  277. }
  278. Info.OneUser = true;
  279. // Start new LOHInfo if applicable.
  280. if (isCandidateLoad(MI)) {
  281. Info.Type = MCLOH_AdrpLdr;
  282. Info.IsCandidate = true;
  283. Info.MI0 = &MI;
  284. // Note that even this is AdrpLdr now, we can switch to a Ldr variant
  285. // later.
  286. } else if (isCandidateStore(MI, MO)) {
  287. Info.Type = MCLOH_AdrpAddStr;
  288. Info.IsCandidate = true;
  289. Info.MI0 = &MI;
  290. Info.MI1 = nullptr;
  291. } else if (MI.getOpcode() == AArch64::ADDXri) {
  292. Info.Type = MCLOH_AdrpAdd;
  293. Info.IsCandidate = true;
  294. Info.MI0 = &MI;
  295. } else if ((MI.getOpcode() == AArch64::LDRXui ||
  296. MI.getOpcode() == AArch64::LDRWui) &&
  297. MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) {
  298. Info.Type = MCLOH_AdrpLdrGot;
  299. Info.IsCandidate = true;
  300. Info.MI0 = &MI;
  301. }
  302. }
  303. /// Update state \p Info given the tracked register is clobbered.
  304. static void handleClobber(LOHInfo &Info) {
  305. Info.IsCandidate = false;
  306. Info.OneUser = false;
  307. Info.MultiUsers = false;
  308. Info.LastADRP = nullptr;
  309. }
  310. /// Update state \p Info given that \p MI is possibly the middle instruction
  311. /// of an LOH involving 3 instructions.
  312. static bool handleMiddleInst(const MachineInstr &MI, LOHInfo &DefInfo,
  313. LOHInfo &OpInfo) {
  314. if (!DefInfo.IsCandidate || (&DefInfo != &OpInfo && OpInfo.OneUser))
  315. return false;
  316. // Copy LOHInfo for dest register to LOHInfo for source register.
  317. if (&DefInfo != &OpInfo) {
  318. OpInfo = DefInfo;
  319. // Invalidate \p DefInfo because we track it in \p OpInfo now.
  320. handleClobber(DefInfo);
  321. } else
  322. DefInfo.LastADRP = nullptr;
  323. // Advance state machine.
  324. assert(OpInfo.IsCandidate && "Expect valid state");
  325. if (MI.getOpcode() == AArch64::ADDXri && canAddBePartOfLOH(MI)) {
  326. if (OpInfo.Type == MCLOH_AdrpLdr) {
  327. OpInfo.Type = MCLOH_AdrpAddLdr;
  328. OpInfo.IsCandidate = true;
  329. OpInfo.MI1 = &MI;
  330. return true;
  331. } else if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) {
  332. OpInfo.Type = MCLOH_AdrpAddStr;
  333. OpInfo.IsCandidate = true;
  334. OpInfo.MI1 = &MI;
  335. return true;
  336. }
  337. } else {
  338. assert((MI.getOpcode() == AArch64::LDRXui ||
  339. MI.getOpcode() == AArch64::LDRWui) &&
  340. "Expect LDRXui or LDRWui");
  341. assert((MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) &&
  342. "Expected GOT relocation");
  343. if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) {
  344. OpInfo.Type = MCLOH_AdrpLdrGotStr;
  345. OpInfo.IsCandidate = true;
  346. OpInfo.MI1 = &MI;
  347. return true;
  348. } else if (OpInfo.Type == MCLOH_AdrpLdr) {
  349. OpInfo.Type = MCLOH_AdrpLdrGotLdr;
  350. OpInfo.IsCandidate = true;
  351. OpInfo.MI1 = &MI;
  352. return true;
  353. }
  354. }
  355. return false;
  356. }
  357. /// Update state when seeing and ADRP instruction.
  358. static void handleADRP(const MachineInstr &MI, AArch64FunctionInfo &AFI,
  359. LOHInfo &Info, LOHInfo *LOHInfos) {
  360. if (Info.LastADRP != nullptr) {
  361. LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAdrp:\n"
  362. << '\t' << MI << '\t' << *Info.LastADRP);
  363. AFI.addLOHDirective(MCLOH_AdrpAdrp, {&MI, Info.LastADRP});
  364. ++NumADRPSimpleCandidate;
  365. }
  366. // Produce LOH directive if possible.
  367. if (Info.IsCandidate) {
  368. switch (Info.Type) {
  369. case MCLOH_AdrpAdd: {
  370. // ADRPs and ADDs for this candidate may be split apart if using
  371. // GlobalISel instead of pseudo-expanded. If that happens, the
  372. // def register of the ADD may have a use in between. Adding an LOH in
  373. // this case can cause the linker to rewrite the ADRP to write to that
  374. // register, clobbering the use.
  375. const MachineInstr *AddMI = Info.MI0;
  376. int DefIdx = mapRegToGPRIndex(MI.getOperand(0).getReg());
  377. int OpIdx = mapRegToGPRIndex(AddMI->getOperand(0).getReg());
  378. LOHInfo DefInfo = LOHInfos[OpIdx];
  379. if (DefIdx != OpIdx && (DefInfo.OneUser || DefInfo.MultiUsers))
  380. break;
  381. LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAdd:\n"
  382. << '\t' << MI << '\t' << *Info.MI0);
  383. AFI.addLOHDirective(MCLOH_AdrpAdd, {&MI, Info.MI0});
  384. ++NumADRSimpleCandidate;
  385. break;
  386. }
  387. case MCLOH_AdrpLdr:
  388. if (supportLoadFromLiteral(*Info.MI0)) {
  389. LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdr:\n"
  390. << '\t' << MI << '\t' << *Info.MI0);
  391. AFI.addLOHDirective(MCLOH_AdrpLdr, {&MI, Info.MI0});
  392. ++NumADRPToLDR;
  393. }
  394. break;
  395. case MCLOH_AdrpAddLdr: {
  396. // There is a possibility that the linker may try to rewrite:
  397. // adrp x0, @sym@PAGE
  398. // add x1, x0, @sym@PAGEOFF
  399. // [x0 = some other def]
  400. // ldr x2, [x1]
  401. // ...into...
  402. // adrp x0, @sym
  403. // nop
  404. // [x0 = some other def]
  405. // ldr x2, [x0]
  406. // ...if the offset to the symbol won't fit within a literal load.
  407. // This causes the load to use the result of the adrp, which in this
  408. // case has already been clobbered.
  409. // FIXME: Implement proper liveness tracking for all registers. For now,
  410. // don't emit the LOH if there are any instructions between the add and
  411. // the ldr.
  412. MachineInstr *AddMI = const_cast<MachineInstr *>(Info.MI1);
  413. const MachineInstr *LdrMI = Info.MI0;
  414. auto AddIt = MachineBasicBlock::iterator(AddMI);
  415. auto EndIt = AddMI->getParent()->end();
  416. if (AddMI->getIterator() == EndIt || LdrMI != &*next_nodbg(AddIt, EndIt))
  417. break;
  418. LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAddLdr:\n"
  419. << '\t' << MI << '\t' << *Info.MI1 << '\t'
  420. << *Info.MI0);
  421. AFI.addLOHDirective(MCLOH_AdrpAddLdr, {&MI, Info.MI1, Info.MI0});
  422. ++NumADDToLDR;
  423. break;
  424. }
  425. case MCLOH_AdrpAddStr:
  426. if (Info.MI1 != nullptr) {
  427. LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAddStr:\n"
  428. << '\t' << MI << '\t' << *Info.MI1 << '\t'
  429. << *Info.MI0);
  430. AFI.addLOHDirective(MCLOH_AdrpAddStr, {&MI, Info.MI1, Info.MI0});
  431. ++NumADDToSTR;
  432. }
  433. break;
  434. case MCLOH_AdrpLdrGotLdr:
  435. LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotLdr:\n"
  436. << '\t' << MI << '\t' << *Info.MI1 << '\t'
  437. << *Info.MI0);
  438. AFI.addLOHDirective(MCLOH_AdrpLdrGotLdr, {&MI, Info.MI1, Info.MI0});
  439. ++NumLDRToLDR;
  440. break;
  441. case MCLOH_AdrpLdrGotStr:
  442. LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotStr:\n"
  443. << '\t' << MI << '\t' << *Info.MI1 << '\t'
  444. << *Info.MI0);
  445. AFI.addLOHDirective(MCLOH_AdrpLdrGotStr, {&MI, Info.MI1, Info.MI0});
  446. ++NumLDRToSTR;
  447. break;
  448. case MCLOH_AdrpLdrGot:
  449. LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGot:\n"
  450. << '\t' << MI << '\t' << *Info.MI0);
  451. AFI.addLOHDirective(MCLOH_AdrpLdrGot, {&MI, Info.MI0});
  452. break;
  453. case MCLOH_AdrpAdrp:
  454. llvm_unreachable("MCLOH_AdrpAdrp not used in state machine");
  455. }
  456. }
  457. handleClobber(Info);
  458. Info.LastADRP = &MI;
  459. }
  460. static void handleRegMaskClobber(const uint32_t *RegMask, MCPhysReg Reg,
  461. LOHInfo *LOHInfos) {
  462. if (!MachineOperand::clobbersPhysReg(RegMask, Reg))
  463. return;
  464. int Idx = mapRegToGPRIndex(Reg);
  465. if (Idx >= 0)
  466. handleClobber(LOHInfos[Idx]);
  467. }
  468. static void handleNormalInst(const MachineInstr &MI, LOHInfo *LOHInfos) {
  469. // Handle defs and regmasks.
  470. for (const MachineOperand &MO : MI.operands()) {
  471. if (MO.isRegMask()) {
  472. const uint32_t *RegMask = MO.getRegMask();
  473. for (MCPhysReg Reg : AArch64::GPR32RegClass)
  474. handleRegMaskClobber(RegMask, Reg, LOHInfos);
  475. for (MCPhysReg Reg : AArch64::GPR64RegClass)
  476. handleRegMaskClobber(RegMask, Reg, LOHInfos);
  477. continue;
  478. }
  479. if (!MO.isReg() || !MO.isDef())
  480. continue;
  481. int Idx = mapRegToGPRIndex(MO.getReg());
  482. if (Idx < 0)
  483. continue;
  484. handleClobber(LOHInfos[Idx]);
  485. }
  486. // Handle uses.
  487. SmallSet<int, 4> UsesSeen;
  488. for (const MachineOperand &MO : MI.uses()) {
  489. if (!MO.isReg() || !MO.readsReg())
  490. continue;
  491. int Idx = mapRegToGPRIndex(MO.getReg());
  492. if (Idx < 0)
  493. continue;
  494. // Multiple uses of the same register within a single instruction don't
  495. // count as MultiUser or block optimization. This is especially important on
  496. // arm64_32, where any memory operation is likely to be an explicit use of
  497. // xN and an implicit use of wN (the base address register).
  498. if (UsesSeen.insert(Idx).second)
  499. handleUse(MI, MO, LOHInfos[Idx]);
  500. }
  501. }
  502. bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
  503. if (skipFunction(MF.getFunction()))
  504. return false;
  505. LLVM_DEBUG(dbgs() << "********** AArch64 Collect LOH **********\n"
  506. << "Looking in function " << MF.getName() << '\n');
  507. LOHInfo LOHInfos[N_GPR_REGS];
  508. AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
  509. for (const MachineBasicBlock &MBB : MF) {
  510. // Reset register tracking state.
  511. memset(LOHInfos, 0, sizeof(LOHInfos));
  512. // Live-out registers are used.
  513. for (const MachineBasicBlock *Succ : MBB.successors()) {
  514. for (const auto &LI : Succ->liveins()) {
  515. int RegIdx = mapRegToGPRIndex(LI.PhysReg);
  516. if (RegIdx >= 0)
  517. LOHInfos[RegIdx].OneUser = true;
  518. }
  519. }
  520. // Walk the basic block backwards and update the per register state machine
  521. // in the process.
  522. for (const MachineInstr &MI :
  523. instructionsWithoutDebug(MBB.instr_rbegin(), MBB.instr_rend())) {
  524. unsigned Opcode = MI.getOpcode();
  525. switch (Opcode) {
  526. case AArch64::ADDXri:
  527. case AArch64::LDRXui:
  528. case AArch64::LDRWui:
  529. if (canDefBePartOfLOH(MI)) {
  530. const MachineOperand &Def = MI.getOperand(0);
  531. const MachineOperand &Op = MI.getOperand(1);
  532. assert(Def.isReg() && Def.isDef() && "Expected reg def");
  533. assert(Op.isReg() && Op.isUse() && "Expected reg use");
  534. int DefIdx = mapRegToGPRIndex(Def.getReg());
  535. int OpIdx = mapRegToGPRIndex(Op.getReg());
  536. if (DefIdx >= 0 && OpIdx >= 0 &&
  537. handleMiddleInst(MI, LOHInfos[DefIdx], LOHInfos[OpIdx]))
  538. continue;
  539. }
  540. break;
  541. case AArch64::ADRP:
  542. const MachineOperand &Op0 = MI.getOperand(0);
  543. int Idx = mapRegToGPRIndex(Op0.getReg());
  544. if (Idx >= 0) {
  545. handleADRP(MI, AFI, LOHInfos[Idx], LOHInfos);
  546. continue;
  547. }
  548. break;
  549. }
  550. handleNormalInst(MI, LOHInfos);
  551. }
  552. }
  553. // Return "no change": The pass only collects information.
  554. return false;
  555. }
  556. FunctionPass *llvm::createAArch64CollectLOHPass() {
  557. return new AArch64CollectLOH();
  558. }