AArch64CollectLOH.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575
  1. //===---------- AArch64CollectLOH.cpp - AArch64 collect LOH pass --*- C++ -*-=//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains a pass that collects the Linker Optimization Hints (LOH).
  10. // This pass should be run at the very end of the compilation flow, just before
  11. // assembly printer.
  12. // To be useful for the linker, the LOH must be printed into the assembly file.
  13. //
  14. // A LOH describes a sequence of instructions that may be optimized by the
  15. // linker.
  16. // This same sequence cannot be optimized by the compiler because some of
  17. // the information will be known at link time.
  18. // For instance, consider the following sequence:
  19. // L1: adrp xA, sym@PAGE
  20. // L2: add xB, xA, sym@PAGEOFF
  21. // L3: ldr xC, [xB, #imm]
  22. // This sequence can be turned into:
  23. // A literal load if sym@PAGE + sym@PAGEOFF + #imm - address(L3) is < 1MB:
  24. // L3: ldr xC, sym+#imm
  25. // It may also be turned into either of the following more efficient
  26. // code sequences:
  27. // - If sym@PAGEOFF + #imm fits the encoding space of L3.
  28. // L1: adrp xA, sym@PAGE
  29. // L3: ldr xC, [xB, sym@PAGEOFF + #imm]
  30. // - If sym@PAGE + sym@PAGEOFF - address(L1) < 1MB:
  31. // L1: adr xA, sym
  32. // L3: ldr xC, [xB, #imm]
  33. //
  34. // To be valid a LOH must meet all the requirements needed by all the related
  35. // possible linker transformations.
  36. // For instance, using the running example, the constraints to emit
  37. // ".loh AdrpAddLdr" are:
  38. // - L1, L2, and L3 instructions are of the expected type, i.e.,
  39. // respectively ADRP, ADD (immediate), and LD.
  40. // - The result of L1 is used only by L2.
  41. // - The register argument (xA) used in the ADD instruction is defined
  42. // only by L1.
  43. // - The result of L2 is used only by L3.
  44. // - The base address (xB) in L3 is defined only by L2.
  45. // - The ADRP in L1 and the ADD in L2 must reference the same symbol using
  46. // @PAGE/@PAGEOFF with no additional constants
  47. //
  48. // Currently supported LOHs are:
  49. // * So called non-ADRP-related:
  50. // - .loh AdrpAddLdr L1, L2, L3:
  51. // L1: adrp xA, sym@PAGE
  52. // L2: add xB, xA, sym@PAGEOFF
  53. // L3: ldr xC, [xB, #imm]
  54. // - .loh AdrpLdrGotLdr L1, L2, L3:
  55. // L1: adrp xA, sym@GOTPAGE
  56. // L2: ldr xB, [xA, sym@GOTPAGEOFF]
  57. // L3: ldr xC, [xB, #imm]
  58. // - .loh AdrpLdr L1, L3:
  59. // L1: adrp xA, sym@PAGE
  60. // L3: ldr xC, [xA, sym@PAGEOFF]
  61. // - .loh AdrpAddStr L1, L2, L3:
  62. // L1: adrp xA, sym@PAGE
  63. // L2: add xB, xA, sym@PAGEOFF
  64. // L3: str xC, [xB, #imm]
  65. // - .loh AdrpLdrGotStr L1, L2, L3:
  66. // L1: adrp xA, sym@GOTPAGE
  67. // L2: ldr xB, [xA, sym@GOTPAGEOFF]
  68. // L3: str xC, [xB, #imm]
  69. // - .loh AdrpAdd L1, L2:
  70. // L1: adrp xA, sym@PAGE
  71. // L2: add xB, xA, sym@PAGEOFF
  72. // For all these LOHs, L1, L2, L3 form a simple chain:
  73. // L1 result is used only by L2 and L2 result by L3.
  74. // L3 LOH-related argument is defined only by L2 and L2 LOH-related argument
  75. // by L1.
  76. // All these LOHs aim at using more efficient load/store patterns by folding
  77. // some instructions used to compute the address directly into the load/store.
  78. //
  79. // * So called ADRP-related:
  80. // - .loh AdrpAdrp L2, L1:
  81. // L2: ADRP xA, sym1@PAGE
  82. // L1: ADRP xA, sym2@PAGE
  83. // L2 dominates L1 and xA is not redefined between L2 and L1.
  84. // This LOH aims at getting rid of redundant ADRP instructions.
  85. //
  86. // The overall design for emitting the LOHs is:
  87. // 1. AArch64CollectLOH (this pass) records the LOHs in the AArch64FunctionInfo.
  88. // 2. AArch64AsmPrinter reads the LOHs from AArch64FunctionInfo and it:
  89. // 1. Associates them with a label.
  90. // 2. Emits them in a MCStreamer (EmitLOHDirective).
  91. // - The MCMachOStreamer records them into the MCAssembler.
  92. // - The MCAsmStreamer prints them.
  93. // - Other MCStreamers ignore them.
  94. // 3. Closes the MCStreamer:
  95. // - The MachObjectWriter gets them from the MCAssembler and writes
  96. // them in the object file.
  97. // - Other ObjectWriters ignore them.
  98. //===----------------------------------------------------------------------===//
  99. #include "AArch64.h"
  100. #include "AArch64InstrInfo.h"
  101. #include "AArch64MachineFunctionInfo.h"
  102. #include "llvm/ADT/BitVector.h"
  103. #include "llvm/ADT/DenseMap.h"
  104. #include "llvm/ADT/MapVector.h"
  105. #include "llvm/ADT/SmallSet.h"
  106. #include "llvm/ADT/SmallVector.h"
  107. #include "llvm/ADT/Statistic.h"
  108. #include "llvm/CodeGen/MachineBasicBlock.h"
  109. #include "llvm/CodeGen/MachineFunctionPass.h"
  110. #include "llvm/CodeGen/MachineInstr.h"
  111. #include "llvm/CodeGen/TargetRegisterInfo.h"
  112. #include "llvm/Support/Debug.h"
  113. #include "llvm/Support/ErrorHandling.h"
  114. #include "llvm/Support/raw_ostream.h"
  115. #include "llvm/Target/TargetMachine.h"
  116. using namespace llvm;
  117. #define DEBUG_TYPE "aarch64-collect-loh"
  118. STATISTIC(NumADRPSimpleCandidate,
  119. "Number of simplifiable ADRP dominate by another");
  120. STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD");
  121. STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR");
  122. STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD");
  123. STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR");
  124. STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP");
  125. STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD");
  126. #define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)"
  127. namespace {
  128. struct AArch64CollectLOH : public MachineFunctionPass {
  129. static char ID;
  130. AArch64CollectLOH() : MachineFunctionPass(ID) {}
  131. bool runOnMachineFunction(MachineFunction &MF) override;
  132. MachineFunctionProperties getRequiredProperties() const override {
  133. return MachineFunctionProperties().set(
  134. MachineFunctionProperties::Property::NoVRegs);
  135. }
  136. StringRef getPassName() const override { return AARCH64_COLLECT_LOH_NAME; }
  137. void getAnalysisUsage(AnalysisUsage &AU) const override {
  138. MachineFunctionPass::getAnalysisUsage(AU);
  139. AU.setPreservesAll();
  140. }
  141. };
  142. char AArch64CollectLOH::ID = 0;
  143. } // end anonymous namespace.
  144. INITIALIZE_PASS(AArch64CollectLOH, "aarch64-collect-loh",
  145. AARCH64_COLLECT_LOH_NAME, false, false)
  146. static bool canAddBePartOfLOH(const MachineInstr &MI) {
  147. // Check immediate to see if the immediate is an address.
  148. switch (MI.getOperand(2).getType()) {
  149. default:
  150. return false;
  151. case MachineOperand::MO_GlobalAddress:
  152. case MachineOperand::MO_JumpTableIndex:
  153. case MachineOperand::MO_ConstantPoolIndex:
  154. case MachineOperand::MO_BlockAddress:
  155. return true;
  156. }
  157. }
  158. /// Answer the following question: Can Def be one of the definition
  159. /// involved in a part of a LOH?
  160. static bool canDefBePartOfLOH(const MachineInstr &MI) {
  161. // Accept ADRP, ADDLow and LOADGot.
  162. switch (MI.getOpcode()) {
  163. default:
  164. return false;
  165. case AArch64::ADRP:
  166. return true;
  167. case AArch64::ADDXri:
  168. return canAddBePartOfLOH(MI);
  169. case AArch64::LDRXui:
  170. case AArch64::LDRWui:
  171. // Check immediate to see if the immediate is an address.
  172. switch (MI.getOperand(2).getType()) {
  173. default:
  174. return false;
  175. case MachineOperand::MO_GlobalAddress:
  176. return MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT;
  177. }
  178. }
  179. }
  180. /// Check whether the given instruction can the end of a LOH chain involving a
  181. /// store.
  182. static bool isCandidateStore(const MachineInstr &MI, const MachineOperand &MO) {
  183. switch (MI.getOpcode()) {
  184. default:
  185. return false;
  186. case AArch64::STRBBui:
  187. case AArch64::STRHHui:
  188. case AArch64::STRBui:
  189. case AArch64::STRHui:
  190. case AArch64::STRWui:
  191. case AArch64::STRXui:
  192. case AArch64::STRSui:
  193. case AArch64::STRDui:
  194. case AArch64::STRQui:
  195. // We can only optimize the index operand.
  196. // In case we have str xA, [xA, #imm], this is two different uses
  197. // of xA and we cannot fold, otherwise the xA stored may be wrong,
  198. // even if #imm == 0.
  199. return MI.getOperandNo(&MO) == 1 &&
  200. MI.getOperand(0).getReg() != MI.getOperand(1).getReg();
  201. }
  202. }
  203. /// Check whether the given instruction can be the end of a LOH chain
  204. /// involving a load.
  205. static bool isCandidateLoad(const MachineInstr &MI) {
  206. switch (MI.getOpcode()) {
  207. default:
  208. return false;
  209. case AArch64::LDRSBWui:
  210. case AArch64::LDRSBXui:
  211. case AArch64::LDRSHWui:
  212. case AArch64::LDRSHXui:
  213. case AArch64::LDRSWui:
  214. case AArch64::LDRBui:
  215. case AArch64::LDRHui:
  216. case AArch64::LDRWui:
  217. case AArch64::LDRXui:
  218. case AArch64::LDRSui:
  219. case AArch64::LDRDui:
  220. case AArch64::LDRQui:
  221. return !(MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT);
  222. }
  223. }
  224. /// Check whether the given instruction can load a litteral.
  225. static bool supportLoadFromLiteral(const MachineInstr &MI) {
  226. switch (MI.getOpcode()) {
  227. default:
  228. return false;
  229. case AArch64::LDRSWui:
  230. case AArch64::LDRWui:
  231. case AArch64::LDRXui:
  232. case AArch64::LDRSui:
  233. case AArch64::LDRDui:
  234. case AArch64::LDRQui:
  235. return true;
  236. }
  237. }
  238. /// Number of GPR registers traked by mapRegToGPRIndex()
  239. static const unsigned N_GPR_REGS = 31;
  240. /// Map register number to index from 0-30.
  241. static int mapRegToGPRIndex(MCPhysReg Reg) {
  242. static_assert(AArch64::X28 - AArch64::X0 + 3 == N_GPR_REGS, "Number of GPRs");
  243. static_assert(AArch64::W30 - AArch64::W0 + 1 == N_GPR_REGS, "Number of GPRs");
  244. if (AArch64::X0 <= Reg && Reg <= AArch64::X28)
  245. return Reg - AArch64::X0;
  246. if (AArch64::W0 <= Reg && Reg <= AArch64::W30)
  247. return Reg - AArch64::W0;
  248. // TableGen gives "FP" and "LR" an index not adjacent to X28 so we have to
  249. // handle them as special cases.
  250. if (Reg == AArch64::FP)
  251. return 29;
  252. if (Reg == AArch64::LR)
  253. return 30;
  254. return -1;
  255. }
  256. /// State tracked per register.
  257. /// The main algorithm walks backwards over a basic block maintaining this
  258. /// datastructure for each tracked general purpose register.
  259. struct LOHInfo {
  260. MCLOHType Type : 8; ///< "Best" type of LOH possible.
  261. bool IsCandidate : 1; ///< Possible LOH candidate.
  262. bool OneUser : 1; ///< Found exactly one user (yet).
  263. bool MultiUsers : 1; ///< Found multiple users.
  264. const MachineInstr *MI0; ///< First instruction involved in the LOH.
  265. const MachineInstr *MI1; ///< Second instruction involved in the LOH
  266. /// (if any).
  267. const MachineInstr *LastADRP; ///< Last ADRP in same register.
  268. };
  269. /// Update state \p Info given \p MI uses the tracked register.
  270. static void handleUse(const MachineInstr &MI, const MachineOperand &MO,
  271. LOHInfo &Info) {
  272. // We have multiple uses if we already found one before.
  273. if (Info.MultiUsers || Info.OneUser) {
  274. Info.IsCandidate = false;
  275. Info.MultiUsers = true;
  276. return;
  277. }
  278. Info.OneUser = true;
  279. // Start new LOHInfo if applicable.
  280. if (isCandidateLoad(MI)) {
  281. Info.Type = MCLOH_AdrpLdr;
  282. Info.IsCandidate = true;
  283. Info.MI0 = &MI;
  284. // Note that even this is AdrpLdr now, we can switch to a Ldr variant
  285. // later.
  286. } else if (isCandidateStore(MI, MO)) {
  287. Info.Type = MCLOH_AdrpAddStr;
  288. Info.IsCandidate = true;
  289. Info.MI0 = &MI;
  290. Info.MI1 = nullptr;
  291. } else if (MI.getOpcode() == AArch64::ADDXri) {
  292. Info.Type = MCLOH_AdrpAdd;
  293. Info.IsCandidate = true;
  294. Info.MI0 = &MI;
  295. } else if ((MI.getOpcode() == AArch64::LDRXui ||
  296. MI.getOpcode() == AArch64::LDRWui) &&
  297. MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) {
  298. Info.Type = MCLOH_AdrpLdrGot;
  299. Info.IsCandidate = true;
  300. Info.MI0 = &MI;
  301. }
  302. }
  303. /// Update state \p Info given the tracked register is clobbered.
  304. static void handleClobber(LOHInfo &Info) {
  305. Info.IsCandidate = false;
  306. Info.OneUser = false;
  307. Info.MultiUsers = false;
  308. Info.LastADRP = nullptr;
  309. }
  310. /// Update state \p Info given that \p MI is possibly the middle instruction
  311. /// of an LOH involving 3 instructions.
  312. static bool handleMiddleInst(const MachineInstr &MI, LOHInfo &DefInfo,
  313. LOHInfo &OpInfo) {
  314. if (!DefInfo.IsCandidate || (&DefInfo != &OpInfo && OpInfo.OneUser))
  315. return false;
  316. // Copy LOHInfo for dest register to LOHInfo for source register.
  317. if (&DefInfo != &OpInfo) {
  318. OpInfo = DefInfo;
  319. // Invalidate \p DefInfo because we track it in \p OpInfo now.
  320. handleClobber(DefInfo);
  321. } else
  322. DefInfo.LastADRP = nullptr;
  323. // Advance state machine.
  324. assert(OpInfo.IsCandidate && "Expect valid state");
  325. if (MI.getOpcode() == AArch64::ADDXri && canAddBePartOfLOH(MI)) {
  326. if (OpInfo.Type == MCLOH_AdrpLdr) {
  327. OpInfo.Type = MCLOH_AdrpAddLdr;
  328. OpInfo.IsCandidate = true;
  329. OpInfo.MI1 = &MI;
  330. return true;
  331. } else if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) {
  332. OpInfo.Type = MCLOH_AdrpAddStr;
  333. OpInfo.IsCandidate = true;
  334. OpInfo.MI1 = &MI;
  335. return true;
  336. }
  337. } else {
  338. assert((MI.getOpcode() == AArch64::LDRXui ||
  339. MI.getOpcode() == AArch64::LDRWui) &&
  340. "Expect LDRXui or LDRWui");
  341. assert((MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) &&
  342. "Expected GOT relocation");
  343. if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) {
  344. OpInfo.Type = MCLOH_AdrpLdrGotStr;
  345. OpInfo.IsCandidate = true;
  346. OpInfo.MI1 = &MI;
  347. return true;
  348. } else if (OpInfo.Type == MCLOH_AdrpLdr) {
  349. OpInfo.Type = MCLOH_AdrpLdrGotLdr;
  350. OpInfo.IsCandidate = true;
  351. OpInfo.MI1 = &MI;
  352. return true;
  353. }
  354. }
  355. return false;
  356. }
  357. /// Update state when seeing and ADRP instruction.
  358. static void handleADRP(const MachineInstr &MI, AArch64FunctionInfo &AFI,
  359. LOHInfo &Info, LOHInfo *LOHInfos) {
  360. if (Info.LastADRP != nullptr) {
  361. LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAdrp:\n"
  362. << '\t' << MI << '\t' << *Info.LastADRP);
  363. AFI.addLOHDirective(MCLOH_AdrpAdrp, {&MI, Info.LastADRP});
  364. ++NumADRPSimpleCandidate;
  365. }
  366. // Produce LOH directive if possible.
  367. if (Info.IsCandidate) {
  368. switch (Info.Type) {
  369. case MCLOH_AdrpAdd: {
  370. // ADRPs and ADDs for this candidate may be split apart if using
  371. // GlobalISel instead of pseudo-expanded. If that happens, the
  372. // def register of the ADD may have a use in between. Adding an LOH in
  373. // this case can cause the linker to rewrite the ADRP to write to that
  374. // register, clobbering the use.
  375. const MachineInstr *AddMI = Info.MI0;
  376. int DefIdx = mapRegToGPRIndex(MI.getOperand(0).getReg());
  377. int OpIdx = mapRegToGPRIndex(AddMI->getOperand(0).getReg());
  378. LOHInfo DefInfo = LOHInfos[OpIdx];
  379. if (DefIdx != OpIdx && (DefInfo.OneUser || DefInfo.MultiUsers))
  380. break;
  381. LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAdd:\n"
  382. << '\t' << MI << '\t' << *Info.MI0);
  383. AFI.addLOHDirective(MCLOH_AdrpAdd, {&MI, Info.MI0});
  384. ++NumADRSimpleCandidate;
  385. break;
  386. }
  387. case MCLOH_AdrpLdr:
  388. if (supportLoadFromLiteral(*Info.MI0)) {
  389. LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdr:\n"
  390. << '\t' << MI << '\t' << *Info.MI0);
  391. AFI.addLOHDirective(MCLOH_AdrpLdr, {&MI, Info.MI0});
  392. ++NumADRPToLDR;
  393. }
  394. break;
  395. case MCLOH_AdrpAddLdr:
  396. LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAddLdr:\n"
  397. << '\t' << MI << '\t' << *Info.MI1 << '\t'
  398. << *Info.MI0);
  399. AFI.addLOHDirective(MCLOH_AdrpAddLdr, {&MI, Info.MI1, Info.MI0});
  400. ++NumADDToLDR;
  401. break;
  402. case MCLOH_AdrpAddStr:
  403. if (Info.MI1 != nullptr) {
  404. LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAddStr:\n"
  405. << '\t' << MI << '\t' << *Info.MI1 << '\t'
  406. << *Info.MI0);
  407. AFI.addLOHDirective(MCLOH_AdrpAddStr, {&MI, Info.MI1, Info.MI0});
  408. ++NumADDToSTR;
  409. }
  410. break;
  411. case MCLOH_AdrpLdrGotLdr:
  412. LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotLdr:\n"
  413. << '\t' << MI << '\t' << *Info.MI1 << '\t'
  414. << *Info.MI0);
  415. AFI.addLOHDirective(MCLOH_AdrpLdrGotLdr, {&MI, Info.MI1, Info.MI0});
  416. ++NumLDRToLDR;
  417. break;
  418. case MCLOH_AdrpLdrGotStr:
  419. LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotStr:\n"
  420. << '\t' << MI << '\t' << *Info.MI1 << '\t'
  421. << *Info.MI0);
  422. AFI.addLOHDirective(MCLOH_AdrpLdrGotStr, {&MI, Info.MI1, Info.MI0});
  423. ++NumLDRToSTR;
  424. break;
  425. case MCLOH_AdrpLdrGot:
  426. LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGot:\n"
  427. << '\t' << MI << '\t' << *Info.MI0);
  428. AFI.addLOHDirective(MCLOH_AdrpLdrGot, {&MI, Info.MI0});
  429. break;
  430. case MCLOH_AdrpAdrp:
  431. llvm_unreachable("MCLOH_AdrpAdrp not used in state machine");
  432. }
  433. }
  434. handleClobber(Info);
  435. Info.LastADRP = &MI;
  436. }
  437. static void handleRegMaskClobber(const uint32_t *RegMask, MCPhysReg Reg,
  438. LOHInfo *LOHInfos) {
  439. if (!MachineOperand::clobbersPhysReg(RegMask, Reg))
  440. return;
  441. int Idx = mapRegToGPRIndex(Reg);
  442. if (Idx >= 0)
  443. handleClobber(LOHInfos[Idx]);
  444. }
  445. static void handleNormalInst(const MachineInstr &MI, LOHInfo *LOHInfos) {
  446. // Handle defs and regmasks.
  447. for (const MachineOperand &MO : MI.operands()) {
  448. if (MO.isRegMask()) {
  449. const uint32_t *RegMask = MO.getRegMask();
  450. for (MCPhysReg Reg : AArch64::GPR32RegClass)
  451. handleRegMaskClobber(RegMask, Reg, LOHInfos);
  452. for (MCPhysReg Reg : AArch64::GPR64RegClass)
  453. handleRegMaskClobber(RegMask, Reg, LOHInfos);
  454. continue;
  455. }
  456. if (!MO.isReg() || !MO.isDef())
  457. continue;
  458. int Idx = mapRegToGPRIndex(MO.getReg());
  459. if (Idx < 0)
  460. continue;
  461. handleClobber(LOHInfos[Idx]);
  462. }
  463. // Handle uses.
  464. SmallSet<int, 4> UsesSeen;
  465. for (const MachineOperand &MO : MI.uses()) {
  466. if (!MO.isReg() || !MO.readsReg())
  467. continue;
  468. int Idx = mapRegToGPRIndex(MO.getReg());
  469. if (Idx < 0)
  470. continue;
  471. // Multiple uses of the same register within a single instruction don't
  472. // count as MultiUser or block optimization. This is especially important on
  473. // arm64_32, where any memory operation is likely to be an explicit use of
  474. // xN and an implicit use of wN (the base address register).
  475. if (!UsesSeen.count(Idx)) {
  476. handleUse(MI, MO, LOHInfos[Idx]);
  477. UsesSeen.insert(Idx);
  478. }
  479. }
  480. }
  481. bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
  482. if (skipFunction(MF.getFunction()))
  483. return false;
  484. LLVM_DEBUG(dbgs() << "********** AArch64 Collect LOH **********\n"
  485. << "Looking in function " << MF.getName() << '\n');
  486. LOHInfo LOHInfos[N_GPR_REGS];
  487. AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
  488. for (const MachineBasicBlock &MBB : MF) {
  489. // Reset register tracking state.
  490. memset(LOHInfos, 0, sizeof(LOHInfos));
  491. // Live-out registers are used.
  492. for (const MachineBasicBlock *Succ : MBB.successors()) {
  493. for (const auto &LI : Succ->liveins()) {
  494. int RegIdx = mapRegToGPRIndex(LI.PhysReg);
  495. if (RegIdx >= 0)
  496. LOHInfos[RegIdx].OneUser = true;
  497. }
  498. }
  499. // Walk the basic block backwards and update the per register state machine
  500. // in the process.
  501. for (const MachineInstr &MI :
  502. instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
  503. unsigned Opcode = MI.getOpcode();
  504. switch (Opcode) {
  505. case AArch64::ADDXri:
  506. case AArch64::LDRXui:
  507. case AArch64::LDRWui:
  508. if (canDefBePartOfLOH(MI)) {
  509. const MachineOperand &Def = MI.getOperand(0);
  510. const MachineOperand &Op = MI.getOperand(1);
  511. assert(Def.isReg() && Def.isDef() && "Expected reg def");
  512. assert(Op.isReg() && Op.isUse() && "Expected reg use");
  513. int DefIdx = mapRegToGPRIndex(Def.getReg());
  514. int OpIdx = mapRegToGPRIndex(Op.getReg());
  515. if (DefIdx >= 0 && OpIdx >= 0 &&
  516. handleMiddleInst(MI, LOHInfos[DefIdx], LOHInfos[OpIdx]))
  517. continue;
  518. }
  519. break;
  520. case AArch64::ADRP:
  521. const MachineOperand &Op0 = MI.getOperand(0);
  522. int Idx = mapRegToGPRIndex(Op0.getReg());
  523. if (Idx >= 0) {
  524. handleADRP(MI, AFI, LOHInfos[Idx], LOHInfos);
  525. continue;
  526. }
  527. break;
  528. }
  529. handleNormalInst(MI, LOHInfos);
  530. }
  531. }
  532. // Return "no change": The pass only collects information.
  533. return false;
  534. }
  535. FunctionPass *llvm::createAArch64CollectLOHPass() {
  536. return new AArch64CollectLOH();
  537. }