// RISCVMergeBaseOffset.cpp (extracted copy, ~16 KB)
  1. //===----- RISCVMergeBaseOffset.cpp - Optimise address calculations ------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // Merge the offset of address calculation into the offset field
  10. // of instructions in a global address lowering sequence.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "RISCV.h"
  14. #include "RISCVTargetMachine.h"
  15. #include "llvm/CodeGen/MachineFunctionPass.h"
  16. #include "llvm/CodeGen/Passes.h"
  17. #include "llvm/MC/TargetRegistry.h"
  18. #include "llvm/Support/Debug.h"
  19. #include "llvm/Target/TargetOptions.h"
  20. #include <optional>
  21. #include <set>
  22. using namespace llvm;
// Debug type used by LLVM_DEBUG / -debug-only filtering for this pass.
#define DEBUG_TYPE "riscv-merge-base-offset"
// Human-readable pass name reported by getPassName() and INITIALIZE_PASS.
#define RISCV_MERGE_BASE_OFFSET_NAME "RISCV Merge Base Offset"
  25. namespace {
  26. struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
  27. private:
  28. const RISCVSubtarget *ST = nullptr;
  29. public:
  30. static char ID;
  31. bool runOnMachineFunction(MachineFunction &Fn) override;
  32. bool detectFoldable(MachineInstr &Hi, MachineInstr *&Lo);
  33. bool detectAndFoldOffset(MachineInstr &Hi, MachineInstr &Lo);
  34. void foldOffset(MachineInstr &Hi, MachineInstr &Lo, MachineInstr &Tail,
  35. int64_t Offset);
  36. bool foldLargeOffset(MachineInstr &Hi, MachineInstr &Lo,
  37. MachineInstr &TailAdd, Register GSReg);
  38. bool foldShiftedOffset(MachineInstr &Hi, MachineInstr &Lo,
  39. MachineInstr &TailShXAdd, Register GSReg);
  40. bool foldIntoMemoryOps(MachineInstr &Hi, MachineInstr &Lo);
  41. RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}
  42. MachineFunctionProperties getRequiredProperties() const override {
  43. return MachineFunctionProperties().set(
  44. MachineFunctionProperties::Property::IsSSA);
  45. }
  46. void getAnalysisUsage(AnalysisUsage &AU) const override {
  47. AU.setPreservesCFG();
  48. MachineFunctionPass::getAnalysisUsage(AU);
  49. }
  50. StringRef getPassName() const override {
  51. return RISCV_MERGE_BASE_OFFSET_NAME;
  52. }
  53. private:
  54. MachineRegisterInfo *MRI;
  55. };
  56. } // end anonymous namespace
// Pass identification: the address of ID uniquely identifies this pass.
char RISCVMergeBaseOffsetOpt::ID = 0;

// Register the pass with the pass registry under DEBUG_TYPE.
INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE,
                RISCV_MERGE_BASE_OFFSET_NAME, false, false)
  60. // Detect either of the patterns:
  61. //
  62. // 1. (medlow pattern):
  63. // lui vreg1, %hi(s)
  64. // addi vreg2, vreg1, %lo(s)
  65. //
  66. // 2. (medany pattern):
  67. // .Lpcrel_hi1:
  68. // auipc vreg1, %pcrel_hi(s)
  69. // addi vreg2, vreg1, %pcrel_lo(.Lpcrel_hi1)
  70. //
  71. // The pattern is only accepted if:
  72. // 1) The first instruction has only one use, which is the ADDI.
  73. // 2) The address operands have the appropriate type, reflecting the
  74. // lowering of a global address or constant pool using medlow or medany.
  75. // 3) The offset value in the Global Address or Constant Pool is 0.
  76. bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
  77. MachineInstr *&Lo) {
  78. if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC)
  79. return false;
  80. const MachineOperand &HiOp1 = Hi.getOperand(1);
  81. unsigned ExpectedFlags =
  82. Hi.getOpcode() == RISCV::AUIPC ? RISCVII::MO_PCREL_HI : RISCVII::MO_HI;
  83. if (HiOp1.getTargetFlags() != ExpectedFlags)
  84. return false;
  85. if (!(HiOp1.isGlobal() || HiOp1.isCPI()) || HiOp1.getOffset() != 0)
  86. return false;
  87. Register HiDestReg = Hi.getOperand(0).getReg();
  88. if (!MRI->hasOneUse(HiDestReg))
  89. return false;
  90. Lo = &*MRI->use_instr_begin(HiDestReg);
  91. if (Lo->getOpcode() != RISCV::ADDI)
  92. return false;
  93. const MachineOperand &LoOp2 = Lo->getOperand(2);
  94. if (Hi.getOpcode() == RISCV::LUI) {
  95. if (LoOp2.getTargetFlags() != RISCVII::MO_LO ||
  96. !(LoOp2.isGlobal() || LoOp2.isCPI()) || LoOp2.getOffset() != 0)
  97. return false;
  98. } else {
  99. assert(Hi.getOpcode() == RISCV::AUIPC);
  100. if (LoOp2.getTargetFlags() != RISCVII::MO_PCREL_LO ||
  101. LoOp2.getType() != MachineOperand::MO_MCSymbol)
  102. return false;
  103. }
  104. if (HiOp1.isGlobal()) {
  105. LLVM_DEBUG(dbgs() << " Found lowered global address: "
  106. << *HiOp1.getGlobal() << "\n");
  107. } else {
  108. assert(HiOp1.isCPI());
  109. LLVM_DEBUG(dbgs() << " Found lowered constant pool: " << HiOp1.getIndex()
  110. << "\n");
  111. }
  112. return true;
  113. }
  114. // Update the offset in Hi and Lo instructions.
  115. // Delete the tail instruction and update all the uses to use the
  116. // output from Lo.
  117. void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &Hi, MachineInstr &Lo,
  118. MachineInstr &Tail, int64_t Offset) {
  119. assert(isInt<32>(Offset) && "Unexpected offset");
  120. // Put the offset back in Hi and the Lo
  121. Hi.getOperand(1).setOffset(Offset);
  122. if (Hi.getOpcode() != RISCV::AUIPC)
  123. Lo.getOperand(2).setOffset(Offset);
  124. // Delete the tail instruction.
  125. MRI->replaceRegWith(Tail.getOperand(0).getReg(), Lo.getOperand(0).getReg());
  126. Tail.eraseFromParent();
  127. LLVM_DEBUG(dbgs() << " Merged offset " << Offset << " into base.\n"
  128. << " " << Hi << " " << Lo;);
  129. }
// Detect patterns for large offsets that are passed into an ADD instruction.
// If the pattern is found, updates the offset in Hi and Lo instructions
// and deletes TailAdd and the instructions that produced the offset.
//
// Base address lowering is of the form:
//   Hi: lui  vreg1, %hi(s)
//   Lo: addi vreg2, vreg1, %lo(s)
//
// The large offset can be of two forms:
//   1) Non-zero bits in both the lower 12 and upper 20 bits:
//        OffsetLui:  lui  vreg3, 4
//        OffsetTail: addi voff, vreg3, 188
//   2) Non-zero bits in the upper 20 bits only:
//        OffsetTail: lui  voff, 128
//
// Either way the offset is combined with the base by:
//   TailAdd: add vreg4, vreg2, voff
bool RISCVMergeBaseOffsetOpt::foldLargeOffset(MachineInstr &Hi,
                                              MachineInstr &Lo,
                                              MachineInstr &TailAdd,
                                              Register GAReg) {
  assert((TailAdd.getOpcode() == RISCV::ADD) && "Expected ADD instruction!");
  // One ADD operand is the lowered address (GAReg); the other carries the
  // offset value.
  Register Rs = TailAdd.getOperand(1).getReg();
  Register Rt = TailAdd.getOperand(2).getReg();
  Register Reg = Rs == GAReg ? Rt : Rs;
  // Can't fold if the register has more than one use.
  if (!MRI->hasOneUse(Reg))
    return false;
  // This can point to an ADDI(W) or a LUI:
  MachineInstr &OffsetTail = *MRI->getVRegDef(Reg);
  if (OffsetTail.getOpcode() == RISCV::ADDI ||
      OffsetTail.getOpcode() == RISCV::ADDIW) {
    // The offset value has non zero bits in both %hi and %lo parts.
    // Detect an ADDI that feeds from a LUI instruction.
    MachineOperand &AddiImmOp = OffsetTail.getOperand(2);
    // A target flag on the immediate would mean it is a relocation, not a
    // plain constant we can fold.
    if (AddiImmOp.getTargetFlags() != RISCVII::MO_None)
      return false;
    int64_t OffLo = AddiImmOp.getImm();
    MachineInstr &OffsetLui =
        *MRI->getVRegDef(OffsetTail.getOperand(1).getReg());
    MachineOperand &LuiImmOp = OffsetLui.getOperand(1);
    // The LUI must also be a plain constant and feed only this ADDI, since
    // both instructions are deleted below.
    if (OffsetLui.getOpcode() != RISCV::LUI ||
        LuiImmOp.getTargetFlags() != RISCVII::MO_None ||
        !MRI->hasOneUse(OffsetLui.getOperand(0).getReg()))
      return false;
    // Reassemble the constant: LUI supplies the upper 20 bits (shifted and
    // sign extended), the ADDI immediate the lower 12.
    int64_t Offset = SignExtend64<32>(LuiImmOp.getImm() << 12);
    Offset += OffLo;
    // RV32 ignores the upper 32 bits. ADDIW sign extends the result.
    if (!ST->is64Bit() || OffsetTail.getOpcode() == RISCV::ADDIW)
      Offset = SignExtend64<32>(Offset);
    // We can only fold simm32 offsets.
    if (!isInt<32>(Offset))
      return false;
    LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail
                      << " " << OffsetLui);
    // foldOffset removes TailAdd; the offset producers become dead and are
    // erased here (OffsetTail first, which drops the last use of OffsetLui).
    foldOffset(Hi, Lo, TailAdd, Offset);
    OffsetTail.eraseFromParent();
    OffsetLui.eraseFromParent();
    return true;
  } else if (OffsetTail.getOpcode() == RISCV::LUI) {
    // The offset value has all zero bits in the lower 12 bits. Only LUI
    // exists.
    LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
    int64_t Offset = SignExtend64<32>(OffsetTail.getOperand(1).getImm() << 12);
    foldOffset(Hi, Lo, TailAdd, Offset);
    OffsetTail.eraseFromParent();
    return true;
  }
  return false;
}
  203. // Detect patterns for offsets that are passed into a SHXADD instruction.
  204. // The offset has 1, 2, or 3 trailing zeros and fits in simm13, simm14, simm15.
  205. // The constant is created with addi voff, x0, C, and shXadd is used to
  206. // fill insert the trailing zeros and do the addition.
  207. // If the pattern is found, updates the offset in Hi and Lo instructions
  208. // and deletes TailShXAdd and the instructions that produced the offset.
  209. //
  210. // Hi: lui vreg1, %hi(s)
  211. // Lo: addi vreg2, vreg1, %lo(s)
  212. // OffsetTail: addi voff, x0, C
  213. // TailAdd: shXadd vreg4, voff, vreg2
  214. bool RISCVMergeBaseOffsetOpt::foldShiftedOffset(MachineInstr &Hi,
  215. MachineInstr &Lo,
  216. MachineInstr &TailShXAdd,
  217. Register GAReg) {
  218. assert((TailShXAdd.getOpcode() == RISCV::SH1ADD ||
  219. TailShXAdd.getOpcode() == RISCV::SH2ADD ||
  220. TailShXAdd.getOpcode() == RISCV::SH3ADD) &&
  221. "Expected SHXADD instruction!");
  222. // The first source is the shifted operand.
  223. Register Rs1 = TailShXAdd.getOperand(1).getReg();
  224. if (GAReg != TailShXAdd.getOperand(2).getReg())
  225. return false;
  226. // Can't fold if the register has more than one use.
  227. if (!MRI->hasOneUse(Rs1))
  228. return false;
  229. // This can point to an ADDI X0, C.
  230. MachineInstr &OffsetTail = *MRI->getVRegDef(Rs1);
  231. if (OffsetTail.getOpcode() != RISCV::ADDI)
  232. return false;
  233. if (!OffsetTail.getOperand(1).isReg() ||
  234. OffsetTail.getOperand(1).getReg() != RISCV::X0 ||
  235. !OffsetTail.getOperand(2).isImm())
  236. return false;
  237. int64_t Offset = OffsetTail.getOperand(2).getImm();
  238. assert(isInt<12>(Offset) && "Unexpected offset");
  239. unsigned ShAmt;
  240. switch (TailShXAdd.getOpcode()) {
  241. default: llvm_unreachable("Unexpected opcode");
  242. case RISCV::SH1ADD: ShAmt = 1; break;
  243. case RISCV::SH2ADD: ShAmt = 2; break;
  244. case RISCV::SH3ADD: ShAmt = 3; break;
  245. }
  246. Offset = (uint64_t)Offset << ShAmt;
  247. LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
  248. foldOffset(Hi, Lo, TailShXAdd, Offset);
  249. OffsetTail.eraseFromParent();
  250. return true;
  251. }
// Inspect the single user of Lo's result and, if it is an arithmetic
// instruction carrying a constant offset (ADDI, ADD of a large constant, or
// SHXADD of a small shifted constant), fold that offset into the Hi/Lo
// relocations and delete the arithmetic. Returns true if anything changed.
bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi,
                                                  MachineInstr &Lo) {
  Register DestReg = Lo.getOperand(0).getReg();
  // Look for arithmetic instructions we can get an offset from.
  // We might be able to remove the arithmetic instructions by folding the
  // offset into the LUI+ADDI.
  if (!MRI->hasOneUse(DestReg))
    return false;
  // Lo has only one use.
  MachineInstr &Tail = *MRI->use_instr_begin(DestReg);
  switch (Tail.getOpcode()) {
  default:
    LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
                      << Tail);
    break;
  case RISCV::ADDI: {
    // Offset is simply an immediate operand.
    int64_t Offset = Tail.getOperand(2).getImm();
    // We might have two ADDIs in a row.
    Register TailDestReg = Tail.getOperand(0).getReg();
    if (MRI->hasOneUse(TailDestReg)) {
      MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg);
      if (TailTail.getOpcode() == RISCV::ADDI) {
        // Fold the summed immediate; foldOffset removes TailTail (rewiring
        // its users to Lo), then the now-dead first ADDI is erased here.
        Offset += TailTail.getOperand(2).getImm();
        LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail << TailTail);
        foldOffset(Hi, Lo, TailTail, Offset);
        Tail.eraseFromParent();
        return true;
      }
    }
    LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail);
    foldOffset(Hi, Lo, Tail, Offset);
    return true;
  }
  case RISCV::ADD:
    // The offset is too large to fit in the immediate field of ADDI.
    // This can be in two forms:
    // 1) LUI hi_Offset followed by:
    //    ADDI lo_offset
    //    This happens in case the offset has non zero bits in
    //    both hi 20 and lo 12 bits.
    // 2) LUI (offset20)
    //    This happens in case the lower 12 bits of the offset are zeros.
    return foldLargeOffset(Hi, Lo, Tail, DestReg);
  case RISCV::SH1ADD:
  case RISCV::SH2ADD:
  case RISCV::SH3ADD:
    // The offset is too large to fit in the immediate field of ADDI.
    // It may be encoded as (SH2ADD (ADDI X0, C), DestReg) or
    // (SH3ADD (ADDI X0, C), DestReg).
    return foldShiftedOffset(Hi, Lo, Tail, DestReg);
  }
  return false;
}
  306. bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
  307. MachineInstr &Lo) {
  308. Register DestReg = Lo.getOperand(0).getReg();
  309. // If all the uses are memory ops with the same offset, we can transform:
  310. //
  311. // 1. (medlow pattern):
  312. // Hi: lui vreg1, %hi(foo) ---> lui vreg1, %hi(foo+8)
  313. // Lo: addi vreg2, vreg1, %lo(foo) ---> lw vreg3, lo(foo+8)(vreg1)
  314. // Tail: lw vreg3, 8(vreg2)
  315. //
  316. // 2. (medany pattern):
  317. // Hi: 1:auipc vreg1, %pcrel_hi(s) ---> auipc vreg1, %pcrel_hi(foo+8)
  318. // Lo: addi vreg2, vreg1, %pcrel_lo(1b) ---> lw vreg3, %pcrel_lo(1b)(vreg1)
  319. // Tail: lw vreg3, 8(vreg2)
  320. std::optional<int64_t> CommonOffset;
  321. for (const MachineInstr &UseMI : MRI->use_instructions(DestReg)) {
  322. switch (UseMI.getOpcode()) {
  323. default:
  324. LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI);
  325. return false;
  326. case RISCV::LB:
  327. case RISCV::LH:
  328. case RISCV::LW:
  329. case RISCV::LBU:
  330. case RISCV::LHU:
  331. case RISCV::LWU:
  332. case RISCV::LD:
  333. case RISCV::FLH:
  334. case RISCV::FLW:
  335. case RISCV::FLD:
  336. case RISCV::SB:
  337. case RISCV::SH:
  338. case RISCV::SW:
  339. case RISCV::SD:
  340. case RISCV::FSH:
  341. case RISCV::FSW:
  342. case RISCV::FSD: {
  343. if (UseMI.getOperand(1).isFI())
  344. return false;
  345. // Register defined by Lo should not be the value register.
  346. if (DestReg == UseMI.getOperand(0).getReg())
  347. return false;
  348. assert(DestReg == UseMI.getOperand(1).getReg() &&
  349. "Expected base address use");
  350. // All load/store instructions must use the same offset.
  351. int64_t Offset = UseMI.getOperand(2).getImm();
  352. if (CommonOffset && Offset != CommonOffset)
  353. return false;
  354. CommonOffset = Offset;
  355. }
  356. }
  357. }
  358. // We found a common offset.
  359. // Update the offsets in global address lowering.
  360. // We may have already folded some arithmetic so we need to add to any
  361. // existing offset.
  362. int64_t NewOffset = Hi.getOperand(1).getOffset() + *CommonOffset;
  363. // RV32 ignores the upper 32 bits.
  364. if (!ST->is64Bit())
  365. NewOffset = SignExtend64<32>(NewOffset);
  366. // We can only fold simm32 offsets.
  367. if (!isInt<32>(NewOffset))
  368. return false;
  369. Hi.getOperand(1).setOffset(NewOffset);
  370. MachineOperand &ImmOp = Lo.getOperand(2);
  371. if (Hi.getOpcode() != RISCV::AUIPC)
  372. ImmOp.setOffset(NewOffset);
  373. // Update the immediate in the load/store instructions to add the offset.
  374. for (MachineInstr &UseMI :
  375. llvm::make_early_inc_range(MRI->use_instructions(DestReg))) {
  376. UseMI.removeOperand(2);
  377. UseMI.addOperand(ImmOp);
  378. // Update the base reg in the Tail instruction to feed from LUI.
  379. // Output of Hi is only used in Lo, no need to use MRI->replaceRegWith().
  380. UseMI.getOperand(1).setReg(Hi.getOperand(0).getReg());
  381. }
  382. Lo.eraseFromParent();
  383. return true;
  384. }
  385. bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
  386. if (skipFunction(Fn.getFunction()))
  387. return false;
  388. ST = &Fn.getSubtarget<RISCVSubtarget>();
  389. bool MadeChange = false;
  390. MRI = &Fn.getRegInfo();
  391. for (MachineBasicBlock &MBB : Fn) {
  392. LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
  393. for (MachineInstr &Hi : MBB) {
  394. MachineInstr *Lo = nullptr;
  395. if (!detectFoldable(Hi, Lo))
  396. continue;
  397. MadeChange |= detectAndFoldOffset(Hi, *Lo);
  398. MadeChange |= foldIntoMemoryOps(Hi, *Lo);
  399. }
  400. }
  401. return MadeChange;
  402. }
  403. /// Returns an instance of the Merge Base Offset Optimization pass.
  404. FunctionPass *llvm::createRISCVMergeBaseOffsetOptPass() {
  405. return new RISCVMergeBaseOffsetOpt();
  406. }