AArch64MIPeepholeOpt.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
  1. //===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass performs below peephole optimizations on MIR level.
  10. //
  11. // 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
  12. // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
  13. //
// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
// MOVi64imm + ADDXrr ==> ADDXri + ADDXri
  16. //
  17. // 3. MOVi32imm + SUBWrr ==> SUBWRi + SUBWRi
  18. // MOVi64imm + SUBXrr ==> SUBXri + SUBXri
  19. //
  20. // The mov pseudo instruction could be expanded to multiple mov instructions
  21. // later. In this case, we could try to split the constant operand of mov
  22. // instruction into two immediates which can be directly encoded into
  23. // *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
  24. // multiple `mov` + `and/add/sub` instructions.
  25. //
  26. // 4. Remove redundant ORRWrs which is generated by zero-extend.
  27. //
  28. // %3:gpr32 = ORRWrs $wzr, %2, 0
  29. // %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
  30. //
  31. // If AArch64's 32-bit form of instruction defines the source operand of
  32. // ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
  33. // operand are set to zero.
  34. //
  35. //===----------------------------------------------------------------------===//
  36. #include "AArch64ExpandImm.h"
  37. #include "AArch64InstrInfo.h"
  38. #include "MCTargetDesc/AArch64AddressingModes.h"
  39. #include "llvm/ADT/Optional.h"
  40. #include "llvm/ADT/SetVector.h"
  41. #include "llvm/CodeGen/MachineDominators.h"
  42. #include "llvm/CodeGen/MachineLoopInfo.h"
  43. using namespace llvm;
  44. #define DEBUG_TYPE "aarch64-mi-peephole-opt"
namespace {

struct AArch64MIPeepholeOpt : public MachineFunctionPass {
  static char ID;

  AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {
    initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry());
  }

  // Cached per-function state; initialized in runOnMachineFunction.
  const AArch64InstrInfo *TII;
  const AArch64RegisterInfo *TRI;
  MachineLoopInfo *MLI;
  MachineRegisterInfo *MRI;

  // Callback: given (Imm, RegSize, &Imm0, &Imm1), decides whether Imm can be
  // split into two encodable halves; on success writes the halves and returns
  // the new immediate-form opcode, otherwise returns None.
  template <typename T>
  using SplitAndOpcFunc =
      std::function<Optional<unsigned>(T, unsigned, T &, T &)>;
  // Callback: builds the two replacement instructions from
  // (MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg).
  using BuildMIFunc =
      std::function<void(MachineInstr &, unsigned, unsigned, unsigned, Register,
                         Register, Register)>;

  /// For instructions where an immediate operand could be split into two
  /// separate immediate instructions, use the splitTwoPartImm to handle the
  /// optimization.
  ///
  /// To implement, the following function types must be passed to
  /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if
  /// splitting the immediate is valid and returns the associated new opcode. A
  /// BuildMIFunc must be implemented to build the two immediate instructions.
  ///
  /// Example Pattern (where IMM would require 2+ MOV instructions):
  ///  %dst = <Instr>rr %src IMM [...]
  /// becomes:
  ///  %tmp = <Instr>ri %src (encode half IMM) [...]
  ///  %dst = <Instr>ri %tmp (encode half IMM) [...]
  template <typename T>
  bool splitTwoPartImm(MachineInstr &MI,
                       SmallSetVector<MachineInstr *, 8> &ToBeRemoved,
                       SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);

  // Returns true when MI's second source operand is defined by a single-use
  // MOVi32imm/MOVi64imm (possibly seen through SUBREG_TO_REG); the defining
  // instructions are returned through the out-parameters.
  bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
                        MachineInstr *&SubregToRegMI);

  template <typename T>
  bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI,
                   SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
  template <typename T>
  bool visitAND(unsigned Opc, MachineInstr &MI,
                SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
  bool visitORR(MachineInstr &MI,
                SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AArch64 MI Peephole Optimization pass";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<MachineLoopInfo>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

char AArch64MIPeepholeOpt::ID = 0;

} // end anonymous namespace
  101. INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
  102. "AArch64 MI Peephole Optimization", false, false)
  103. template <typename T>
  104. static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
  105. T UImm = static_cast<T>(Imm);
  106. if (AArch64_AM::isLogicalImmediate(UImm, RegSize))
  107. return false;
  108. // If this immediate can be handled by one instruction, do not split it.
  109. SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  110. AArch64_IMM::expandMOVImm(UImm, RegSize, Insn);
  111. if (Insn.size() == 1)
  112. return false;
  113. // The bitmask immediate consists of consecutive ones. Let's say there is
  114. // constant 0b00000000001000000000010000000000 which does not consist of
  115. // consecutive ones. We can split it in to two bitmask immediate like
  116. // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
  117. // If we do AND with these two bitmask immediate, we can see original one.
  118. unsigned LowestBitSet = countTrailingZeros(UImm);
  119. unsigned HighestBitSet = Log2_64(UImm);
  120. // Create a mask which is filled with one from the position of lowest bit set
  121. // to the position of highest bit set.
  122. T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
  123. (static_cast<T>(1) << LowestBitSet);
  124. // Create a mask which is filled with one outside the position of lowest bit
  125. // set and the position of highest bit set.
  126. T NewImm2 = UImm | ~NewImm1;
  127. // If the split value is not valid bitmask immediate, do not split this
  128. // constant.
  129. if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
  130. return false;
  131. Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
  132. Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
  133. return true;
  134. }
template <typename T>
bool AArch64MIPeepholeOpt::visitAND(
    unsigned Opc, MachineInstr &MI,
    SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
  // Try below transformation.
  //
  // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
  // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
  //
  // The mov pseudo instruction could be expanded to multiple mov instructions
  // later. Let's try to split the constant operand of mov instruction into two
  // bitmask immediates. It makes only two AND instructions instead of multiple
  // mov + and instructions.
  return splitTwoPartImm<T>(
      MI, ToBeRemoved,
      // Accept the split only when both halves are valid bitmask immediates;
      // the immediate-form opcode is the one supplied by the caller.
      [Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<unsigned> {
        if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
          return Opc;
        return None;
      },
      // Emit the two chained ANDri instructions in front of MI.
      [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0);
        BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1);
      });
}
bool AArch64MIPeepholeOpt::visitORR(
    MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
  // Check this ORR comes from below zero-extend pattern.
  //
  // def : Pat<(i64 (zext GPR32:$src)),
  //           (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
  // Operand 3 is the shift amount; only an unshifted ORR acts as a plain copy.
  if (MI.getOperand(3).getImm() != 0)
    return false;

  if (MI.getOperand(1).getReg() != AArch64::WZR)
    return false;

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;

  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
  //
  // When you use the 32-bit form of an instruction, the upper 32 bits of the
  // source registers are ignored and the upper 32 bits of the destination
  // register are set to zero.
  //
  // If AArch64's 32-bit form of instruction defines the source operand of
  // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is
  // real AArch64 instruction and if it is not, do not process the opcode
  // conservatively.
  if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
    return false;

  Register DefReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  // Forward the already-zero-extended source register to every user of the
  // ORR's result.
  MRI->replaceRegWith(DefReg, SrcReg);
  MRI->clearKillFlags(SrcReg);
  // replaceRegWith changes MI's definition register. Keep it for SSA form until
  // deleting MI.
  MI.getOperand(0).setReg(DefReg);
  ToBeRemoved.insert(&MI);

  LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n");

  return true;
}
  204. template <typename T>
  205. static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
  206. // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
  207. // imm0 and imm1 are non-zero 12-bit unsigned int.
  208. if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
  209. (Imm & ~static_cast<T>(0xffffff)) != 0)
  210. return false;
  211. // The immediate can not be composed via a single instruction.
  212. SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  213. AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
  214. if (Insn.size() == 1)
  215. return false;
  216. // Split Imm into (Imm0 << 12) + Imm1;
  217. Imm0 = (Imm >> 12) & 0xfff;
  218. Imm1 = Imm & 0xfff;
  219. return true;
  220. }
template <typename T>
bool AArch64MIPeepholeOpt::visitADDSUB(
    unsigned PosOpc, unsigned NegOpc, MachineInstr &MI,
    SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
  // Try below transformation.
  //
  // MOVi32imm + ADDWrr ==> ADDWri + ADDWri
  // MOVi64imm + ADDXrr ==> ADDXri + ADDXri
  //
  // MOVi32imm + SUBWrr ==> SUBWri + SUBWri
  // MOVi64imm + SUBXrr ==> SUBXri + SUBXri
  //
  // The mov pseudo instruction could be expanded to multiple mov instructions
  // later. Let's try to split the constant operand of mov instruction into two
  // legal add/sub immediates. It makes only two ADD/SUB instructions instead of
  // multiple `mov` + `add/sub` instructions.
  return splitTwoPartImm<T>(
      MI, ToBeRemoved,
      // If Imm itself splits into two 12-bit chunks, keep the matching opcode;
      // if its negation splits, use the opposite (ADD<->SUB) opcode instead.
      [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
                       T &Imm1) -> Optional<unsigned> {
        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
          return PosOpc;
        if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
          return NegOpc;
        return None;
      },
      // Emit the two chained ri-form instructions: the first applies the high
      // chunk shifted left by 12, the second the low chunk unshifted.
      [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0)
            .addImm(12);
        BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1)
            .addImm(0);
      });
}
  262. // Checks if the corresponding MOV immediate instruction is applicable for
  263. // this peephole optimization.
  264. bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
  265. MachineInstr *&MovMI,
  266. MachineInstr *&SubregToRegMI) {
  267. // Check whether current MBB is in loop and the AND is loop invariant.
  268. MachineBasicBlock *MBB = MI.getParent();
  269. MachineLoop *L = MLI->getLoopFor(MBB);
  270. if (L && !L->isLoopInvariant(MI))
  271. return false;
  272. // Check whether current MI's operand is MOV with immediate.
  273. MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  274. if (!MovMI)
  275. return false;
  276. // If it is SUBREG_TO_REG, check its operand.
  277. SubregToRegMI = nullptr;
  278. if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
  279. SubregToRegMI = MovMI;
  280. MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
  281. if (!MovMI)
  282. return false;
  283. }
  284. if (MovMI->getOpcode() != AArch64::MOVi32imm &&
  285. MovMI->getOpcode() != AArch64::MOVi64imm)
  286. return false;
  287. // If the MOV has multiple uses, do not split the immediate because it causes
  288. // more instructions.
  289. if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
  290. return false;
  291. if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
  292. return false;
  293. // It is OK to perform this peephole optimization.
  294. return true;
  295. }
// Shared driver for the immediate-splitting peepholes: validates the feeding
// MOV, asks SplitAndOpc for the two halves and the new opcode, emits the
// replacement instructions via BuildInstr, and rewires users of the result.
template <typename T>
bool AArch64MIPeepholeOpt::splitTwoPartImm(
    MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved,
    SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
  unsigned RegSize = sizeof(T) * 8;
  assert((RegSize == 32 || RegSize == 64) &&
         "Invalid RegSize for legal immediate peephole optimization");

  // Perform several essential checks against current MI.
  MachineInstr *MovMI, *SubregToRegMI;
  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
    return false;

  // Split the immediate to Imm0 and Imm1, and calculate the Opcode.
  T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
  // For the 32 bit form of instruction, the upper 32 bits of the destination
  // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits
  // of Imm to zero. This is essential if the Immediate value was a negative
  // number since it was sign extended when we assign to the 64-bit Imm.
  if (SubregToRegMI)
    Imm &= 0xFFFFFFFF;
  unsigned Opcode;
  if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
    Opcode = R.getValue();
  else
    return false;

  // Create new ADD/SUB MIs.
  MachineFunction *MF = MI.getMF();
  // Operand 0 of the new ri-form instruction is its destination class,
  // operand 1 its source class.
  const TargetRegisterClass *RC =
      TII->getRegClass(TII->get(Opcode), 0, TRI, *MF);
  const TargetRegisterClass *ORC =
      TII->getRegClass(TII->get(Opcode), 1, TRI, *MF);
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register NewTmpReg = MRI->createVirtualRegister(RC);
  Register NewDstReg = MRI->createVirtualRegister(RC);

  // Constrain classes so the new instructions verify; NewDstReg must also
  // satisfy whatever class the original destination's users require.
  MRI->constrainRegClass(SrcReg, RC);
  MRI->constrainRegClass(NewTmpReg, ORC);
  MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));

  BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);

  MRI->replaceRegWith(DstReg, NewDstReg);
  // replaceRegWith changes MI's definition register. Keep it for SSA form until
  // deleting MI.
  MI.getOperand(0).setReg(DstReg);

  // Record the MIs need to be removed.
  ToBeRemoved.insert(&MI);
  if (SubregToRegMI)
    ToBeRemoved.insert(SubregToRegMI);
  ToBeRemoved.insert(MovMI);

  return true;
}
  345. bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
  346. if (skipFunction(MF.getFunction()))
  347. return false;
  348. TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  349. TRI = static_cast<const AArch64RegisterInfo *>(
  350. MF.getSubtarget().getRegisterInfo());
  351. MLI = &getAnalysis<MachineLoopInfo>();
  352. MRI = &MF.getRegInfo();
  353. assert(MRI->isSSA() && "Expected to be run on SSA form!");
  354. bool Changed = false;
  355. SmallSetVector<MachineInstr *, 8> ToBeRemoved;
  356. for (MachineBasicBlock &MBB : MF) {
  357. for (MachineInstr &MI : MBB) {
  358. switch (MI.getOpcode()) {
  359. default:
  360. break;
  361. case AArch64::ANDWrr:
  362. Changed = visitAND<uint32_t>(AArch64::ANDWri, MI, ToBeRemoved);
  363. break;
  364. case AArch64::ANDXrr:
  365. Changed = visitAND<uint64_t>(AArch64::ANDXri, MI, ToBeRemoved);
  366. break;
  367. case AArch64::ORRWrs:
  368. Changed = visitORR(MI, ToBeRemoved);
  369. break;
  370. case AArch64::ADDWrr:
  371. Changed = visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI,
  372. ToBeRemoved);
  373. break;
  374. case AArch64::SUBWrr:
  375. Changed = visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI,
  376. ToBeRemoved);
  377. break;
  378. case AArch64::ADDXrr:
  379. Changed = visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI,
  380. ToBeRemoved);
  381. break;
  382. case AArch64::SUBXrr:
  383. Changed = visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI,
  384. ToBeRemoved);
  385. break;
  386. }
  387. }
  388. }
  389. for (MachineInstr *MI : ToBeRemoved)
  390. MI->eraseFromParent();
  391. return Changed;
  392. }
// Factory hooked into the AArch64 target's pass pipeline setup.
FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
  return new AArch64MIPeepholeOpt();
}