//===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs the following peephole optimizations at the MIR level.
//
// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
//    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
//
// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
//    MOVi64imm + ADDXrr ==> ADDXri + ADDXri
//
// 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
//    MOVi64imm + SUBXrr ==> SUBXri + SUBXri
//
//    The MOV pseudo instruction may be expanded into multiple MOV instructions
//    later. In that case, we try to split the constant operand of the MOV into
//    two immediates which can be directly encoded into *Wri/*Xri instructions.
//    This yields two AND/ADD/SUB instructions instead of multiple `mov` +
//    `and/add/sub` instructions.
//
// 4. Remove redundant ORRWrs which is generated by zero-extend.
//
//    %3:gpr32 = ORRWrs $wzr, %2, 0
//    %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
//
//    If AArch64's 32-bit form of instruction defines the source operand of
//    ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
//    operand are set to zero.
//
// 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
//      ==> %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
//
//===----------------------------------------------------------------------===//
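// Illustrative sketch of optimization 1 (the concrete constant and register
// choices here are hypothetical, not from the surrounding code): 0x200400 is
// not a valid logical immediate, so materializing it and ANDing costs three
// instructions,
//
//   mov  w8, #0x400
//   movk w8, #0x20, lsl #16   // w8 = 0x200400
//   and  w0, w0, w8
//
// whereas two immediate-form ANDs whose masks intersect to the same constant
// need only two:
//
//   and  w0, w0, #0x3ffc00
//   and  w0, w0, #0xffe007ff  // 0x3ffc00 & 0xffe007ff == 0x200400
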
#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-mi-peephole-opt"

namespace {
struct AArch64MIPeepholeOpt : public MachineFunctionPass {
  static char ID;

  AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {
    initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry());
  }

  const AArch64InstrInfo *TII;
  const AArch64RegisterInfo *TRI;
  MachineLoopInfo *MLI;
  MachineRegisterInfo *MRI;

  using OpcodePair = std::pair<unsigned, unsigned>;
  template <typename T>
  using SplitAndOpcFunc =
      std::function<std::optional<OpcodePair>(T, unsigned, T &, T &)>;
  using BuildMIFunc =
      std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,
                         Register, Register, Register)>;

  /// For instructions where an immediate operand could be split into two
  /// separate immediate instructions, use splitTwoPartImm to handle the
  /// optimization.
  ///
  /// To implement, the following function types must be passed to
  /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if
  /// splitting the immediate is valid and returns the associated new opcode. A
  /// BuildMIFunc must be implemented to build the two immediate instructions.
  ///
  /// Example Pattern (where IMM would require 2+ MOV instructions):
  ///  %dst = <Instr>rr %src IMM [...]
  /// becomes:
  ///  %tmp = <Instr>ri %src (encode half IMM) [...]
  ///  %dst = <Instr>ri %tmp (encode half IMM) [...]
  template <typename T>
  bool splitTwoPartImm(MachineInstr &MI,
                       SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);

  bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
                        MachineInstr *&SubregToRegMI);

  template <typename T>
  bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI);
  template <typename T>
  bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);
  template <typename T>
  bool visitAND(unsigned Opc, MachineInstr &MI);
  bool visitORR(MachineInstr &MI);
  bool visitINSERT(MachineInstr &MI);

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AArch64 MI Peephole Optimization pass";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<MachineLoopInfo>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

char AArch64MIPeepholeOpt::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
                "AArch64 MI Peephole Optimization", false, false)
template <typename T>
static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
  T UImm = static_cast<T>(Imm);
  if (AArch64_AM::isLogicalImmediate(UImm, RegSize))
    return false;

  // If this immediate can be handled by one instruction, do not split it.
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(UImm, RegSize, Insn);
  if (Insn.size() == 1)
    return false;

  // A bitmask immediate consists of consecutive ones. Say there is a constant
  // 0b00000000001000000000010000000000 whose set bits are not consecutive. We
  // can split it into two bitmask immediates like
  // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
  // ANDing with these two bitmask immediates yields the original constant.
  unsigned LowestBitSet = countTrailingZeros(UImm);
  unsigned HighestBitSet = Log2_64(UImm);

  // Create a mask which is filled with ones from the position of the lowest
  // set bit to the position of the highest set bit.
  T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
              (static_cast<T>(1) << LowestBitSet);
  // Create a mask which is filled with ones outside the positions of the
  // lowest and highest set bits.
  T NewImm2 = UImm | ~NewImm1;

  // If the split value is not a valid bitmask immediate, do not split this
  // constant.
  if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
    return false;

  Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
  Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
  return true;
}
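// A worked trace of the split above, using the constant from the comment
// (0b00000000001000000000010000000000 == 0x200400, bits 10 and 21 set):
//   LowestBitSet  = 10, HighestBitSet = 21
//   NewImm1 = (2 << 21) - (1 << 10) = 0x003ffc00  (ones in bits 10..21)
//   NewImm2 = UImm | ~NewImm1       = 0xffe007ff  (ones in bits 0..10, 21..31)
// so NewImm1 & NewImm2 == 0x200400. NewImm1 is a contiguous run of ones and
// NewImm2 is a rotated run of ones, so both are encodable logical immediates.
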
template <typename T>
bool AArch64MIPeepholeOpt::visitAND(
    unsigned Opc, MachineInstr &MI) {
  // Try the transformation below.
  //
  // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
  // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
  //
  // The MOV pseudo instruction may be expanded into multiple mov instructions
  // later. Try to split the constant operand of the mov instruction into two
  // bitmask immediates. That makes only two AND instructions instead of
  // multiple mov + and instructions.
  return splitTwoPartImm<T>(
      MI,
      [Opc](T Imm, unsigned RegSize, T &Imm0,
            T &Imm1) -> std::optional<OpcodePair> {
        if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
          return std::make_pair(Opc, Opc);
        return std::nullopt;
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1);
      });
}
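// Illustrative MIR sketch (virtual register numbers are hypothetical): with
// the 0x200400 constant traced above, visitAND rewrites
//
//   %1:gpr32 = MOVi32imm 2098176              ; 0x200400
//   %2:gpr32 = ANDWrr %0:gpr32, %1:gpr32
//
// into two immediate-form ANDs through a fresh temporary,
//
//   %3:gpr32 = ANDWri %0:gpr32, <enc 0x3ffc00>
//   %4:gpr32 = ANDWri %3:gpr32, <enc 0xffe007ff>
//
// where <enc ...> denotes the encoded logical-immediate form of the mask and
// all uses of %2 are rewritten to %4 by splitTwoPartImm.
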
bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
  // Check whether this ORR comes from the zero-extend pattern below.
  //
  // def : Pat<(i64 (zext GPR32:$src)),
  //           (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
  if (MI.getOperand(3).getImm() != 0)
    return false;

  if (MI.getOperand(1).getReg() != AArch64::WZR)
    return false;

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;

  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
  //
  // When you use the 32-bit form of an instruction, the upper 32 bits of the
  // source registers are ignored and the upper 32 bits of the destination
  // register are set to zero.
  //
  // If AArch64's 32-bit form of instruction defines the source operand of the
  // zero-extend, we do not need the zero-extend. Check that SrcMI's opcode is
  // a real AArch64 instruction; if it is not, conservatively do not process
  // it.
  if (SrcMI->getOpcode() == TargetOpcode::COPY &&
      SrcMI->getOperand(1).getReg().isVirtual()) {
    const TargetRegisterClass *RC =
        MRI->getRegClass(SrcMI->getOperand(1).getReg());

    // A COPY from an FPR will become an FMOVSWr, so emit the FMOVSWr now so
    // that we know the upper bits are zero.
    if (RC != &AArch64::FPR32RegClass &&
        ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass) ||
         SrcMI->getOperand(1).getSubReg() != AArch64::ssub))
      return false;
    Register CpySrc = SrcMI->getOperand(1).getReg();
    if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) {
      CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass);
      BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
              TII->get(TargetOpcode::COPY), CpySrc)
          .add(SrcMI->getOperand(1));
    }
    BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
            TII->get(AArch64::FMOVSWr), SrcMI->getOperand(0).getReg())
        .addReg(CpySrc);
    SrcMI->eraseFromParent();
  } else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
    return false;

  Register DefReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  MRI->replaceRegWith(DefReg, SrcReg);
  MRI->clearKillFlags(SrcReg);
  LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n");
  MI.eraseFromParent();

  return true;
}
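// Illustrative MIR sketch (virtual register numbers are hypothetical): if %2
// is already defined by a real 32-bit AArch64 instruction, its upper 32 bits
// are known to be zero and the zero-extending ORR is redundant, so
//
//   %2:gpr32 = ADDWrr %0:gpr32, %1:gpr32
//   %3:gpr32 = ORRWrs $wzr, %2:gpr32, 0
//   %4:gpr64 = SUBREG_TO_REG 0, %3:gpr32, %subreg.sub_32
//
// becomes:
//
//   %2:gpr32 = ADDWrr %0:gpr32, %1:gpr32
//   %4:gpr64 = SUBREG_TO_REG 0, %2:gpr32, %subreg.sub_32
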
bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
  // Check whether this INSERT_SUBREG comes from the zero-extend pattern below.
  //
  // From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
  // To   %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
  //
  // We're assuming the first operand to INSERT_SUBREG is irrelevant because a
  // COPY would destroy the upper part of the register anyway.
  if (!MI.isRegTiedToDefOperand(1))
    return false;

  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI->getRegClass(DstReg);
  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;

  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
  //
  // When you use the 32-bit form of an instruction, the upper 32 bits of the
  // source registers are ignored and the upper 32 bits of the destination
  // register are set to zero.
  //
  // If AArch64's 32-bit form of instruction defines the source operand of the
  // zero-extend, we do not need the zero-extend. Check that SrcMI's opcode is
  // a real AArch64 instruction; if it is not, conservatively do not process
  // it.
  if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) ||
      !AArch64::GPR64allRegClass.hasSubClassEq(RC))
    return false;

  // Build a SUBREG_TO_REG instruction.
  MachineInstr *SubregMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
              TII->get(TargetOpcode::SUBREG_TO_REG), DstReg)
          .addImm(0)
          .add(MI.getOperand(2))
          .add(MI.getOperand(3));
  LLVM_DEBUG(dbgs() << MI << " replaced by:\n" << *SubregMI << "\n");
  (void)SubregMI;
  MI.eraseFromParent();
  return true;
}
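// Illustrative MIR sketch (virtual register numbers are hypothetical): when
// %1 is defined by a real 32-bit AArch64 instruction,
//
//   %3:gpr64 = INSERT_SUBREG %2:gpr64(tied-def 0), %1:gpr32, %subreg.sub_32
//
// is rewritten into the untied form
//
//   %3:gpr64 = SUBREG_TO_REG 0, %1:gpr32, %subreg.sub_32
//
// which no longer reads the (irrelevant) upper 32 bits of the tied operand.
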
template <typename T>
static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
  // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
  // imm0 and imm1 are non-zero 12-bit unsigned integers.
  if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
      (Imm & ~static_cast<T>(0xffffff)) != 0)
    return false;

  // The immediate cannot be materialized with a single instruction.
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
  if (Insn.size() == 1)
    return false;

  // Split Imm into (Imm0 << 12) + Imm1.
  Imm0 = (Imm >> 12) & 0xfff;
  Imm1 = Imm & 0xfff;
  return true;
}
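// A worked example (the constant is hypothetical): for Imm = 0x123456, both
// 12-bit chunks are non-zero, the value fits in 24 bits, and it cannot be
// materialized with a single MOV, so the split yields Imm0 = 0x123 and
// Imm1 = 0x456. The caller can then emit the pair
//
//   add x0, x1, #0x123, lsl #12
//   add x0, x0, #0x456
//
// since (0x123 << 12) + 0x456 == 0x123456.
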
template <typename T>
bool AArch64MIPeepholeOpt::visitADDSUB(
    unsigned PosOpc, unsigned NegOpc, MachineInstr &MI) {
  // Try the transformation below.
  //
  // MOVi32imm + ADDWrr ==> ADDWri + ADDWri
  // MOVi64imm + ADDXrr ==> ADDXri + ADDXri
  //
  // MOVi32imm + SUBWrr ==> SUBWri + SUBWri
  // MOVi64imm + SUBXrr ==> SUBXri + SUBXri
  //
  // The MOV pseudo instruction may be expanded into multiple mov instructions
  // later. Try to split the constant operand of the mov instruction into two
  // legal add/sub immediates. That makes only two ADD/SUB instructions instead
  // of multiple `mov` + `add/sub` instructions.
  return splitTwoPartImm<T>(
      MI,
      [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
                       T &Imm1) -> std::optional<OpcodePair> {
        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
          return std::make_pair(PosOpc, PosOpc);
        if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
          return std::make_pair(NegOpc, NegOpc);
        return std::nullopt;
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0)
            .addImm(12);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1)
            .addImm(0);
      });
}
template <typename T>
bool AArch64MIPeepholeOpt::visitADDSSUBS(
    OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI) {
  // Try the same transformation as ADDSUB, but with the additional requirement
  // that the condition codes are only used for Equal and Not Equal
  // comparisons.
  return splitTwoPartImm<T>(
      MI,
      [PosOpcs, NegOpcs, &MI, &TRI = TRI,
       &MRI = MRI](T Imm, unsigned RegSize, T &Imm0,
                   T &Imm1) -> std::optional<OpcodePair> {
        OpcodePair OP;
        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
          OP = PosOpcs;
        else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
          OP = NegOpcs;
        else
          return std::nullopt;
        // Check the condition code uses last, since doing so requires an
        // expensive scan over the subsequent instructions.
        MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
        std::optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI);
        if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
          return std::nullopt;
        return OP;
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0)
            .addImm(12);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1)
            .addImm(0);
      });
}
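// Rationale sketch (an editorial illustration; registers and constants are
// hypothetical): splitting a flag-setting instruction means the flags reflect
// only the second, low-chunk operation. The final arithmetic result is the
// same, so N and Z still match the unsplit instruction, but the carry and
// overflow of the two-step sequence can differ, which is why uses of C or V
// are rejected above. For example,
//
//   subs w1, w0, wK            ; wK = 0x123456, followed by b.eq / b.ne
//
// may safely become
//
//   sub  w2, w0, #0x123, lsl #12
//   subs w1, w2, #0x456
//
// whereas a following b.hs (uses C) or b.vs (uses V) blocks the split.
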
// Checks whether the corresponding MOV immediate instruction is applicable for
// this peephole optimization.
bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
                                            MachineInstr *&MovMI,
                                            MachineInstr *&SubregToRegMI) {
  // Check whether the current MBB is inside a loop and the instruction is loop
  // invariant.
  MachineBasicBlock *MBB = MI.getParent();
  MachineLoop *L = MLI->getLoopFor(MBB);
  if (L && !L->isLoopInvariant(MI))
    return false;

  // Check whether the current MI's operand is a MOV with immediate.
  MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!MovMI)
    return false;

  // If it is a SUBREG_TO_REG, check its operand.
  SubregToRegMI = nullptr;
  if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
    SubregToRegMI = MovMI;
    MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
    if (!MovMI)
      return false;
  }

  if (MovMI->getOpcode() != AArch64::MOVi32imm &&
      MovMI->getOpcode() != AArch64::MOVi64imm)
    return false;

  // If the MOV has multiple uses, do not split the immediate because doing so
  // would create more instructions.
  if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
    return false;
  if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
    return false;

  // It is OK to perform this peephole optimization.
  return true;
}
template <typename T>
bool AArch64MIPeepholeOpt::splitTwoPartImm(
    MachineInstr &MI,
    SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
  unsigned RegSize = sizeof(T) * 8;
  assert((RegSize == 32 || RegSize == 64) &&
         "Invalid RegSize for legal immediate peephole optimization");

  // Perform several essential checks against the current MI.
  MachineInstr *MovMI, *SubregToRegMI;
  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
    return false;

  // Split the immediate into Imm0 and Imm1, and calculate the Opcode.
  T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
  // For the 32-bit form of an instruction, the upper 32 bits of the
  // destination register are set to zero. If there is a SUBREG_TO_REG, set the
  // upper 32 bits of Imm to zero as well. This is essential when the immediate
  // value was negative, since it was sign extended when assigned to the 64-bit
  // Imm.
  if (SubregToRegMI)
    Imm &= 0xFFFFFFFF;
  OpcodePair Opcode;
  if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
    Opcode = *R;
  else
    return false;

  // Create new MIs using the first and second opcodes. The opcodes might
  // differ for flag-setting operations that should only set flags on the
  // second instruction.
  // NewTmpReg = Opcode.first SrcReg Imm0
  // NewDstReg = Opcode.second NewTmpReg Imm1

  // Determine register classes for destinations and register operands.
  MachineFunction *MF = MI.getMF();
  const TargetRegisterClass *FirstInstrDstRC =
      TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF);
  const TargetRegisterClass *FirstInstrOperandRC =
      TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF);
  const TargetRegisterClass *SecondInstrDstRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrDstRC
          : TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF);
  const TargetRegisterClass *SecondInstrOperandRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrOperandRC
          : TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF);

  // Get the old destination register and create the new ones.
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC);
  // If DstReg is not virtual (likely WZR or XZR), reuse that same destination
  // register.
  Register NewDstReg = DstReg.isVirtual()
                           ? MRI->createVirtualRegister(SecondInstrDstRC)
                           : DstReg;

  // Constrain the registers based on their new uses.
  MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
  MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
  if (DstReg != NewDstReg)
    MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));

  // Call the delegating operation to build the instructions.
  BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);

  // replaceRegWith would also change MI's definition register; restore it so
  // that MI keeps defining DstReg, preserving SSA form until MI is deleted.
  // This is only needed if we created a new destination register.
  if (DstReg != NewDstReg) {
    MRI->replaceRegWith(DstReg, NewDstReg);
    MI.getOperand(0).setReg(DstReg);
  }

  // Remove the now-dead instructions.
  MI.eraseFromParent();
  if (SubregToRegMI)
    SubregToRegMI->eraseFromParent();
  MovMI->eraseFromParent();

  return true;
}
bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  TRI = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  MLI = &getAnalysis<MachineLoopInfo>();
  MRI = &MF.getRegInfo();

  assert(MRI->isSSA() && "Expected to be run on SSA form!");

  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : make_early_inc_range(MBB)) {
      switch (MI.getOpcode()) {
      default:
        break;
      case AArch64::INSERT_SUBREG:
        Changed |= visitINSERT(MI);
        break;
      case AArch64::ANDWrr:
        Changed |= visitAND<uint32_t>(AArch64::ANDWri, MI);
        break;
      case AArch64::ANDXrr:
        Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI);
        break;
      case AArch64::ORRWrs:
        Changed |= visitORR(MI);
        break;
      case AArch64::ADDWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI);
        break;
      case AArch64::SUBWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI);
        break;
      case AArch64::ADDXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI);
        break;
      case AArch64::SUBXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI);
        break;
      case AArch64::ADDSWrr:
        Changed |= visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
                                           {AArch64::SUBWri, AArch64::SUBSWri},
                                           MI);
        break;
      case AArch64::SUBSWrr:
        Changed |= visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
                                           {AArch64::ADDWri, AArch64::ADDSWri},
                                           MI);
        break;
      case AArch64::ADDSXrr:
        Changed |= visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
                                           {AArch64::SUBXri, AArch64::SUBSXri},
                                           MI);
        break;
      case AArch64::SUBSXrr:
        Changed |= visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
                                           {AArch64::ADDXri, AArch64::ADDSXri},
                                           MI);
        break;
      }
    }
  }

  return Changed;
}
FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
  return new AArch64MIPeepholeOpt();
}