// PPCPreEmitPeephole.cpp (24 KB)
  1. //===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // A pre-emit peephole for catching opportunities introduced by late passes such
  10. // as MachineBlockPlacement.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "PPC.h"
  14. #include "PPCInstrInfo.h"
  15. #include "PPCSubtarget.h"
  16. #include "llvm/ADT/DenseMap.h"
  17. #include "llvm/ADT/Statistic.h"
  18. #include "llvm/CodeGen/LivePhysRegs.h"
  19. #include "llvm/CodeGen/MachineBasicBlock.h"
  20. #include "llvm/CodeGen/MachineFunctionPass.h"
  21. #include "llvm/CodeGen/MachineInstrBuilder.h"
  22. #include "llvm/CodeGen/MachineRegisterInfo.h"
  23. #include "llvm/CodeGen/RegisterScavenging.h"
  24. #include "llvm/MC/MCContext.h"
  25. #include "llvm/Support/CommandLine.h"
  26. #include "llvm/Support/Debug.h"
  27. using namespace llvm;
  28. #define DEBUG_TYPE "ppc-pre-emit-peephole"
  29. STATISTIC(NumRRConvertedInPreEmit,
  30. "Number of r+r instructions converted to r+i in pre-emit peephole");
  31. STATISTIC(NumRemovedInPreEmit,
  32. "Number of instructions deleted in pre-emit peephole");
  33. STATISTIC(NumberOfSelfCopies,
  34. "Number of self copy instructions eliminated");
  35. STATISTIC(NumFrameOffFoldInPreEmit,
  36. "Number of folding frame offset by using r+r in pre-emit peephole");
  37. STATISTIC(NumCmpsInPreEmit,
  38. "Number of compares eliminated in pre-emit peephole");
  39. static cl::opt<bool>
  40. EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true),
  41. cl::desc("enable PC Relative linker optimization"));
  42. static cl::opt<bool>
  43. RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
  44. cl::desc("Run pre-emit peephole optimizations."));
  45. static cl::opt<uint64_t>
  46. DSCRValue("ppc-set-dscr", cl::Hidden,
  47. cl::desc("Set the Data Stream Control Register."));
  48. namespace {
  49. static bool hasPCRelativeForm(MachineInstr &Use) {
  50. switch (Use.getOpcode()) {
  51. default:
  52. return false;
  53. case PPC::LBZ:
  54. case PPC::LBZ8:
  55. case PPC::LHA:
  56. case PPC::LHA8:
  57. case PPC::LHZ:
  58. case PPC::LHZ8:
  59. case PPC::LWZ:
  60. case PPC::LWZ8:
  61. case PPC::STB:
  62. case PPC::STB8:
  63. case PPC::STH:
  64. case PPC::STH8:
  65. case PPC::STW:
  66. case PPC::STW8:
  67. case PPC::LD:
  68. case PPC::STD:
  69. case PPC::LWA:
  70. case PPC::LXSD:
  71. case PPC::LXSSP:
  72. case PPC::LXV:
  73. case PPC::STXSD:
  74. case PPC::STXSSP:
  75. case PPC::STXV:
  76. case PPC::LFD:
  77. case PPC::LFS:
  78. case PPC::STFD:
  79. case PPC::STFS:
  80. case PPC::DFLOADf32:
  81. case PPC::DFLOADf64:
  82. case PPC::DFSTOREf32:
  83. case PPC::DFSTOREf64:
  84. return true;
  85. }
  86. }
  87. class PPCPreEmitPeephole : public MachineFunctionPass {
  88. public:
  89. static char ID;
  90. PPCPreEmitPeephole() : MachineFunctionPass(ID) {
  91. initializePPCPreEmitPeepholePass(*PassRegistry::getPassRegistry());
  92. }
  93. void getAnalysisUsage(AnalysisUsage &AU) const override {
  94. MachineFunctionPass::getAnalysisUsage(AU);
  95. }
  96. MachineFunctionProperties getRequiredProperties() const override {
  97. return MachineFunctionProperties().set(
  98. MachineFunctionProperties::Property::NoVRegs);
  99. }
  100. // This function removes any redundant load immediates. It has two level
  101. // loops - The outer loop finds the load immediates BBI that could be used
  102. // to replace following redundancy. The inner loop scans instructions that
  103. // after BBI to find redundancy and update kill/dead flags accordingly. If
  104. // AfterBBI is the same as BBI, it is redundant, otherwise any instructions
  105. // that modify the def register of BBI would break the scanning.
  106. // DeadOrKillToUnset is a pointer to the previous operand that had the
  107. // kill/dead flag set. It keeps track of the def register of BBI, the use
  108. // registers of AfterBBIs and the def registers of AfterBBIs.
  109. bool removeRedundantLIs(MachineBasicBlock &MBB,
  110. const TargetRegisterInfo *TRI) {
  111. LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n";
  112. MBB.dump(); dbgs() << "\n");
  113. DenseSet<MachineInstr *> InstrsToErase;
  114. for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
  115. // Skip load immediate that is marked to be erased later because it
  116. // cannot be used to replace any other instructions.
  117. if (InstrsToErase.contains(&*BBI))
  118. continue;
  119. // Skip non-load immediate.
  120. unsigned Opc = BBI->getOpcode();
  121. if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS &&
  122. Opc != PPC::LIS8)
  123. continue;
  124. // Skip load immediate, where the operand is a relocation (e.g., $r3 =
  125. // LI target-flags(ppc-lo) %const.0).
  126. if (!BBI->getOperand(1).isImm())
  127. continue;
  128. assert(BBI->getOperand(0).isReg() &&
  129. "Expected a register for the first operand");
  130. LLVM_DEBUG(dbgs() << "Scanning after load immediate: "; BBI->dump(););
  131. Register Reg = BBI->getOperand(0).getReg();
  132. int64_t Imm = BBI->getOperand(1).getImm();
  133. MachineOperand *DeadOrKillToUnset = nullptr;
  134. if (BBI->getOperand(0).isDead()) {
  135. DeadOrKillToUnset = &BBI->getOperand(0);
  136. LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset
  137. << " from load immediate " << *BBI
  138. << " is a unsetting candidate\n");
  139. }
  140. // This loop scans instructions after BBI to see if there is any
  141. // redundant load immediate.
  142. for (auto AfterBBI = std::next(BBI); AfterBBI != MBB.instr_end();
  143. ++AfterBBI) {
  144. // Track the operand that kill Reg. We would unset the kill flag of
  145. // the operand if there is a following redundant load immediate.
  146. int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI);
  147. // We can't just clear implicit kills, so if we encounter one, stop
  148. // looking further.
  149. if (KillIdx != -1 && AfterBBI->getOperand(KillIdx).isImplicit()) {
  150. LLVM_DEBUG(dbgs()
  151. << "Encountered an implicit kill, cannot proceed: ");
  152. LLVM_DEBUG(AfterBBI->dump());
  153. break;
  154. }
  155. if (KillIdx != -1) {
  156. assert(!DeadOrKillToUnset && "Shouldn't kill same register twice");
  157. DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx);
  158. LLVM_DEBUG(dbgs()
  159. << " Kill flag of " << *DeadOrKillToUnset << " from "
  160. << *AfterBBI << " is a unsetting candidate\n");
  161. }
  162. if (!AfterBBI->modifiesRegister(Reg, TRI))
  163. continue;
  164. // Finish scanning because Reg is overwritten by a non-load
  165. // instruction.
  166. if (AfterBBI->getOpcode() != Opc)
  167. break;
  168. assert(AfterBBI->getOperand(0).isReg() &&
  169. "Expected a register for the first operand");
  170. // Finish scanning because Reg is overwritten by a relocation or a
  171. // different value.
  172. if (!AfterBBI->getOperand(1).isImm() ||
  173. AfterBBI->getOperand(1).getImm() != Imm)
  174. break;
  175. // It loads same immediate value to the same Reg, which is redundant.
  176. // We would unset kill flag in previous Reg usage to extend live range
  177. // of Reg first, then remove the redundancy.
  178. if (DeadOrKillToUnset) {
  179. LLVM_DEBUG(dbgs()
  180. << " Unset dead/kill flag of " << *DeadOrKillToUnset
  181. << " from " << *DeadOrKillToUnset->getParent());
  182. if (DeadOrKillToUnset->isDef())
  183. DeadOrKillToUnset->setIsDead(false);
  184. else
  185. DeadOrKillToUnset->setIsKill(false);
  186. }
  187. DeadOrKillToUnset =
  188. AfterBBI->findRegisterDefOperand(Reg, true, true, TRI);
  189. if (DeadOrKillToUnset)
  190. LLVM_DEBUG(dbgs()
  191. << " Dead flag of " << *DeadOrKillToUnset << " from "
  192. << *AfterBBI << " is a unsetting candidate\n");
  193. InstrsToErase.insert(&*AfterBBI);
  194. LLVM_DEBUG(dbgs() << " Remove redundant load immediate: ";
  195. AfterBBI->dump());
  196. }
  197. }
  198. for (MachineInstr *MI : InstrsToErase) {
  199. MI->eraseFromParent();
  200. }
  201. NumRemovedInPreEmit += InstrsToErase.size();
  202. return !InstrsToErase.empty();
  203. }
  204. // Check if this instruction is a PLDpc that is part of a GOT indirect
  205. // access.
  206. bool isGOTPLDpc(MachineInstr &Instr) {
  207. if (Instr.getOpcode() != PPC::PLDpc)
  208. return false;
  209. // The result must be a register.
  210. const MachineOperand &LoadedAddressReg = Instr.getOperand(0);
  211. if (!LoadedAddressReg.isReg())
  212. return false;
  213. // Make sure that this is a global symbol.
  214. const MachineOperand &SymbolOp = Instr.getOperand(1);
  215. if (!SymbolOp.isGlobal())
  216. return false;
  217. // Finally return true only if the GOT flag is present.
  218. return (SymbolOp.getTargetFlags() & PPCII::MO_GOT_FLAG);
  219. }
  220. bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) {
  221. MachineFunction *MF = MBB.getParent();
  222. // If the linker opt is disabled then just return.
  223. if (!EnablePCRelLinkerOpt)
  224. return false;
  225. // Add this linker opt only if we are using PC Relative memops.
  226. if (!MF->getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls())
  227. return false;
  228. // Struct to keep track of one def/use pair for a GOT indirect access.
  229. struct GOTDefUsePair {
  230. MachineBasicBlock::iterator DefInst;
  231. MachineBasicBlock::iterator UseInst;
  232. Register DefReg;
  233. Register UseReg;
  234. bool StillValid;
  235. };
  236. // Vector of def/ues pairs in this basic block.
  237. SmallVector<GOTDefUsePair, 4> CandPairs;
  238. SmallVector<GOTDefUsePair, 4> ValidPairs;
  239. bool MadeChange = false;
  240. // Run through all of the instructions in the basic block and try to
  241. // collect potential pairs of GOT indirect access instructions.
  242. for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
  243. // Look for the initial GOT indirect load.
  244. if (isGOTPLDpc(*BBI)) {
  245. GOTDefUsePair CurrentPair{BBI, MachineBasicBlock::iterator(),
  246. BBI->getOperand(0).getReg(),
  247. PPC::NoRegister, true};
  248. CandPairs.push_back(CurrentPair);
  249. continue;
  250. }
  251. // We haven't encountered any new PLD instructions, nothing to check.
  252. if (CandPairs.empty())
  253. continue;
  254. // Run through the candidate pairs and see if any of the registers
  255. // defined in the PLD instructions are used by this instruction.
  256. // Note: the size of CandPairs can change in the loop.
  257. for (unsigned Idx = 0; Idx < CandPairs.size(); Idx++) {
  258. GOTDefUsePair &Pair = CandPairs[Idx];
  259. // The instruction does not use or modify this PLD's def reg,
  260. // ignore it.
  261. if (!BBI->readsRegister(Pair.DefReg, TRI) &&
  262. !BBI->modifiesRegister(Pair.DefReg, TRI))
  263. continue;
  264. // The use needs to be used in the address computation and not
  265. // as the register being stored for a store.
  266. const MachineOperand *UseOp =
  267. hasPCRelativeForm(*BBI) ? &BBI->getOperand(2) : nullptr;
  268. // Check for a valid use.
  269. if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg &&
  270. UseOp->isUse() && UseOp->isKill()) {
  271. Pair.UseInst = BBI;
  272. Pair.UseReg = BBI->getOperand(0).getReg();
  273. ValidPairs.push_back(Pair);
  274. }
  275. CandPairs.erase(CandPairs.begin() + Idx);
  276. }
  277. }
  278. // Go through all of the pairs and check for any more valid uses.
  279. for (auto Pair = ValidPairs.begin(); Pair != ValidPairs.end(); Pair++) {
  280. // We shouldn't be here if we don't have a valid pair.
  281. assert(Pair->UseInst.isValid() && Pair->StillValid &&
  282. "Kept an invalid def/use pair for GOT PCRel opt");
  283. // We have found a potential pair. Search through the instructions
  284. // between the def and the use to see if it is valid to mark this as a
  285. // linker opt.
  286. MachineBasicBlock::iterator BBI = Pair->DefInst;
  287. ++BBI;
  288. for (; BBI != Pair->UseInst; ++BBI) {
  289. if (BBI->readsRegister(Pair->UseReg, TRI) ||
  290. BBI->modifiesRegister(Pair->UseReg, TRI)) {
  291. Pair->StillValid = false;
  292. break;
  293. }
  294. }
  295. if (!Pair->StillValid)
  296. continue;
  297. // The load/store instruction that uses the address from the PLD will
  298. // either use a register (for a store) or define a register (for the
  299. // load). That register will be added as an implicit def to the PLD
  300. // and as an implicit use on the second memory op. This is a precaution
  301. // to prevent future passes from using that register between the two
  302. // instructions.
  303. MachineOperand ImplDef =
  304. MachineOperand::CreateReg(Pair->UseReg, true, true);
  305. MachineOperand ImplUse =
  306. MachineOperand::CreateReg(Pair->UseReg, false, true);
  307. Pair->DefInst->addOperand(ImplDef);
  308. Pair->UseInst->addOperand(ImplUse);
  309. // Create the symbol.
  310. MCContext &Context = MF->getContext();
  311. MCSymbol *Symbol = Context.createNamedTempSymbol("pcrel");
  312. MachineOperand PCRelLabel =
  313. MachineOperand::CreateMCSymbol(Symbol, PPCII::MO_PCREL_OPT_FLAG);
  314. Pair->DefInst->addOperand(*MF, PCRelLabel);
  315. Pair->UseInst->addOperand(*MF, PCRelLabel);
  316. MadeChange |= true;
  317. }
  318. return MadeChange;
  319. }
  320. // This function removes redundant pairs of accumulator prime/unprime
  321. // instructions. In some situations, it's possible the compiler inserts an
  322. // accumulator prime instruction followed by an unprime instruction (e.g.
  323. // when we store an accumulator after restoring it from a spill). If the
  324. // accumulator is not used between the two, they can be removed. This
  325. // function removes these redundant pairs from basic blocks.
  326. // The algorithm is quite straightforward - every time we encounter a prime
  327. // instruction, the primed register is added to a candidate set. Any use
  328. // other than a prime removes the candidate from the set and any de-prime
  329. // of a current candidate marks both the prime and de-prime for removal.
  330. // This way we ensure we only remove prime/de-prime *pairs* with no
  331. // intervening uses.
  332. bool removeAccPrimeUnprime(MachineBasicBlock &MBB) {
  333. DenseSet<MachineInstr *> InstrsToErase;
  334. // Initially, none of the acc registers are candidates.
  335. SmallVector<MachineInstr *, 8> Candidates(
  336. PPC::UACCRCRegClass.getNumRegs(), nullptr);
  337. for (MachineInstr &BBI : MBB.instrs()) {
  338. unsigned Opc = BBI.getOpcode();
  339. // If we are visiting a xxmtacc instruction, we add it and its operand
  340. // register to the candidate set.
  341. if (Opc == PPC::XXMTACC) {
  342. Register Acc = BBI.getOperand(0).getReg();
  343. assert(PPC::ACCRCRegClass.contains(Acc) &&
  344. "Unexpected register for XXMTACC");
  345. Candidates[Acc - PPC::ACC0] = &BBI;
  346. }
  347. // If we are visiting a xxmfacc instruction and its operand register is
  348. // in the candidate set, we mark the two instructions for removal.
  349. else if (Opc == PPC::XXMFACC) {
  350. Register Acc = BBI.getOperand(0).getReg();
  351. assert(PPC::ACCRCRegClass.contains(Acc) &&
  352. "Unexpected register for XXMFACC");
  353. if (!Candidates[Acc - PPC::ACC0])
  354. continue;
  355. InstrsToErase.insert(&BBI);
  356. InstrsToErase.insert(Candidates[Acc - PPC::ACC0]);
  357. }
  358. // If we are visiting an instruction using an accumulator register
  359. // as operand, we remove it from the candidate set.
  360. else {
  361. for (MachineOperand &Operand : BBI.operands()) {
  362. if (!Operand.isReg())
  363. continue;
  364. Register Reg = Operand.getReg();
  365. if (PPC::ACCRCRegClass.contains(Reg))
  366. Candidates[Reg - PPC::ACC0] = nullptr;
  367. }
  368. }
  369. }
  370. for (MachineInstr *MI : InstrsToErase)
  371. MI->eraseFromParent();
  372. NumRemovedInPreEmit += InstrsToErase.size();
  373. return !InstrsToErase.empty();
  374. }
  375. bool runOnMachineFunction(MachineFunction &MF) override {
  376. // If the user wants to set the DSCR using command-line options,
  377. // load in the specified value at the start of main.
  378. if (DSCRValue.getNumOccurrences() > 0 && MF.getName().equals("main") &&
  379. MF.getFunction().hasExternalLinkage()) {
  380. DSCRValue = (uint32_t)(DSCRValue & 0x01FFFFFF); // 25-bit DSCR mask
  381. RegScavenger RS;
  382. MachineBasicBlock &MBB = MF.front();
  383. // Find an unused GPR according to register liveness
  384. RS.enterBasicBlock(MBB);
  385. unsigned InDSCR = RS.FindUnusedReg(&PPC::GPRCRegClass);
  386. if (InDSCR) {
  387. const PPCInstrInfo *TII =
  388. MF.getSubtarget<PPCSubtarget>().getInstrInfo();
  389. DebugLoc dl;
  390. MachineBasicBlock::iterator IP = MBB.begin(); // Insert Point
  391. // Copy the 32-bit DSCRValue integer into the GPR InDSCR using LIS and
  392. // ORI, then move to DSCR. If the requested DSCR value is contained
  393. // in a 16-bit signed number, we can emit a single `LI`, but the
  394. // impact of saving one instruction in one function does not warrant
  395. // any additional complexity in the logic here.
  396. BuildMI(MBB, IP, dl, TII->get(PPC::LIS), InDSCR)
  397. .addImm(DSCRValue >> 16);
  398. BuildMI(MBB, IP, dl, TII->get(PPC::ORI), InDSCR)
  399. .addReg(InDSCR)
  400. .addImm(DSCRValue & 0xFFFF);
  401. BuildMI(MBB, IP, dl, TII->get(PPC::MTUDSCR))
  402. .addReg(InDSCR, RegState::Kill);
  403. } else
  404. errs() << "Warning: Ran out of registers - Unable to set DSCR as "
  405. "requested";
  406. }
  407. if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
  408. // Remove UNENCODED_NOP even when this pass is disabled.
  409. // This needs to be done unconditionally so we don't emit zeros
  410. // in the instruction stream.
  411. SmallVector<MachineInstr *, 4> InstrsToErase;
  412. for (MachineBasicBlock &MBB : MF)
  413. for (MachineInstr &MI : MBB)
  414. if (MI.getOpcode() == PPC::UNENCODED_NOP)
  415. InstrsToErase.push_back(&MI);
  416. for (MachineInstr *MI : InstrsToErase)
  417. MI->eraseFromParent();
  418. return false;
  419. }
  420. bool Changed = false;
  421. const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
  422. const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  423. SmallVector<MachineInstr *, 4> InstrsToErase;
  424. for (MachineBasicBlock &MBB : MF) {
  425. Changed |= removeRedundantLIs(MBB, TRI);
  426. Changed |= addLinkerOpt(MBB, TRI);
  427. Changed |= removeAccPrimeUnprime(MBB);
  428. for (MachineInstr &MI : MBB) {
  429. unsigned Opc = MI.getOpcode();
  430. if (Opc == PPC::UNENCODED_NOP) {
  431. InstrsToErase.push_back(&MI);
  432. continue;
  433. }
  434. // Detect self copies - these can result from running AADB.
  435. if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) {
  436. const MCInstrDesc &MCID = TII->get(Opc);
  437. if (MCID.getNumOperands() == 3 &&
  438. MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
  439. MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) {
  440. NumberOfSelfCopies++;
  441. LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
  442. LLVM_DEBUG(MI.dump());
  443. InstrsToErase.push_back(&MI);
  444. continue;
  445. }
  446. else if (MCID.getNumOperands() == 2 &&
  447. MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
  448. NumberOfSelfCopies++;
  449. LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
  450. LLVM_DEBUG(MI.dump());
  451. InstrsToErase.push_back(&MI);
  452. continue;
  453. }
  454. }
  455. MachineInstr *DefMIToErase = nullptr;
  456. if (TII->convertToImmediateForm(MI, &DefMIToErase)) {
  457. Changed = true;
  458. NumRRConvertedInPreEmit++;
  459. LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
  460. LLVM_DEBUG(MI.dump());
  461. if (DefMIToErase) {
  462. InstrsToErase.push_back(DefMIToErase);
  463. }
  464. }
  465. if (TII->foldFrameOffset(MI)) {
  466. Changed = true;
  467. NumFrameOffFoldInPreEmit++;
  468. LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
  469. LLVM_DEBUG(MI.dump());
  470. }
  471. if (TII->optimizeCmpPostRA(MI)) {
  472. Changed = true;
  473. NumCmpsInPreEmit++;
  474. LLVM_DEBUG(dbgs() << "Optimize compare by using record form: ");
  475. LLVM_DEBUG(MI.dump());
  476. InstrsToErase.push_back(&MI);
  477. }
  478. }
  479. // Eliminate conditional branch based on a constant CR bit by
  480. // CRSET or CRUNSET. We eliminate the conditional branch or
  481. // convert it into an unconditional branch. Also, if the CR bit
  482. // is not used by other instructions, we eliminate CRSET as well.
  483. auto I = MBB.getFirstInstrTerminator();
  484. if (I == MBB.instr_end())
  485. continue;
  486. MachineInstr *Br = &*I;
  487. if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn)
  488. continue;
  489. MachineInstr *CRSetMI = nullptr;
  490. Register CRBit = Br->getOperand(0).getReg();
  491. unsigned CRReg = getCRFromCRBit(CRBit);
  492. bool SeenUse = false;
  493. MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend();
  494. for (It++; It != Er; It++) {
  495. if (It->modifiesRegister(CRBit, TRI)) {
  496. if ((It->getOpcode() == PPC::CRUNSET ||
  497. It->getOpcode() == PPC::CRSET) &&
  498. It->getOperand(0).getReg() == CRBit)
  499. CRSetMI = &*It;
  500. break;
  501. }
  502. if (It->readsRegister(CRBit, TRI))
  503. SeenUse = true;
  504. }
  505. if (!CRSetMI) continue;
  506. unsigned CRSetOp = CRSetMI->getOpcode();
  507. if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) ||
  508. (Br->getOpcode() == PPC::BC && CRSetOp == PPC::CRUNSET)) {
  509. // Remove this branch since it cannot be taken.
  510. InstrsToErase.push_back(Br);
  511. MBB.removeSuccessor(Br->getOperand(1).getMBB());
  512. }
  513. else {
  514. // This conditional branch is always taken. So, remove all branches
  515. // and insert an unconditional branch to the destination of this.
  516. MachineBasicBlock::iterator It = Br, Er = MBB.end();
  517. for (; It != Er; It++) {
  518. if (It->isDebugInstr()) continue;
  519. assert(It->isTerminator() && "Non-terminator after a terminator");
  520. InstrsToErase.push_back(&*It);
  521. }
  522. if (!MBB.isLayoutSuccessor(Br->getOperand(1).getMBB())) {
  523. ArrayRef<MachineOperand> NoCond;
  524. TII->insertBranch(MBB, Br->getOperand(1).getMBB(), nullptr,
  525. NoCond, Br->getDebugLoc());
  526. }
  527. for (auto &Succ : MBB.successors())
  528. if (Succ != Br->getOperand(1).getMBB()) {
  529. MBB.removeSuccessor(Succ);
  530. break;
  531. }
  532. }
  533. // If the CRBit is not used by another instruction, we can eliminate
  534. // CRSET/CRUNSET instruction.
  535. if (!SeenUse) {
  536. // We need to check use of the CRBit in successors.
  537. for (auto &SuccMBB : MBB.successors())
  538. if (SuccMBB->isLiveIn(CRBit) || SuccMBB->isLiveIn(CRReg)) {
  539. SeenUse = true;
  540. break;
  541. }
  542. if (!SeenUse)
  543. InstrsToErase.push_back(CRSetMI);
  544. }
  545. }
  546. for (MachineInstr *MI : InstrsToErase) {
  547. LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: ");
  548. LLVM_DEBUG(MI->dump());
  549. MI->eraseFromParent();
  550. NumRemovedInPreEmit++;
  551. }
  552. return Changed;
  553. }
  554. };
  555. }
  556. INITIALIZE_PASS(PPCPreEmitPeephole, DEBUG_TYPE, "PowerPC Pre-Emit Peephole",
  557. false, false)
  558. char PPCPreEmitPeephole::ID = 0;
  559. FunctionPass *llvm::createPPCPreEmitPeepholePass() {
  560. return new PPCPreEmitPeephole();
  561. }