PPCCTRLoops.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. //===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass generates machine instructions for the CTR loops related pseudos:
  10. // 1: MTCTRloop/DecreaseCTRloop
  11. // 2: MTCTR8loop/DecreaseCTR8loop
  12. //
  13. // If a CTR loop can be generated:
  14. // 1: MTCTRloop/MTCTR8loop will be converted to "mtctr"
  15. // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and
  16. // its user branch instruction can be deleted.
  17. //
  18. // If a CTR loop can not be generated due to clobber of CTR:
  19. // 1: MTCTRloop/MTCTR8loop can be deleted.
  20. // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and
  21. // a "cmplwi/cmpldi".
  22. //
  23. // This pass runs just before register allocation, because we don't want
  24. // register allocator to allocate register for DecreaseCTRloop if a CTR can be
  25. // generated or if a CTR loop can not be generated, we don't have any condition
  26. // register for the new added "cmplwi/cmpldi".
  27. //
  28. //===----------------------------------------------------------------------===//
  29. #include "PPC.h"
  30. #include "PPCInstrInfo.h"
  31. #include "PPCSubtarget.h"
  32. #include "llvm/ADT/Statistic.h"
  33. #include "llvm/CodeGen/MachineBasicBlock.h"
  34. #include "llvm/CodeGen/MachineFunction.h"
  35. #include "llvm/CodeGen/MachineFunctionPass.h"
  36. #include "llvm/CodeGen/MachineInstr.h"
  37. #include "llvm/CodeGen/MachineLoopInfo.h"
  38. #include "llvm/CodeGen/MachineOperand.h"
  39. #include "llvm/CodeGen/MachineRegisterInfo.h"
  40. #include "llvm/CodeGen/Register.h"
  41. #include "llvm/InitializePasses.h"
  42. #include "llvm/Pass.h"
  43. #include "llvm/PassRegistry.h"
  44. #include "llvm/Support/CodeGen.h"
  45. #include "llvm/Support/Debug.h"
  46. #include "llvm/Support/ErrorHandling.h"
  47. #include <cassert>
  48. using namespace llvm;
  49. #define DEBUG_TYPE "ppc-ctrloops"
  50. STATISTIC(NumCTRLoops, "Number of CTR loops generated");
  51. STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");
  52. namespace {
  53. class PPCCTRLoops : public MachineFunctionPass {
  54. public:
  55. static char ID;
  56. PPCCTRLoops() : MachineFunctionPass(ID) {
  57. initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
  58. }
  59. void getAnalysisUsage(AnalysisUsage &AU) const override {
  60. AU.addRequired<MachineLoopInfo>();
  61. MachineFunctionPass::getAnalysisUsage(AU);
  62. }
  63. bool runOnMachineFunction(MachineFunction &MF) override;
  64. private:
  65. const PPCInstrInfo *TII = nullptr;
  66. MachineRegisterInfo *MRI = nullptr;
  67. bool processLoop(MachineLoop *ML);
  68. bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;
  69. void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
  70. MachineInstr *Dec);
  71. void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);
  72. };
  73. } // namespace
  74. char PPCCTRLoops::ID = 0;
  75. INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
  76. false, false)
  77. INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
  78. INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
  79. false, false)
  80. FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); }
  81. bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
  82. bool Changed = false;
  83. auto &MLI = getAnalysis<MachineLoopInfo>();
  84. TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
  85. MRI = &MF.getRegInfo();
  86. for (auto *ML : MLI) {
  87. if (ML->isOutermost())
  88. Changed |= processLoop(ML);
  89. }
  90. #ifndef NDEBUG
  91. for (const MachineBasicBlock &BB : MF) {
  92. for (const MachineInstr &I : BB)
  93. assert((I.getOpcode() != PPC::DecreaseCTRloop &&
  94. I.getOpcode() != PPC::DecreaseCTR8loop) &&
  95. "CTR loop pseudo is not expanded!");
  96. }
  97. #endif
  98. return Changed;
  99. }
  100. bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
  101. if (!CheckReads) {
  102. // If we are only checking for defs, that is we are going to find
  103. // definitions before MTCTRloop, for this case:
  104. // CTR defination inside the callee of a call instruction will not impact
  105. // the defination of MTCTRloop, so we can use definesRegister() for the
  106. // check, no need to check the regmask.
  107. return MI->definesRegister(PPC::CTR) || MI->definesRegister(PPC::CTR8);
  108. }
  109. if (MI->modifiesRegister(PPC::CTR) || MI->modifiesRegister(PPC::CTR8))
  110. return true;
  111. if (MI->getDesc().isCall())
  112. return true;
  113. // We define the CTR in the loop preheader, so if there is any CTR reader in
  114. // the loop, we also can not use CTR loop form.
  115. if (MI->readsRegister(PPC::CTR) || MI->readsRegister(PPC::CTR8))
  116. return true;
  117. return false;
  118. }
  119. bool PPCCTRLoops::processLoop(MachineLoop *ML) {
  120. bool Changed = false;
  121. // Align with HardwareLoop pass, process inner loops first.
  122. for (MachineLoop *I : *ML)
  123. Changed |= processLoop(I);
  124. // If any inner loop is changed, outter loop must be without hardware loop
  125. // intrinsics.
  126. if (Changed)
  127. return true;
  128. auto IsLoopStart = [](MachineInstr &MI) {
  129. return MI.getOpcode() == PPC::MTCTRloop ||
  130. MI.getOpcode() == PPC::MTCTR8loop;
  131. };
  132. auto SearchForStart =
  133. [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {
  134. for (auto &MI : *MBB) {
  135. if (IsLoopStart(MI))
  136. return &MI;
  137. }
  138. return nullptr;
  139. };
  140. MachineInstr *Start = nullptr;
  141. MachineInstr *Dec = nullptr;
  142. bool InvalidCTRLoop = false;
  143. MachineBasicBlock *Preheader = ML->getLoopPreheader();
  144. // If there is no preheader for this loop, there must be no MTCTRloop
  145. // either.
  146. if (!Preheader)
  147. return false;
  148. Start = SearchForStart(Preheader);
  149. // This is not a CTR loop candidate.
  150. if (!Start)
  151. return false;
  152. // If CTR is live to the preheader, we can not redefine the CTR register.
  153. if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8))
  154. InvalidCTRLoop = true;
  155. // Make sure there is also no CTR clobber in the block preheader between the
  156. // begin and MTCTR.
  157. for (MachineBasicBlock::reverse_instr_iterator I =
  158. std::next(Start->getReverseIterator());
  159. I != Preheader->instr_rend(); ++I)
  160. // Only check the definitions of CTR. If there is non-dead definition for
  161. // the CTR, we conservatively don't generate a CTR loop.
  162. if (isCTRClobber(&*I, /* CheckReads */ false)) {
  163. InvalidCTRLoop = true;
  164. break;
  165. }
  166. // Make sure there is also no CTR clobber/user in the block preheader between
  167. // MTCTR and the end.
  168. for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator());
  169. I != Preheader->instr_end(); ++I)
  170. if (isCTRClobber(&*I, /* CheckReads */ true)) {
  171. InvalidCTRLoop = true;
  172. break;
  173. }
  174. // Find the CTR loop components and decide whether or not to fall back to a
  175. // normal loop.
  176. for (auto *MBB : reverse(ML->getBlocks())) {
  177. for (auto &MI : *MBB) {
  178. if (MI.getOpcode() == PPC::DecreaseCTRloop ||
  179. MI.getOpcode() == PPC::DecreaseCTR8loop)
  180. Dec = &MI;
  181. else if (!InvalidCTRLoop)
  182. // If any instruction clobber CTR, then we can not generate a CTR loop.
  183. InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true);
  184. }
  185. if (Dec && InvalidCTRLoop)
  186. break;
  187. }
  188. assert(Dec && "CTR loop is not complete!");
  189. if (InvalidCTRLoop) {
  190. expandNormalLoops(ML, Start, Dec);
  191. ++NumNormalLoops;
  192. }
  193. else {
  194. expandCTRLoops(ML, Start, Dec);
  195. ++NumCTRLoops;
  196. }
  197. return true;
  198. }
  199. void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
  200. MachineInstr *Dec) {
  201. bool Is64Bit =
  202. Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
  203. MachineBasicBlock *Preheader = Start->getParent();
  204. MachineBasicBlock *Exiting = Dec->getParent();
  205. assert((Preheader && Exiting) &&
  206. "Preheader and exiting should exist for CTR loop!");
  207. assert(Dec->getOperand(1).getImm() == 1 &&
  208. "Loop decrement stride must be 1");
  209. unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
  210. unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;
  211. Register PHIDef =
  212. MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
  213. : &PPC::GPRC_and_GPRC_NOR0RegClass);
  214. Start->getParent()->getParent()->getProperties().reset(
  215. MachineFunctionProperties::Property::NoPHIs);
  216. // Generate "PHI" in the header block.
  217. auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(),
  218. DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef);
  219. PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader);
  220. Register ADDIDef =
  221. MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
  222. : &PPC::GPRC_and_GPRC_NOR0RegClass);
  223. // Generate "addi -1" in the exiting block.
  224. BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef)
  225. .addReg(PHIDef)
  226. .addImm(-1);
  227. // Add other inputs for the PHI node.
  228. if (ML->isLoopLatch(Exiting)) {
  229. // There must be only two predecessors for the loop header, one is the
  230. // Preheader and the other one is loop latch Exiting. In hardware loop
  231. // insertion pass, the block containing DecreaseCTRloop must dominate all
  232. // loop latches. So there must be only one latch.
  233. assert(ML->getHeader()->pred_size() == 2 &&
  234. "Loop header predecessor is not right!");
  235. PHIMIB.addReg(ADDIDef).addMBB(Exiting);
  236. } else {
  237. // If the block containing DecreaseCTRloop is not a loop latch, we can use
  238. // ADDIDef as the value for all other blocks for the PHI. In hardware loop
  239. // insertion pass, the block containing DecreaseCTRloop must dominate all
  240. // loop latches.
  241. for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {
  242. if (ML->contains(P)) {
  243. assert(ML->isLoopLatch(P) &&
  244. "Loop's header in-loop predecessor is not loop latch!");
  245. PHIMIB.addReg(ADDIDef).addMBB(P);
  246. } else
  247. assert(P == Preheader &&
  248. "CTR loop should not be generated for irreducible loop!");
  249. }
  250. }
  251. // Generate the compare in the exiting block.
  252. Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass);
  253. auto CMPMIB =
  254. BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef)
  255. .addReg(ADDIDef)
  256. .addImm(0);
  257. BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY),
  258. Dec->getOperand(0).getReg())
  259. .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt);
  260. // Remove the pseudo instructions.
  261. Start->eraseFromParent();
  262. Dec->eraseFromParent();
  263. }
  264. void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,
  265. MachineInstr *Dec) {
  266. bool Is64Bit =
  267. Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
  268. MachineBasicBlock *Preheader = Start->getParent();
  269. MachineBasicBlock *Exiting = Dec->getParent();
  270. (void)Preheader;
  271. assert((Preheader && Exiting) &&
  272. "Preheader and exiting should exist for CTR loop!");
  273. assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");
  274. unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;
  275. unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;
  276. auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg());
  277. assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) &&
  278. "There should be only one user for loop decrement pseudo!");
  279. unsigned Opcode = 0;
  280. switch (BrInstr->getOpcode()) {
  281. case PPC::BC:
  282. Opcode = BDNZOpcode;
  283. (void) ML;
  284. assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&
  285. "Invalid ctr loop!");
  286. break;
  287. case PPC::BCn:
  288. Opcode = BDZOpcode;
  289. assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&
  290. "Invalid ctr loop!");
  291. break;
  292. default:
  293. llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");
  294. }
  295. // Generate "bdnz/bdz" in the exiting block just before the terminator.
  296. BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode))
  297. .addMBB(BrInstr->getOperand(1).getMBB());
  298. // Remove the pseudo instructions.
  299. BrInstr->eraseFromParent();
  300. Dec->eraseFromParent();
  301. }