|
- //===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This pass generates machine instructions for the CTR loops related pseudos:
- // 1: MTCTRloop/DecreaseCTRloop
- // 2: MTCTR8loop/DecreaseCTR8loop
- //
- // If a CTR loop can be generated:
- // 1: MTCTRloop/MTCTR8loop will be converted to "mtctr"
- // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and
- // its user branch instruction can be deleted.
- //
- // If a CTR loop can not be generated due to clobber of CTR:
- // 1: MTCTRloop/MTCTR8loop can be deleted.
- // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and
- // a "cmplwi/cmpldi".
- //
- // This pass runs just before register allocation for two reasons:
- // 1: If a CTR loop can be generated, we don't want the register allocator to
- // allocate a register for DecreaseCTRloop.
- // 2: If a CTR loop can not be generated, we would otherwise not have any
- // condition register for the newly added "cmplwi/cmpldi".
- //
- //===----------------------------------------------------------------------===//
- #include "PPC.h"
- #include "PPCInstrInfo.h"
- #include "PPCSubtarget.h"
- #include "llvm/ADT/Statistic.h"
- #include "llvm/CodeGen/MachineBasicBlock.h"
- #include "llvm/CodeGen/MachineFunction.h"
- #include "llvm/CodeGen/MachineFunctionPass.h"
- #include "llvm/CodeGen/MachineInstr.h"
- #include "llvm/CodeGen/MachineLoopInfo.h"
- #include "llvm/CodeGen/MachineOperand.h"
- #include "llvm/CodeGen/MachineRegisterInfo.h"
- #include "llvm/CodeGen/Register.h"
- #include "llvm/InitializePasses.h"
- #include "llvm/Pass.h"
- #include "llvm/PassRegistry.h"
- #include "llvm/Support/CodeGen.h"
- #include "llvm/Support/Debug.h"
- #include "llvm/Support/ErrorHandling.h"
- #include <cassert>
- using namespace llvm;
- #define DEBUG_TYPE "ppc-ctrloops"
- STATISTIC(NumCTRLoops, "Number of CTR loops generated");
- STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");
- namespace {
- class PPCCTRLoops : public MachineFunctionPass {
- public:
- static char ID;
- PPCCTRLoops() : MachineFunctionPass(ID) {
- initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineLoopInfo>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- bool runOnMachineFunction(MachineFunction &MF) override;
- private:
- const PPCInstrInfo *TII = nullptr;
- MachineRegisterInfo *MRI = nullptr;
- bool processLoop(MachineLoop *ML);
- bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;
- void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
- MachineInstr *Dec);
- void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);
- };
- } // namespace
- char PPCCTRLoops::ID = 0;
- INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
- false, false)
- INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
- INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
- false, false)
- FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); }
- bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
- bool Changed = false;
- auto &MLI = getAnalysis<MachineLoopInfo>();
- TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
- MRI = &MF.getRegInfo();
- for (auto *ML : MLI) {
- if (ML->isOutermost())
- Changed |= processLoop(ML);
- }
- #ifndef NDEBUG
- for (const MachineBasicBlock &BB : MF) {
- for (const MachineInstr &I : BB)
- assert((I.getOpcode() != PPC::DecreaseCTRloop &&
- I.getOpcode() != PPC::DecreaseCTR8loop) &&
- "CTR loop pseudo is not expanded!");
- }
- #endif
- return Changed;
- }
- bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
- if (!CheckReads) {
- // If we are only checking for defs, that is we are going to find
- // definitions before MTCTRloop, for this case:
- // CTR defination inside the callee of a call instruction will not impact
- // the defination of MTCTRloop, so we can use definesRegister() for the
- // check, no need to check the regmask.
- return MI->definesRegister(PPC::CTR) || MI->definesRegister(PPC::CTR8);
- }
- if (MI->modifiesRegister(PPC::CTR) || MI->modifiesRegister(PPC::CTR8))
- return true;
- if (MI->getDesc().isCall())
- return true;
- // We define the CTR in the loop preheader, so if there is any CTR reader in
- // the loop, we also can not use CTR loop form.
- if (MI->readsRegister(PPC::CTR) || MI->readsRegister(PPC::CTR8))
- return true;
- return false;
- }
- bool PPCCTRLoops::processLoop(MachineLoop *ML) {
- bool Changed = false;
- // Align with HardwareLoop pass, process inner loops first.
- for (MachineLoop *I : *ML)
- Changed |= processLoop(I);
- // If any inner loop is changed, outter loop must be without hardware loop
- // intrinsics.
- if (Changed)
- return true;
- auto IsLoopStart = [](MachineInstr &MI) {
- return MI.getOpcode() == PPC::MTCTRloop ||
- MI.getOpcode() == PPC::MTCTR8loop;
- };
- auto SearchForStart =
- [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {
- for (auto &MI : *MBB) {
- if (IsLoopStart(MI))
- return &MI;
- }
- return nullptr;
- };
- MachineInstr *Start = nullptr;
- MachineInstr *Dec = nullptr;
- bool InvalidCTRLoop = false;
- MachineBasicBlock *Preheader = ML->getLoopPreheader();
- // If there is no preheader for this loop, there must be no MTCTRloop
- // either.
- if (!Preheader)
- return false;
- Start = SearchForStart(Preheader);
- // This is not a CTR loop candidate.
- if (!Start)
- return false;
- // If CTR is live to the preheader, we can not redefine the CTR register.
- if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8))
- InvalidCTRLoop = true;
- // Make sure there is also no CTR clobber in the block preheader between the
- // begin and MTCTR.
- for (MachineBasicBlock::reverse_instr_iterator I =
- std::next(Start->getReverseIterator());
- I != Preheader->instr_rend(); ++I)
- // Only check the definitions of CTR. If there is non-dead definition for
- // the CTR, we conservatively don't generate a CTR loop.
- if (isCTRClobber(&*I, /* CheckReads */ false)) {
- InvalidCTRLoop = true;
- break;
- }
- // Make sure there is also no CTR clobber/user in the block preheader between
- // MTCTR and the end.
- for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator());
- I != Preheader->instr_end(); ++I)
- if (isCTRClobber(&*I, /* CheckReads */ true)) {
- InvalidCTRLoop = true;
- break;
- }
- // Find the CTR loop components and decide whether or not to fall back to a
- // normal loop.
- for (auto *MBB : reverse(ML->getBlocks())) {
- for (auto &MI : *MBB) {
- if (MI.getOpcode() == PPC::DecreaseCTRloop ||
- MI.getOpcode() == PPC::DecreaseCTR8loop)
- Dec = &MI;
- else if (!InvalidCTRLoop)
- // If any instruction clobber CTR, then we can not generate a CTR loop.
- InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true);
- }
- if (Dec && InvalidCTRLoop)
- break;
- }
- assert(Dec && "CTR loop is not complete!");
- if (InvalidCTRLoop) {
- expandNormalLoops(ML, Start, Dec);
- ++NumNormalLoops;
- }
- else {
- expandCTRLoops(ML, Start, Dec);
- ++NumCTRLoops;
- }
- return true;
- }
- void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
- MachineInstr *Dec) {
- bool Is64Bit =
- Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
- MachineBasicBlock *Preheader = Start->getParent();
- MachineBasicBlock *Exiting = Dec->getParent();
- assert((Preheader && Exiting) &&
- "Preheader and exiting should exist for CTR loop!");
- assert(Dec->getOperand(1).getImm() == 1 &&
- "Loop decrement stride must be 1");
- unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
- unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;
- Register PHIDef =
- MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
- : &PPC::GPRC_and_GPRC_NOR0RegClass);
- Start->getParent()->getParent()->getProperties().reset(
- MachineFunctionProperties::Property::NoPHIs);
- // Generate "PHI" in the header block.
- auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(),
- DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef);
- PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader);
- Register ADDIDef =
- MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
- : &PPC::GPRC_and_GPRC_NOR0RegClass);
- // Generate "addi -1" in the exiting block.
- BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef)
- .addReg(PHIDef)
- .addImm(-1);
- // Add other inputs for the PHI node.
- if (ML->isLoopLatch(Exiting)) {
- // There must be only two predecessors for the loop header, one is the
- // Preheader and the other one is loop latch Exiting. In hardware loop
- // insertion pass, the block containing DecreaseCTRloop must dominate all
- // loop latches. So there must be only one latch.
- assert(ML->getHeader()->pred_size() == 2 &&
- "Loop header predecessor is not right!");
- PHIMIB.addReg(ADDIDef).addMBB(Exiting);
- } else {
- // If the block containing DecreaseCTRloop is not a loop latch, we can use
- // ADDIDef as the value for all other blocks for the PHI. In hardware loop
- // insertion pass, the block containing DecreaseCTRloop must dominate all
- // loop latches.
- for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {
- if (ML->contains(P)) {
- assert(ML->isLoopLatch(P) &&
- "Loop's header in-loop predecessor is not loop latch!");
- PHIMIB.addReg(ADDIDef).addMBB(P);
- } else
- assert(P == Preheader &&
- "CTR loop should not be generated for irreducible loop!");
- }
- }
- // Generate the compare in the exiting block.
- Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass);
- auto CMPMIB =
- BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef)
- .addReg(ADDIDef)
- .addImm(0);
- BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY),
- Dec->getOperand(0).getReg())
- .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt);
- // Remove the pseudo instructions.
- Start->eraseFromParent();
- Dec->eraseFromParent();
- }
- void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,
- MachineInstr *Dec) {
- bool Is64Bit =
- Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
- MachineBasicBlock *Preheader = Start->getParent();
- MachineBasicBlock *Exiting = Dec->getParent();
- (void)Preheader;
- assert((Preheader && Exiting) &&
- "Preheader and exiting should exist for CTR loop!");
- assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");
- unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;
- unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;
- auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg());
- assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) &&
- "There should be only one user for loop decrement pseudo!");
- unsigned Opcode = 0;
- switch (BrInstr->getOpcode()) {
- case PPC::BC:
- Opcode = BDNZOpcode;
- (void) ML;
- assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&
- "Invalid ctr loop!");
- break;
- case PPC::BCn:
- Opcode = BDZOpcode;
- assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&
- "Invalid ctr loop!");
- break;
- default:
- llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");
- }
- // Generate "bdnz/bdz" in the exiting block just before the terminator.
- BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode))
- .addMBB(BrInstr->getOperand(1).getMBB());
- // Remove the pseudo instructions.
- BrInstr->eraseFromParent();
- Dec->eraseFromParent();
- }
|