123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562 |
- //===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // A pre-emit peephole for catching opportunities introduced by late passes such
- // as MachineBlockPlacement.
- //
- //===----------------------------------------------------------------------===//
- #include "PPC.h"
- #include "PPCInstrInfo.h"
- #include "PPCSubtarget.h"
- #include "llvm/ADT/DenseMap.h"
- #include "llvm/ADT/Statistic.h"
- #include "llvm/CodeGen/LivePhysRegs.h"
- #include "llvm/CodeGen/MachineBasicBlock.h"
- #include "llvm/CodeGen/MachineFunctionPass.h"
- #include "llvm/CodeGen/MachineInstrBuilder.h"
- #include "llvm/CodeGen/MachineRegisterInfo.h"
- #include "llvm/MC/MCContext.h"
- #include "llvm/Support/CommandLine.h"
- #include "llvm/Support/Debug.h"
- using namespace llvm;
- #define DEBUG_TYPE "ppc-pre-emit-peephole"
- STATISTIC(NumRRConvertedInPreEmit,
- "Number of r+r instructions converted to r+i in pre-emit peephole");
- STATISTIC(NumRemovedInPreEmit,
- "Number of instructions deleted in pre-emit peephole");
- STATISTIC(NumberOfSelfCopies,
- "Number of self copy instructions eliminated");
- STATISTIC(NumFrameOffFoldInPreEmit,
- "Number of folding frame offset by using r+r in pre-emit peephole");
- static cl::opt<bool>
- EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true),
- cl::desc("enable PC Relative linker optimization"));
- static cl::opt<bool>
- RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
- cl::desc("Run pre-emit peephole optimizations."));
- namespace {
- static bool hasPCRelativeForm(MachineInstr &Use) {
- switch (Use.getOpcode()) {
- default:
- return false;
- case PPC::LBZ:
- case PPC::LBZ8:
- case PPC::LHA:
- case PPC::LHA8:
- case PPC::LHZ:
- case PPC::LHZ8:
- case PPC::LWZ:
- case PPC::LWZ8:
- case PPC::STB:
- case PPC::STB8:
- case PPC::STH:
- case PPC::STH8:
- case PPC::STW:
- case PPC::STW8:
- case PPC::LD:
- case PPC::STD:
- case PPC::LWA:
- case PPC::LXSD:
- case PPC::LXSSP:
- case PPC::LXV:
- case PPC::STXSD:
- case PPC::STXSSP:
- case PPC::STXV:
- case PPC::LFD:
- case PPC::LFS:
- case PPC::STFD:
- case PPC::STFS:
- case PPC::DFLOADf32:
- case PPC::DFLOADf64:
- case PPC::DFSTOREf32:
- case PPC::DFSTOREf64:
- return true;
- }
- }
- class PPCPreEmitPeephole : public MachineFunctionPass {
- public:
- static char ID;
- PPCPreEmitPeephole() : MachineFunctionPass(ID) {
- initializePPCPreEmitPeepholePass(*PassRegistry::getPassRegistry());
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoVRegs);
- }
- // This function removes any redundant load immediates. It has two level
- // loops - The outer loop finds the load immediates BBI that could be used
- // to replace following redundancy. The inner loop scans instructions that
- // after BBI to find redundancy and update kill/dead flags accordingly. If
- // AfterBBI is the same as BBI, it is redundant, otherwise any instructions
- // that modify the def register of BBI would break the scanning.
- // DeadOrKillToUnset is a pointer to the previous operand that had the
- // kill/dead flag set. It keeps track of the def register of BBI, the use
- // registers of AfterBBIs and the def registers of AfterBBIs.
- bool removeRedundantLIs(MachineBasicBlock &MBB,
- const TargetRegisterInfo *TRI) {
- LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n";
- MBB.dump(); dbgs() << "\n");
- DenseSet<MachineInstr *> InstrsToErase;
- for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
- // Skip load immediate that is marked to be erased later because it
- // cannot be used to replace any other instructions.
- if (InstrsToErase.contains(&*BBI))
- continue;
- // Skip non-load immediate.
- unsigned Opc = BBI->getOpcode();
- if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS &&
- Opc != PPC::LIS8)
- continue;
- // Skip load immediate, where the operand is a relocation (e.g., $r3 =
- // LI target-flags(ppc-lo) %const.0).
- if (!BBI->getOperand(1).isImm())
- continue;
- assert(BBI->getOperand(0).isReg() &&
- "Expected a register for the first operand");
- LLVM_DEBUG(dbgs() << "Scanning after load immediate: "; BBI->dump(););
- Register Reg = BBI->getOperand(0).getReg();
- int64_t Imm = BBI->getOperand(1).getImm();
- MachineOperand *DeadOrKillToUnset = nullptr;
- if (BBI->getOperand(0).isDead()) {
- DeadOrKillToUnset = &BBI->getOperand(0);
- LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset
- << " from load immediate " << *BBI
- << " is a unsetting candidate\n");
- }
- // This loop scans instructions after BBI to see if there is any
- // redundant load immediate.
- for (auto AfterBBI = std::next(BBI); AfterBBI != MBB.instr_end();
- ++AfterBBI) {
- // Track the operand that kill Reg. We would unset the kill flag of
- // the operand if there is a following redundant load immediate.
- int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI);
- // We can't just clear implicit kills, so if we encounter one, stop
- // looking further.
- if (KillIdx != -1 && AfterBBI->getOperand(KillIdx).isImplicit()) {
- LLVM_DEBUG(dbgs()
- << "Encountered an implicit kill, cannot proceed: ");
- LLVM_DEBUG(AfterBBI->dump());
- break;
- }
- if (KillIdx != -1) {
- assert(!DeadOrKillToUnset && "Shouldn't kill same register twice");
- DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx);
- LLVM_DEBUG(dbgs()
- << " Kill flag of " << *DeadOrKillToUnset << " from "
- << *AfterBBI << " is a unsetting candidate\n");
- }
- if (!AfterBBI->modifiesRegister(Reg, TRI))
- continue;
- // Finish scanning because Reg is overwritten by a non-load
- // instruction.
- if (AfterBBI->getOpcode() != Opc)
- break;
- assert(AfterBBI->getOperand(0).isReg() &&
- "Expected a register for the first operand");
- // Finish scanning because Reg is overwritten by a relocation or a
- // different value.
- if (!AfterBBI->getOperand(1).isImm() ||
- AfterBBI->getOperand(1).getImm() != Imm)
- break;
- // It loads same immediate value to the same Reg, which is redundant.
- // We would unset kill flag in previous Reg usage to extend live range
- // of Reg first, then remove the redundancy.
- if (DeadOrKillToUnset) {
- LLVM_DEBUG(dbgs()
- << " Unset dead/kill flag of " << *DeadOrKillToUnset
- << " from " << *DeadOrKillToUnset->getParent());
- if (DeadOrKillToUnset->isDef())
- DeadOrKillToUnset->setIsDead(false);
- else
- DeadOrKillToUnset->setIsKill(false);
- }
- DeadOrKillToUnset =
- AfterBBI->findRegisterDefOperand(Reg, true, true, TRI);
- if (DeadOrKillToUnset)
- LLVM_DEBUG(dbgs()
- << " Dead flag of " << *DeadOrKillToUnset << " from "
- << *AfterBBI << " is a unsetting candidate\n");
- InstrsToErase.insert(&*AfterBBI);
- LLVM_DEBUG(dbgs() << " Remove redundant load immediate: ";
- AfterBBI->dump());
- }
- }
- for (MachineInstr *MI : InstrsToErase) {
- MI->eraseFromParent();
- }
- NumRemovedInPreEmit += InstrsToErase.size();
- return !InstrsToErase.empty();
- }
- // Check if this instruction is a PLDpc that is part of a GOT indirect
- // access.
- bool isGOTPLDpc(MachineInstr &Instr) {
- if (Instr.getOpcode() != PPC::PLDpc)
- return false;
- // The result must be a register.
- const MachineOperand &LoadedAddressReg = Instr.getOperand(0);
- if (!LoadedAddressReg.isReg())
- return false;
- // Make sure that this is a global symbol.
- const MachineOperand &SymbolOp = Instr.getOperand(1);
- if (!SymbolOp.isGlobal())
- return false;
- // Finally return true only if the GOT flag is present.
- return (SymbolOp.getTargetFlags() & PPCII::MO_GOT_FLAG);
- }
- bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) {
- MachineFunction *MF = MBB.getParent();
- // If the linker opt is disabled then just return.
- if (!EnablePCRelLinkerOpt)
- return false;
- // Add this linker opt only if we are using PC Relative memops.
- if (!MF->getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls())
- return false;
- // Struct to keep track of one def/use pair for a GOT indirect access.
- struct GOTDefUsePair {
- MachineBasicBlock::iterator DefInst;
- MachineBasicBlock::iterator UseInst;
- Register DefReg;
- Register UseReg;
- bool StillValid;
- };
- // Vector of def/ues pairs in this basic block.
- SmallVector<GOTDefUsePair, 4> CandPairs;
- SmallVector<GOTDefUsePair, 4> ValidPairs;
- bool MadeChange = false;
- // Run through all of the instructions in the basic block and try to
- // collect potential pairs of GOT indirect access instructions.
- for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
- // Look for the initial GOT indirect load.
- if (isGOTPLDpc(*BBI)) {
- GOTDefUsePair CurrentPair{BBI, MachineBasicBlock::iterator(),
- BBI->getOperand(0).getReg(),
- PPC::NoRegister, true};
- CandPairs.push_back(CurrentPair);
- continue;
- }
- // We haven't encountered any new PLD instructions, nothing to check.
- if (CandPairs.empty())
- continue;
- // Run through the candidate pairs and see if any of the registers
- // defined in the PLD instructions are used by this instruction.
- // Note: the size of CandPairs can change in the loop.
- for (unsigned Idx = 0; Idx < CandPairs.size(); Idx++) {
- GOTDefUsePair &Pair = CandPairs[Idx];
- // The instruction does not use or modify this PLD's def reg,
- // ignore it.
- if (!BBI->readsRegister(Pair.DefReg, TRI) &&
- !BBI->modifiesRegister(Pair.DefReg, TRI))
- continue;
- // The use needs to be used in the address compuation and not
- // as the register being stored for a store.
- const MachineOperand *UseOp =
- hasPCRelativeForm(*BBI) ? &BBI->getOperand(2) : nullptr;
- // Check for a valid use.
- if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg &&
- UseOp->isUse() && UseOp->isKill()) {
- Pair.UseInst = BBI;
- Pair.UseReg = BBI->getOperand(0).getReg();
- ValidPairs.push_back(Pair);
- }
- CandPairs.erase(CandPairs.begin() + Idx);
- }
- }
- // Go through all of the pairs and check for any more valid uses.
- for (auto Pair = ValidPairs.begin(); Pair != ValidPairs.end(); Pair++) {
- // We shouldn't be here if we don't have a valid pair.
- assert(Pair->UseInst.isValid() && Pair->StillValid &&
- "Kept an invalid def/use pair for GOT PCRel opt");
- // We have found a potential pair. Search through the instructions
- // between the def and the use to see if it is valid to mark this as a
- // linker opt.
- MachineBasicBlock::iterator BBI = Pair->DefInst;
- ++BBI;
- for (; BBI != Pair->UseInst; ++BBI) {
- if (BBI->readsRegister(Pair->UseReg, TRI) ||
- BBI->modifiesRegister(Pair->UseReg, TRI)) {
- Pair->StillValid = false;
- break;
- }
- }
- if (!Pair->StillValid)
- continue;
- // The load/store instruction that uses the address from the PLD will
- // either use a register (for a store) or define a register (for the
- // load). That register will be added as an implicit def to the PLD
- // and as an implicit use on the second memory op. This is a precaution
- // to prevent future passes from using that register between the two
- // instructions.
- MachineOperand ImplDef =
- MachineOperand::CreateReg(Pair->UseReg, true, true);
- MachineOperand ImplUse =
- MachineOperand::CreateReg(Pair->UseReg, false, true);
- Pair->DefInst->addOperand(ImplDef);
- Pair->UseInst->addOperand(ImplUse);
- // Create the symbol.
- MCContext &Context = MF->getContext();
- MCSymbol *Symbol = Context.createNamedTempSymbol("pcrel");
- MachineOperand PCRelLabel =
- MachineOperand::CreateMCSymbol(Symbol, PPCII::MO_PCREL_OPT_FLAG);
- Pair->DefInst->addOperand(*MF, PCRelLabel);
- Pair->UseInst->addOperand(*MF, PCRelLabel);
- MadeChange |= true;
- }
- return MadeChange;
- }
- // This function removes redundant pairs of accumulator prime/unprime
- // instructions. In some situations, it's possible the compiler inserts an
- // accumulator prime instruction followed by an unprime instruction (e.g.
- // when we store an accumulator after restoring it from a spill). If the
- // accumulator is not used between the two, they can be removed. This
- // function removes these redundant pairs from basic blocks.
- // The algorithm is quite straightforward - every time we encounter a prime
- // instruction, the primed register is added to a candidate set. Any use
- // other than a prime removes the candidate from the set and any de-prime
- // of a current candidate marks both the prime and de-prime for removal.
- // This way we ensure we only remove prime/de-prime *pairs* with no
- // intervening uses.
- bool removeAccPrimeUnprime(MachineBasicBlock &MBB) {
- DenseSet<MachineInstr *> InstrsToErase;
- // Initially, none of the acc registers are candidates.
- SmallVector<MachineInstr *, 8> Candidates(
- PPC::UACCRCRegClass.getNumRegs(), nullptr);
- for (MachineInstr &BBI : MBB.instrs()) {
- unsigned Opc = BBI.getOpcode();
- // If we are visiting a xxmtacc instruction, we add it and its operand
- // register to the candidate set.
- if (Opc == PPC::XXMTACC) {
- Register Acc = BBI.getOperand(0).getReg();
- assert(PPC::ACCRCRegClass.contains(Acc) &&
- "Unexpected register for XXMTACC");
- Candidates[Acc - PPC::ACC0] = &BBI;
- }
- // If we are visiting a xxmfacc instruction and its operand register is
- // in the candidate set, we mark the two instructions for removal.
- else if (Opc == PPC::XXMFACC) {
- Register Acc = BBI.getOperand(0).getReg();
- assert(PPC::ACCRCRegClass.contains(Acc) &&
- "Unexpected register for XXMFACC");
- if (!Candidates[Acc - PPC::ACC0])
- continue;
- InstrsToErase.insert(&BBI);
- InstrsToErase.insert(Candidates[Acc - PPC::ACC0]);
- }
- // If we are visiting an instruction using an accumulator register
- // as operand, we remove it from the candidate set.
- else {
- for (MachineOperand &Operand : BBI.operands()) {
- if (!Operand.isReg())
- continue;
- Register Reg = Operand.getReg();
- if (PPC::ACCRCRegClass.contains(Reg))
- Candidates[Reg - PPC::ACC0] = nullptr;
- }
- }
- }
- for (MachineInstr *MI : InstrsToErase)
- MI->eraseFromParent();
- NumRemovedInPreEmit += InstrsToErase.size();
- return !InstrsToErase.empty();
- }
- bool runOnMachineFunction(MachineFunction &MF) override {
- if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
- // Remove UNENCODED_NOP even when this pass is disabled.
- // This needs to be done unconditionally so we don't emit zeros
- // in the instruction stream.
- SmallVector<MachineInstr *, 4> InstrsToErase;
- for (MachineBasicBlock &MBB : MF)
- for (MachineInstr &MI : MBB)
- if (MI.getOpcode() == PPC::UNENCODED_NOP)
- InstrsToErase.push_back(&MI);
- for (MachineInstr *MI : InstrsToErase)
- MI->eraseFromParent();
- return false;
- }
- bool Changed = false;
- const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- SmallVector<MachineInstr *, 4> InstrsToErase;
- for (MachineBasicBlock &MBB : MF) {
- Changed |= removeRedundantLIs(MBB, TRI);
- Changed |= addLinkerOpt(MBB, TRI);
- Changed |= removeAccPrimeUnprime(MBB);
- for (MachineInstr &MI : MBB) {
- unsigned Opc = MI.getOpcode();
- if (Opc == PPC::UNENCODED_NOP) {
- InstrsToErase.push_back(&MI);
- continue;
- }
- // Detect self copies - these can result from running AADB.
- if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) {
- const MCInstrDesc &MCID = TII->get(Opc);
- if (MCID.getNumOperands() == 3 &&
- MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
- MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) {
- NumberOfSelfCopies++;
- LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
- LLVM_DEBUG(MI.dump());
- InstrsToErase.push_back(&MI);
- continue;
- }
- else if (MCID.getNumOperands() == 2 &&
- MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
- NumberOfSelfCopies++;
- LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
- LLVM_DEBUG(MI.dump());
- InstrsToErase.push_back(&MI);
- continue;
- }
- }
- MachineInstr *DefMIToErase = nullptr;
- if (TII->convertToImmediateForm(MI, &DefMIToErase)) {
- Changed = true;
- NumRRConvertedInPreEmit++;
- LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
- LLVM_DEBUG(MI.dump());
- if (DefMIToErase) {
- InstrsToErase.push_back(DefMIToErase);
- }
- }
- if (TII->foldFrameOffset(MI)) {
- Changed = true;
- NumFrameOffFoldInPreEmit++;
- LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
- LLVM_DEBUG(MI.dump());
- }
- }
- // Eliminate conditional branch based on a constant CR bit by
- // CRSET or CRUNSET. We eliminate the conditional branch or
- // convert it into an unconditional branch. Also, if the CR bit
- // is not used by other instructions, we eliminate CRSET as well.
- auto I = MBB.getFirstInstrTerminator();
- if (I == MBB.instr_end())
- continue;
- MachineInstr *Br = &*I;
- if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn)
- continue;
- MachineInstr *CRSetMI = nullptr;
- Register CRBit = Br->getOperand(0).getReg();
- unsigned CRReg = getCRFromCRBit(CRBit);
- bool SeenUse = false;
- MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend();
- for (It++; It != Er; It++) {
- if (It->modifiesRegister(CRBit, TRI)) {
- if ((It->getOpcode() == PPC::CRUNSET ||
- It->getOpcode() == PPC::CRSET) &&
- It->getOperand(0).getReg() == CRBit)
- CRSetMI = &*It;
- break;
- }
- if (It->readsRegister(CRBit, TRI))
- SeenUse = true;
- }
- if (!CRSetMI) continue;
- unsigned CRSetOp = CRSetMI->getOpcode();
- if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) ||
- (Br->getOpcode() == PPC::BC && CRSetOp == PPC::CRUNSET)) {
- // Remove this branch since it cannot be taken.
- InstrsToErase.push_back(Br);
- MBB.removeSuccessor(Br->getOperand(1).getMBB());
- }
- else {
- // This conditional branch is always taken. So, remove all branches
- // and insert an unconditional branch to the destination of this.
- MachineBasicBlock::iterator It = Br, Er = MBB.end();
- for (; It != Er; It++) {
- if (It->isDebugInstr()) continue;
- assert(It->isTerminator() && "Non-terminator after a terminator");
- InstrsToErase.push_back(&*It);
- }
- if (!MBB.isLayoutSuccessor(Br->getOperand(1).getMBB())) {
- ArrayRef<MachineOperand> NoCond;
- TII->insertBranch(MBB, Br->getOperand(1).getMBB(), nullptr,
- NoCond, Br->getDebugLoc());
- }
- for (auto &Succ : MBB.successors())
- if (Succ != Br->getOperand(1).getMBB()) {
- MBB.removeSuccessor(Succ);
- break;
- }
- }
- // If the CRBit is not used by another instruction, we can eliminate
- // CRSET/CRUNSET instruction.
- if (!SeenUse) {
- // We need to check use of the CRBit in successors.
- for (auto &SuccMBB : MBB.successors())
- if (SuccMBB->isLiveIn(CRBit) || SuccMBB->isLiveIn(CRReg)) {
- SeenUse = true;
- break;
- }
- if (!SeenUse)
- InstrsToErase.push_back(CRSetMI);
- }
- }
- for (MachineInstr *MI : InstrsToErase) {
- LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: ");
- LLVM_DEBUG(MI->dump());
- MI->eraseFromParent();
- NumRemovedInPreEmit++;
- }
- return Changed;
- }
- };
- }
- INITIALIZE_PASS(PPCPreEmitPeephole, DEBUG_TYPE, "PowerPC Pre-Emit Peephole",
- false, false)
- char PPCPreEmitPeephole::ID = 0;
- FunctionPass *llvm::createPPCPreEmitPeepholePass() {
- return new PPCPreEmitPeephole();
- }
|