123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429 |
- //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // The purpose of this pass is to employ a canonical code transformation so
- // that code compiled with slightly different IR passes can be diffed more
- // effectively than otherwise. This is done by renaming vregs in a given
- // LiveRange in a canonical way. This pass also does a pseudo-scheduling to
- // move defs closer to their use in order to reduce diffs caused by slightly
- // different schedules.
- //
- // Basic Usage:
- //
- // llc -o - -run-pass mir-canonicalizer example.mir
- //
- // Reorders instructions canonically.
- // Renames virtual register operands canonically.
- // Strips certain MIR artifacts (optionally).
- //
- //===----------------------------------------------------------------------===//
- #include "MIRVRegNamerUtils.h"
- #include "llvm/ADT/PostOrderIterator.h"
- #include "llvm/ADT/STLExtras.h"
- #include "llvm/CodeGen/MachineFunctionPass.h"
- #include "llvm/CodeGen/MachineInstrBuilder.h"
- #include "llvm/CodeGen/MachineRegisterInfo.h"
- #include "llvm/CodeGen/Passes.h"
- #include "llvm/InitializePasses.h"
- #include "llvm/Support/Debug.h"
- #include "llvm/Support/raw_ostream.h"
- #include <queue>
- using namespace llvm;
- #define DEBUG_TYPE "mir-canonicalizer"
- static cl::opt<unsigned>
- CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
- cl::value_desc("N"),
- cl::desc("Function number to canonicalize."));
- namespace {
- class MIRCanonicalizer : public MachineFunctionPass {
- public:
- static char ID;
- MIRCanonicalizer() : MachineFunctionPass(ID) {}
- StringRef getPassName() const override {
- return "Rename register operands in a canonical ordering.";
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- bool runOnMachineFunction(MachineFunction &MF) override;
- };
- } // end anonymous namespace
- char MIRCanonicalizer::ID;
- char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
- INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
- "Rename Register Operands Canonically", false, false)
- INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
- "Rename Register Operands Canonically", false, false)
- static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
- if (MF.empty())
- return {};
- ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
- std::vector<MachineBasicBlock *> RPOList;
- append_range(RPOList, RPOT);
- return RPOList;
- }
- static bool
- rescheduleLexographically(std::vector<MachineInstr *> instructions,
- MachineBasicBlock *MBB,
- std::function<MachineBasicBlock::iterator()> getPos) {
- bool Changed = false;
- using StringInstrPair = std::pair<std::string, MachineInstr *>;
- std::vector<StringInstrPair> StringInstrMap;
- for (auto *II : instructions) {
- std::string S;
- raw_string_ostream OS(S);
- II->print(OS);
- OS.flush();
- // Trim the assignment, or start from the beginning in the case of a store.
- const size_t i = S.find('=');
- StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
- }
- llvm::sort(StringInstrMap,
- [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
- return (a.first < b.first);
- });
- for (auto &II : StringInstrMap) {
- LLVM_DEBUG({
- dbgs() << "Splicing ";
- II.second->dump();
- dbgs() << " right before: ";
- getPos()->dump();
- });
- Changed = true;
- MBB->splice(getPos(), MBB, II.second);
- }
- return Changed;
- }
- static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
- MachineBasicBlock *MBB) {
- bool Changed = false;
- // Calculates the distance of MI from the beginning of its parent BB.
- auto getInstrIdx = [](const MachineInstr &MI) {
- unsigned i = 0;
- for (auto &CurMI : *MI.getParent()) {
- if (&CurMI == &MI)
- return i;
- i++;
- }
- return ~0U;
- };
- // Pre-Populate vector of instructions to reschedule so that we don't
- // clobber the iterator.
- std::vector<MachineInstr *> Instructions;
- for (auto &MI : *MBB) {
- Instructions.push_back(&MI);
- }
- std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
- std::map<unsigned, MachineInstr *> MultiUserLookup;
- unsigned UseToBringDefCloserToCount = 0;
- std::vector<MachineInstr *> PseudoIdempotentInstructions;
- std::vector<unsigned> PhysRegDefs;
- for (auto *II : Instructions) {
- for (unsigned i = 1; i < II->getNumOperands(); i++) {
- MachineOperand &MO = II->getOperand(i);
- if (!MO.isReg())
- continue;
- if (Register::isVirtualRegister(MO.getReg()))
- continue;
- if (!MO.isDef())
- continue;
- PhysRegDefs.push_back(MO.getReg());
- }
- }
- for (auto *II : Instructions) {
- if (II->getNumOperands() == 0)
- continue;
- if (II->mayLoadOrStore())
- continue;
- MachineOperand &MO = II->getOperand(0);
- if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
- continue;
- if (!MO.isDef())
- continue;
- bool IsPseudoIdempotent = true;
- for (unsigned i = 1; i < II->getNumOperands(); i++) {
- if (II->getOperand(i).isImm()) {
- continue;
- }
- if (II->getOperand(i).isReg()) {
- if (!Register::isVirtualRegister(II->getOperand(i).getReg()))
- if (!llvm::is_contained(PhysRegDefs, II->getOperand(i).getReg())) {
- continue;
- }
- }
- IsPseudoIdempotent = false;
- break;
- }
- if (IsPseudoIdempotent) {
- PseudoIdempotentInstructions.push_back(II);
- continue;
- }
- LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
- MachineInstr *Def = II;
- unsigned Distance = ~0U;
- MachineInstr *UseToBringDefCloserTo = nullptr;
- MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
- for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
- MachineInstr *UseInst = UO.getParent();
- const unsigned DefLoc = getInstrIdx(*Def);
- const unsigned UseLoc = getInstrIdx(*UseInst);
- const unsigned Delta = (UseLoc - DefLoc);
- if (UseInst->getParent() != Def->getParent())
- continue;
- if (DefLoc >= UseLoc)
- continue;
- if (Delta < Distance) {
- Distance = Delta;
- UseToBringDefCloserTo = UseInst;
- MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
- }
- }
- const auto BBE = MBB->instr_end();
- MachineBasicBlock::iterator DefI = BBE;
- MachineBasicBlock::iterator UseI = BBE;
- for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
- if (DefI != BBE && UseI != BBE)
- break;
- if (&*BBI == Def) {
- DefI = BBI;
- continue;
- }
- if (&*BBI == UseToBringDefCloserTo) {
- UseI = BBI;
- continue;
- }
- }
- if (DefI == BBE || UseI == BBE)
- continue;
- LLVM_DEBUG({
- dbgs() << "Splicing ";
- DefI->dump();
- dbgs() << " right before: ";
- UseI->dump();
- });
- MultiUsers[UseToBringDefCloserTo].push_back(Def);
- Changed = true;
- MBB->splice(UseI, MBB, DefI);
- }
- // Sort the defs for users of multiple defs lexographically.
- for (const auto &E : MultiUserLookup) {
- auto UseI = llvm::find_if(MBB->instrs(), [&](MachineInstr &MI) -> bool {
- return &MI == E.second;
- });
- if (UseI == MBB->instr_end())
- continue;
- LLVM_DEBUG(
- dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
- Changed |= rescheduleLexographically(
- MultiUsers[E.second], MBB,
- [&]() -> MachineBasicBlock::iterator { return UseI; });
- }
- PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
- LLVM_DEBUG(
- dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
- Changed |= rescheduleLexographically(
- PseudoIdempotentInstructions, MBB,
- [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
- return Changed;
- }
- static bool propagateLocalCopies(MachineBasicBlock *MBB) {
- bool Changed = false;
- MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- std::vector<MachineInstr *> Copies;
- for (MachineInstr &MI : MBB->instrs()) {
- if (MI.isCopy())
- Copies.push_back(&MI);
- }
- for (MachineInstr *MI : Copies) {
- if (!MI->getOperand(0).isReg())
- continue;
- if (!MI->getOperand(1).isReg())
- continue;
- const Register Dst = MI->getOperand(0).getReg();
- const Register Src = MI->getOperand(1).getReg();
- if (!Register::isVirtualRegister(Dst))
- continue;
- if (!Register::isVirtualRegister(Src))
- continue;
- // Not folding COPY instructions if regbankselect has not set the RCs.
- // Why are we only considering Register Classes? Because the verifier
- // sometimes gets upset if the register classes don't match even if the
- // types do. A future patch might add COPY folding for matching types in
- // pre-registerbankselect code.
- if (!MRI.getRegClassOrNull(Dst))
- continue;
- if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
- continue;
- std::vector<MachineOperand *> Uses;
- for (MachineOperand &MO : MRI.use_operands(Dst))
- Uses.push_back(&MO);
- for (auto *MO : Uses)
- MO->setReg(Src);
- Changed = true;
- MI->eraseFromParent();
- }
- return Changed;
- }
- static bool doDefKillClear(MachineBasicBlock *MBB) {
- bool Changed = false;
- for (auto &MI : *MBB) {
- for (auto &MO : MI.operands()) {
- if (!MO.isReg())
- continue;
- if (!MO.isDef() && MO.isKill()) {
- Changed = true;
- MO.setIsKill(false);
- }
- if (MO.isDef() && MO.isDead()) {
- Changed = true;
- MO.setIsDead(false);
- }
- }
- }
- return Changed;
- }
- static bool runOnBasicBlock(MachineBasicBlock *MBB,
- unsigned BasicBlockNum, VRegRenamer &Renamer) {
- LLVM_DEBUG({
- dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n";
- dbgs() << "\n\n================================================\n\n";
- });
- bool Changed = false;
- LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
- LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
- MBB->dump(););
- Changed |= propagateLocalCopies(MBB);
- LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
- LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
- unsigned IdempotentInstCount = 0;
- Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
- LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
- Changed |= Renamer.renameVRegs(MBB, BasicBlockNum);
- // TODO: Consider dropping this. Dropping kill defs is probably not
- // semantically sound.
- Changed |= doDefKillClear(MBB);
- LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
- dbgs() << "\n";);
- LLVM_DEBUG(
- dbgs() << "\n\n================================================\n\n");
- return Changed;
- }
- bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
- static unsigned functionNum = 0;
- if (CanonicalizeFunctionNumber != ~0U) {
- if (CanonicalizeFunctionNumber != functionNum++)
- return false;
- LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
- << "\n";);
- }
- // we need a valid vreg to create a vreg type for skipping all those
- // stray vreg numbers so reach alignment/canonical vreg values.
- std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
- LLVM_DEBUG(
- dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n";
- dbgs() << "\n\n================================================\n\n";
- dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
- for (auto MBB
- : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
- << "\n\n================================================\n\n";);
- unsigned BBNum = 0;
- bool Changed = false;
- MachineRegisterInfo &MRI = MF.getRegInfo();
- VRegRenamer Renamer(MRI);
- for (auto MBB : RPOList)
- Changed |= runOnBasicBlock(MBB, BBNum++, Renamer);
- return Changed;
- }
|