1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291 |
- //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // \file
- // Implementation file for the IRSimilarityIdentifier for identifying
- // similarities in IR including the IRInstructionMapper.
- //
- //===----------------------------------------------------------------------===//
- #include "llvm/Analysis/IRSimilarityIdentifier.h"
- #include "llvm/ADT/DenseMap.h"
- #include "llvm/IR/Intrinsics.h"
- #include "llvm/IR/Operator.h"
- #include "llvm/IR/User.h"
- #include "llvm/InitializePasses.h"
- #include "llvm/Support/SuffixTree.h"
- using namespace llvm;
- using namespace IRSimilarity;
- namespace llvm {
- cl::opt<bool>
- DisableBranches("no-ir-sim-branch-matching", cl::init(false),
- cl::ReallyHidden,
- cl::desc("disable similarity matching, and outlining, "
- "across branches for debugging purposes."));
- cl::opt<bool>
- DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
- cl::ReallyHidden,
- cl::desc("disable outlining indirect calls."));
- cl::opt<bool>
- MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
- cl::desc("only allow matching call instructions if the "
- "name and type signature match."));
- cl::opt<bool>
- DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
- cl::desc("Don't match or outline intrinsics"));
- } // namespace llvm
- IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
- IRInstructionDataList &IDList)
- : Inst(&I), Legal(Legality), IDL(&IDList) {
- initializeInstruction();
- }
- void IRInstructionData::initializeInstruction() {
- // We check for whether we have a comparison instruction. If it is, we
- // find the "less than" version of the predicate for consistency for
- // comparison instructions throught the program.
- if (CmpInst *C = dyn_cast<CmpInst>(Inst)) {
- CmpInst::Predicate Predicate = predicateForConsistency(C);
- if (Predicate != C->getPredicate())
- RevisedPredicate = Predicate;
- }
- // Here we collect the operands and their types for determining whether
- // the structure of the operand use matches between two different candidates.
- for (Use &OI : Inst->operands()) {
- if (isa<CmpInst>(Inst) && RevisedPredicate) {
- // If we have a CmpInst where the predicate is reversed, it means the
- // operands must be reversed as well.
- OperVals.insert(OperVals.begin(), OI.get());
- continue;
- }
- OperVals.push_back(OI.get());
- }
- // We capture the incoming BasicBlocks as values as well as the incoming
- // Values in order to check for structural similarity.
- if (PHINode *PN = dyn_cast<PHINode>(Inst))
- for (BasicBlock *BB : PN->blocks())
- OperVals.push_back(BB);
- }
- IRInstructionData::IRInstructionData(IRInstructionDataList &IDList)
- : IDL(&IDList) {}
- void IRInstructionData::setBranchSuccessors(
- DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
- assert(isa<BranchInst>(Inst) && "Instruction must be branch");
- BranchInst *BI = cast<BranchInst>(Inst);
- DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
- BBNumIt = BasicBlockToInteger.find(BI->getParent());
- assert(BBNumIt != BasicBlockToInteger.end() &&
- "Could not find location for BasicBlock!");
- int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
- for (BasicBlock *Successor : BI->successors()) {
- BBNumIt = BasicBlockToInteger.find(Successor);
- assert(BBNumIt != BasicBlockToInteger.end() &&
- "Could not find number for BasicBlock!");
- int OtherBlockNumber = static_cast<int>(BBNumIt->second);
- int Relative = OtherBlockNumber - CurrentBlockNumber;
- RelativeBlockLocations.push_back(Relative);
- }
- }
- void IRInstructionData::setCalleeName(bool MatchByName) {
- CallInst *CI = dyn_cast<CallInst>(Inst);
- assert(CI && "Instruction must be call");
- CalleeName = "";
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- // To hash intrinsics, we use the opcode, and types like the other
- // instructions, but also, the Intrinsic ID, and the Name of the
- // intrinsic.
- Intrinsic::ID IntrinsicID = II->getIntrinsicID();
- FunctionType *FT = II->getFunctionType();
- // If there is an overloaded name, we have to use the complex version
- // of getName to get the entire string.
- if (Intrinsic::isOverloaded(IntrinsicID))
- CalleeName =
- Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
- // If there is not an overloaded name, we only need to use this version.
- else
- CalleeName = Intrinsic::getName(IntrinsicID).str();
- return;
- }
- if (!CI->isIndirectCall() && MatchByName)
- CalleeName = CI->getCalledFunction()->getName().str();
- }
- void IRInstructionData::setPHIPredecessors(
- DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
- assert(isa<PHINode>(Inst) && "Instruction must be phi node");
- PHINode *PN = cast<PHINode>(Inst);
- DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
- BBNumIt = BasicBlockToInteger.find(PN->getParent());
- assert(BBNumIt != BasicBlockToInteger.end() &&
- "Could not find location for BasicBlock!");
- int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
- // Convert the incoming blocks of the PHINode to an integer value, based on
- // the relative distances between the current block and the incoming block.
- for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
- BasicBlock *Incoming = PN->getIncomingBlock(Idx);
- BBNumIt = BasicBlockToInteger.find(Incoming);
- assert(BBNumIt != BasicBlockToInteger.end() &&
- "Could not find number for BasicBlock!");
- int OtherBlockNumber = static_cast<int>(BBNumIt->second);
- int Relative = OtherBlockNumber - CurrentBlockNumber;
- RelativeBlockLocations.push_back(Relative);
- RelativeBlockLocations.push_back(Relative);
- }
- }
- CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) {
- switch (CI->getPredicate()) {
- case CmpInst::FCMP_OGT:
- case CmpInst::FCMP_UGT:
- case CmpInst::FCMP_OGE:
- case CmpInst::FCMP_UGE:
- case CmpInst::ICMP_SGT:
- case CmpInst::ICMP_UGT:
- case CmpInst::ICMP_SGE:
- case CmpInst::ICMP_UGE:
- return CI->getSwappedPredicate();
- default:
- return CI->getPredicate();
- }
- }
- CmpInst::Predicate IRInstructionData::getPredicate() const {
- assert(isa<CmpInst>(Inst) &&
- "Can only get a predicate from a compare instruction");
- if (RevisedPredicate)
- return *RevisedPredicate;
- return cast<CmpInst>(Inst)->getPredicate();
- }
- StringRef IRInstructionData::getCalleeName() const {
- assert(isa<CallInst>(Inst) &&
- "Can only get a name from a call instruction");
- assert(CalleeName && "CalleeName has not been set");
- return *CalleeName;
- }
- bool IRSimilarity::isClose(const IRInstructionData &A,
- const IRInstructionData &B) {
- if (!A.Legal || !B.Legal)
- return false;
- // Check if we are performing the same sort of operation on the same types
- // but not on the same values.
- if (!A.Inst->isSameOperationAs(B.Inst)) {
- // If there is a predicate, this means that either there is a swapped
- // predicate, or that the types are different, we want to make sure that
- // the predicates are equivalent via swapping.
- if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) {
- if (A.getPredicate() != B.getPredicate())
- return false;
- // If the predicates are the same via swap, make sure that the types are
- // still the same.
- auto ZippedTypes = zip(A.OperVals, B.OperVals);
- return all_of(
- ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
- return std::get<0>(R)->getType() == std::get<1>(R)->getType();
- });
- }
- return false;
- }
- // Since any GEP Instruction operands after the first operand cannot be
- // defined by a register, we must make sure that the operands after the first
- // are the same in the two instructions
- if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) {
- auto *OtherGEP = cast<GetElementPtrInst>(B.Inst);
- // If the instructions do not have the same inbounds restrictions, we do
- // not consider them the same.
- if (GEP->isInBounds() != OtherGEP->isInBounds())
- return false;
- auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices());
- // We increment here since we do not care about the first instruction,
- // we only care about the following operands since they must be the
- // exact same to be considered similar.
- return all_of(drop_begin(ZippedOperands),
- [](std::tuple<llvm::Use &, llvm::Use &> R) {
- return std::get<0>(R) == std::get<1>(R);
- });
- }
- // If the instructions are functions calls, we make sure that the function
- // name is the same. We already know that the types are since is
- // isSameOperationAs is true.
- if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {
- if (A.getCalleeName().str() != B.getCalleeName().str())
- return false;
- }
- if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) &&
- A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())
- return false;
- return true;
- }
- // TODO: This is the same as the MachineOutliner, and should be consolidated
- // into the same interface.
- void IRInstructionMapper::convertToUnsignedVec(
- BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
- std::vector<unsigned> &IntegerMapping) {
- BasicBlock::iterator It = BB.begin();
- std::vector<unsigned> IntegerMappingForBB;
- std::vector<IRInstructionData *> InstrListForBB;
- for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {
- switch (InstClassifier.visit(*It)) {
- case InstrType::Legal:
- mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
- break;
- case InstrType::Illegal:
- mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
- break;
- case InstrType::Invisible:
- AddedIllegalLastTime = false;
- break;
- }
- }
- if (AddedIllegalLastTime)
- mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
- for (IRInstructionData *ID : InstrListForBB)
- this->IDL->push_back(*ID);
- llvm::append_range(InstrList, InstrListForBB);
- llvm::append_range(IntegerMapping, IntegerMappingForBB);
- }
- // TODO: This is the same as the MachineOutliner, and should be consolidated
- // into the same interface.
- unsigned IRInstructionMapper::mapToLegalUnsigned(
- BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
- std::vector<IRInstructionData *> &InstrListForBB) {
- // We added something legal, so we should unset the AddedLegalLastTime
- // flag.
- AddedIllegalLastTime = false;
- // If we have at least two adjacent legal instructions (which may have
- // invisible instructions in between), remember that.
- if (CanCombineWithPrevInstr)
- HaveLegalRange = true;
- CanCombineWithPrevInstr = true;
- // Get the integer for this instruction or give it the current
- // LegalInstrNumber.
- IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL);
- InstrListForBB.push_back(ID);
- if (isa<BranchInst>(*It))
- ID->setBranchSuccessors(BasicBlockToInteger);
- if (isa<CallInst>(*It))
- ID->setCalleeName(EnableMatchCallsByName);
- if (isa<PHINode>(*It))
- ID->setPHIPredecessors(BasicBlockToInteger);
- // Add to the instruction list
- bool WasInserted;
- DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator
- ResultIt;
- std::tie(ResultIt, WasInserted) =
- InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber));
- unsigned INumber = ResultIt->second;
- // There was an insertion.
- if (WasInserted)
- LegalInstrNumber++;
- IntegerMappingForBB.push_back(INumber);
- // Make sure we don't overflow or use any integers reserved by the DenseMap.
- assert(LegalInstrNumber < IllegalInstrNumber &&
- "Instruction mapping overflow!");
- assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
- "Tried to assign DenseMap tombstone or empty key to instruction.");
- assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
- "Tried to assign DenseMap tombstone or empty key to instruction.");
- return INumber;
- }
- IRInstructionData *
- IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality,
- IRInstructionDataList &IDL) {
- return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL);
- }
- IRInstructionData *
- IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) {
- return new (InstDataAllocator->Allocate()) IRInstructionData(IDL);
- }
- IRInstructionDataList *
- IRInstructionMapper::allocateIRInstructionDataList() {
- return new (IDLAllocator->Allocate()) IRInstructionDataList();
- }
- // TODO: This is the same as the MachineOutliner, and should be consolidated
- // into the same interface.
- unsigned IRInstructionMapper::mapToIllegalUnsigned(
- BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
- std::vector<IRInstructionData *> &InstrListForBB, bool End) {
- // Can't combine an illegal instruction. Set the flag.
- CanCombineWithPrevInstr = false;
- // Only add one illegal number per range of legal numbers.
- if (AddedIllegalLastTime)
- return IllegalInstrNumber;
- IRInstructionData *ID = nullptr;
- if (!End)
- ID = allocateIRInstructionData(*It, false, *IDL);
- else
- ID = allocateIRInstructionData(*IDL);
- InstrListForBB.push_back(ID);
- // Remember that we added an illegal number last time.
- AddedIllegalLastTime = true;
- unsigned INumber = IllegalInstrNumber;
- IntegerMappingForBB.push_back(IllegalInstrNumber--);
- assert(LegalInstrNumber < IllegalInstrNumber &&
- "Instruction mapping overflow!");
- assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
- "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
- assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
- "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
- return INumber;
- }
- IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
- IRInstructionData *FirstInstIt,
- IRInstructionData *LastInstIt)
- : StartIdx(StartIdx), Len(Len) {
- assert(FirstInstIt != nullptr && "Instruction is nullptr!");
- assert(LastInstIt != nullptr && "Instruction is nullptr!");
- assert(StartIdx + Len > StartIdx &&
- "Overflow for IRSimilarityCandidate range?");
- assert(Len - 1 == static_cast<unsigned>(std::distance(
- iterator(FirstInstIt), iterator(LastInstIt))) &&
- "Length of the first and last IRInstructionData do not match the "
- "given length");
- // We iterate over the given instructions, and map each unique value
- // to a unique number in the IRSimilarityCandidate ValueToNumber and
- // NumberToValue maps. A constant get its own value globally, the individual
- // uses of the constants are not considered to be unique.
- //
- // IR: Mapping Added:
- // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2
- // %add2 = add i32 %a, %1 %add2 -> 4
- // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5
- //
- // when replace with global values, starting from 1, would be
- //
- // 3 = add i32 1, 2
- // 4 = add i32 1, 3
- // 6 = add i32 5, 2
- unsigned LocalValNumber = 1;
- IRInstructionDataList::iterator ID = iterator(*FirstInstIt);
- for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {
- // Map the operand values to an unsigned integer if it does not already
- // have an unsigned integer assigned to it.
- for (Value *Arg : ID->OperVals)
- if (ValueToNumber.find(Arg) == ValueToNumber.end()) {
- ValueToNumber.try_emplace(Arg, LocalValNumber);
- NumberToValue.try_emplace(LocalValNumber, Arg);
- LocalValNumber++;
- }
- // Mapping the instructions to an unsigned integer if it is not already
- // exist in the mapping.
- if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) {
- ValueToNumber.try_emplace(ID->Inst, LocalValNumber);
- NumberToValue.try_emplace(LocalValNumber, ID->Inst);
- LocalValNumber++;
- }
- }
- // Setting the first and last instruction data pointers for the candidate. If
- // we got through the entire for loop without hitting an assert, we know
- // that both of these instructions are not nullptrs.
- FirstInst = FirstInstIt;
- LastInst = LastInstIt;
- // Add the basic blocks contained in the set into the global value numbering.
- DenseSet<BasicBlock *> BBSet;
- getBasicBlocks(BBSet);
- for (BasicBlock *BB : BBSet) {
- if (ValueToNumber.find(BB) != ValueToNumber.end())
- continue;
-
- ValueToNumber.try_emplace(BB, LocalValNumber);
- NumberToValue.try_emplace(LocalValNumber, BB);
- LocalValNumber++;
- }
- }
- bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A,
- const IRSimilarityCandidate &B) {
- if (A.getLength() != B.getLength())
- return false;
- auto InstrDataForBoth =
- zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));
- return all_of(InstrDataForBoth,
- [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
- IRInstructionData &A = std::get<0>(R);
- IRInstructionData &B = std::get<1>(R);
- if (!A.Legal || !B.Legal)
- return false;
- return isClose(A, B);
- });
- }
- /// Determine if one or more of the assigned global value numbers for the
- /// operands in \p TargetValueNumbers is in the current mapping set for operand
- /// numbers in \p SourceOperands. The set of possible corresponding global
- /// value numbers are replaced with the most recent version of compatible
- /// values.
- ///
- /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global
- /// value number for the source IRInstructionCandidate.
- /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source
- /// IRSimilarityCandidate global value numbers to a set of possible numbers in
- /// the target.
- /// \param [in] SourceOperands - The operands in the original
- /// IRSimilarityCandidate in the current instruction.
- /// \param [in] TargetValueNumbers - The global value numbers of the operands in
- /// the corresponding Instruction in the other IRSimilarityCandidate.
- /// \returns true if there exists a possible mapping between the source
- /// Instruction operands and the target Instruction operands, and false if not.
- static bool checkNumberingAndReplaceCommutative(
- const DenseMap<Value *, unsigned> &SourceValueToNumberMapping,
- DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
- ArrayRef<Value *> &SourceOperands,
- DenseSet<unsigned> &TargetValueNumbers){
- DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
- unsigned ArgVal;
- bool WasInserted;
- // Iterate over the operands in the source IRSimilarityCandidate to determine
- // whether there exists an operand in the other IRSimilarityCandidate that
- // creates a valid mapping of Value to Value between the
- // IRSimilarityCaniddates.
- for (Value *V : SourceOperands) {
- ArgVal = SourceValueToNumberMapping.find(V)->second;
- // Instead of finding a current mapping, we attempt to insert a set.
- std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
- std::make_pair(ArgVal, TargetValueNumbers));
- // We need to iterate over the items in other IRSimilarityCandidate's
- // Instruction to determine whether there is a valid mapping of
- // Value to Value.
- DenseSet<unsigned> NewSet;
- for (unsigned &Curr : ValueMappingIt->second)
- // If we can find the value in the mapping, we add it to the new set.
- if (TargetValueNumbers.contains(Curr))
- NewSet.insert(Curr);
- // If we could not find a Value, return 0.
- if (NewSet.empty())
- return false;
-
- // Otherwise replace the old mapping with the newly constructed one.
- if (NewSet.size() != ValueMappingIt->second.size())
- ValueMappingIt->second.swap(NewSet);
- // We have reached no conclusions about the mapping, and cannot remove
- // any items from the other operands, so we move to check the next operand.
- if (ValueMappingIt->second.size() != 1)
- continue;
- unsigned ValToRemove = *ValueMappingIt->second.begin();
- // When there is only one item left in the mapping for and operand, remove
- // the value from the other operands. If it results in there being no
- // mapping, return false, it means the mapping is wrong
- for (Value *InnerV : SourceOperands) {
- if (V == InnerV)
- continue;
- unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;
- ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);
- if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())
- continue;
- ValueMappingIt->second.erase(ValToRemove);
- if (ValueMappingIt->second.empty())
- return false;
- }
- }
- return true;
- }
- /// Determine if operand number \p TargetArgVal is in the current mapping set
- /// for operand number \p SourceArgVal.
- ///
- /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global
- /// value numbers from source IRSimilarityCandidate to target
- /// IRSimilarityCandidate.
- /// \param [in] SourceArgVal The global value number for an operand in the
- /// in the original candidate.
- /// \param [in] TargetArgVal The global value number for the corresponding
- /// operand in the other candidate.
- /// \returns True if there exists a mapping and false if not.
- bool checkNumberingAndReplace(
- DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
- unsigned SourceArgVal, unsigned TargetArgVal) {
- // We are given two unsigned integers representing the global values of
- // the operands in different IRSimilarityCandidates and a current mapping
- // between the two.
- //
- // Source Operand GVN: 1
- // Target Operand GVN: 2
- // CurrentMapping: {1: {1, 2}}
- //
- // Since we have mapping, and the target operand is contained in the set, we
- // update it to:
- // CurrentMapping: {1: {2}}
- // and can return true. But, if the mapping was
- // CurrentMapping: {1: {3}}
- // we would return false.
- bool WasInserted;
- DenseMap<unsigned, DenseSet<unsigned>>::iterator Val;
- std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(
- std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal})));
- // If we created a new mapping, then we are done.
- if (WasInserted)
- return true;
- // If there is more than one option in the mapping set, and the target value
- // is included in the mapping set replace that set with one that only includes
- // the target value, as it is the only valid mapping via the non commutative
- // instruction.
- DenseSet<unsigned> &TargetSet = Val->second;
- if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) {
- TargetSet.clear();
- TargetSet.insert(TargetArgVal);
- return true;
- }
- // Return true if we can find the value in the set.
- return TargetSet.contains(TargetArgVal);
- }
- bool IRSimilarityCandidate::compareNonCommutativeOperandMapping(
- OperandMapping A, OperandMapping B) {
- // Iterators to keep track of where we are in the operands for each
- // Instruction.
- ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
- ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
- unsigned OperandLength = A.OperVals.size();
- // For each operand, get the value numbering and ensure it is consistent.
- for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
- unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;
- unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;
- // Attempt to add a set with only the target value. If there is no mapping
- // we can create it here.
- //
- // For an instruction like a subtraction:
- // IRSimilarityCandidateA: IRSimilarityCandidateB:
- // %resultA = sub %a, %b %resultB = sub %d, %e
- //
- // We map %a -> %d and %b -> %e.
- //
- // And check to see whether their mapping is consistent in
- // checkNumberingAndReplace.
- if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB))
- return false;
- if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA))
- return false;
- }
- return true;
- }
- bool IRSimilarityCandidate::compareCommutativeOperandMapping(
- OperandMapping A, OperandMapping B) {
- DenseSet<unsigned> ValueNumbersA;
- DenseSet<unsigned> ValueNumbersB;
- ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
- ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
- unsigned OperandLength = A.OperVals.size();
- // Find the value number sets for the operands.
- for (unsigned Idx = 0; Idx < OperandLength;
- Idx++, VItA++, VItB++) {
- ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second);
- ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second);
- }
- // Iterate over the operands in the first IRSimilarityCandidate and make sure
- // there exists a possible mapping with the operands in the second
- // IRSimilarityCandidate.
- if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber,
- A.ValueNumberMapping, A.OperVals,
- ValueNumbersB))
- return false;
- // Iterate over the operands in the second IRSimilarityCandidate and make sure
- // there exists a possible mapping with the operands in the first
- // IRSimilarityCandidate.
- if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber,
- B.ValueNumberMapping, B.OperVals,
- ValueNumbersA))
- return false;
- return true;
- }
- bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A,
- RelativeLocMapping B) {
- // Get the basic blocks the label refers to.
- BasicBlock *ABB = static_cast<BasicBlock *>(A.OperVal);
- BasicBlock *BBB = static_cast<BasicBlock *>(B.OperVal);
- // Get the basic blocks contained in each region.
- DenseSet<BasicBlock *> BasicBlockA;
- DenseSet<BasicBlock *> BasicBlockB;
- A.IRSC.getBasicBlocks(BasicBlockA);
- B.IRSC.getBasicBlocks(BasicBlockB);
-
- // Determine if the block is contained in the region.
- bool AContained = BasicBlockA.contains(ABB);
- bool BContained = BasicBlockB.contains(BBB);
- // Both blocks need to be contained in the region, or both need to be outside
- // the reigon.
- if (AContained != BContained)
- return false;
-
- // If both are contained, then we need to make sure that the relative
- // distance to the target blocks are the same.
- if (AContained)
- return A.RelativeLocation == B.RelativeLocation;
- return true;
- }
- bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
- const IRSimilarityCandidate &B) {
- DenseMap<unsigned, DenseSet<unsigned>> MappingA;
- DenseMap<unsigned, DenseSet<unsigned>> MappingB;
- return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB);
- }
- typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &,
- SmallVector<int, 4> &, ArrayRef<Value *> &,
- ArrayRef<Value *> &>
- ZippedRelativeLocationsT;
- bool IRSimilarityCandidate::compareStructure(
- const IRSimilarityCandidate &A, const IRSimilarityCandidate &B,
- DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
- DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
- if (A.getLength() != B.getLength())
- return false;
- if (A.ValueToNumber.size() != B.ValueToNumber.size())
- return false;
- iterator ItA = A.begin();
- iterator ItB = B.begin();
- // These ValueNumber Mapping sets create a create a mapping between the values
- // in one candidate to values in the other candidate. If we create a set with
- // one element, and that same element maps to the original element in the
- // candidate we have a good mapping.
- DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
- // Iterate over the instructions contained in each candidate
- unsigned SectionLength = A.getStartIdx() + A.getLength();
- for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;
- ItA++, ItB++, Loc++) {
- // Make sure the instructions are similar to one another.
- if (!isClose(*ItA, *ItB))
- return false;
- Instruction *IA = ItA->Inst;
- Instruction *IB = ItB->Inst;
- if (!ItA->Legal || !ItB->Legal)
- return false;
- // Get the operand sets for the instructions.
- ArrayRef<Value *> OperValsA = ItA->OperVals;
- ArrayRef<Value *> OperValsB = ItB->OperVals;
- unsigned InstValA = A.ValueToNumber.find(IA)->second;
- unsigned InstValB = B.ValueToNumber.find(IB)->second;
- bool WasInserted;
- // Ensure that the mappings for the instructions exists.
- std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
- std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
- if (!WasInserted && !ValueMappingIt->second.contains(InstValB))
- return false;
- std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert(
- std::make_pair(InstValB, DenseSet<unsigned>({InstValA})));
- if (!WasInserted && !ValueMappingIt->second.contains(InstValA))
- return false;
- // We have different paths for commutative instructions and non-commutative
- // instructions since commutative instructions could allow multiple mappings
- // to certain values.
- if (IA->isCommutative() && !isa<FPMathOperator>(IA) &&
- !isa<IntrinsicInst>(IA)) {
- if (!compareCommutativeOperandMapping(
- {A, OperValsA, ValueNumberMappingA},
- {B, OperValsB, ValueNumberMappingB}))
- return false;
- continue;
- }
- // Handle the non-commutative cases.
- if (!compareNonCommutativeOperandMapping(
- {A, OperValsA, ValueNumberMappingA},
- {B, OperValsB, ValueNumberMappingB}))
- return false;
- // Here we check that between two corresponding instructions,
- // when referring to a basic block in the same region, the
- // relative locations are the same. And, that the instructions refer to
- // basic blocks outside the region in the same corresponding locations.
- // We are able to make the assumption about blocks outside of the region
- // since the target block labels are considered values and will follow the
- // same number matching that we defined for the other instructions in the
- // region. So, at this point, in each location we target a specific block
- // outside the region, we are targeting a corresponding block in each
- // analagous location in the region we are comparing to.
- if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) &&
- !(isa<PHINode>(IA) && isa<PHINode>(IB)))
- continue;
- SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations;
- SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations;
- if (RelBlockLocsA.size() != RelBlockLocsB.size() &&
- OperValsA.size() != OperValsB.size())
- return false;
- ZippedRelativeLocationsT ZippedRelativeLocations =
- zip(RelBlockLocsA, RelBlockLocsB, OperValsA, OperValsB);
- if (any_of(ZippedRelativeLocations,
- [&A, &B](std::tuple<int, int, Value *, Value *> R) {
- return !checkRelativeLocations(
- {A, std::get<0>(R), std::get<2>(R)},
- {B, std::get<1>(R), std::get<3>(R)});
- }))
- return false;
- }
- return true;
- }
- bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A,
- const IRSimilarityCandidate &B) {
- auto DoesOverlap = [](const IRSimilarityCandidate &X,
- const IRSimilarityCandidate &Y) {
- // Check:
- // XXXXXX X starts before Y ends
- // YYYYYYY Y starts after X starts
- return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
- };
- return DoesOverlap(A, B) || DoesOverlap(B, A);
- }
- void IRSimilarityIdentifier::populateMapper(
- Module &M, std::vector<IRInstructionData *> &InstrList,
- std::vector<unsigned> &IntegerMapping) {
- std::vector<IRInstructionData *> InstrListForModule;
- std::vector<unsigned> IntegerMappingForModule;
- // Iterate over the functions in the module to map each Instruction in each
- // BasicBlock to an unsigned integer.
- Mapper.initializeForBBs(M);
- for (Function &F : M) {
- if (F.empty())
- continue;
- for (BasicBlock &BB : F) {
- // BB has potential to have similarity since it has a size greater than 2
- // and can therefore match other regions greater than 2. Map it to a list
- // of unsigned integers.
- Mapper.convertToUnsignedVec(BB, InstrListForModule,
- IntegerMappingForModule);
- }
- BasicBlock::iterator It = F.begin()->end();
- Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule,
- true);
- if (InstrListForModule.size() > 0)
- Mapper.IDL->push_back(*InstrListForModule.back());
- }
- // Insert the InstrListForModule at the end of the overall InstrList so that
- // we can have a long InstrList for the entire set of Modules being analyzed.
- llvm::append_range(InstrList, InstrListForModule);
- // Do the same as above, but for IntegerMapping.
- llvm::append_range(IntegerMapping, IntegerMappingForModule);
- }
- void IRSimilarityIdentifier::populateMapper(
- ArrayRef<std::unique_ptr<Module>> &Modules,
- std::vector<IRInstructionData *> &InstrList,
- std::vector<unsigned> &IntegerMapping) {
- // Iterate over, and map the instructions in each module.
- for (const std::unique_ptr<Module> &M : Modules)
- populateMapper(*M, InstrList, IntegerMapping);
- }
- /// From a repeated subsequence, find all the different instances of the
- /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from
- /// the IRInstructionData in subsequence.
- ///
- /// \param [in] Mapper - The instruction mapper for basic correctness checks.
- /// \param [in] InstrList - The vector that holds the instruction data.
- /// \param [in] IntegerMapping - The vector that holds the mapped integers.
- /// \param [out] CandsForRepSubstring - The vector to store the generated
- /// IRSimilarityCandidates.
- static void createCandidatesFromSuffixTree(
- const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList,
- std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS,
- std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
- unsigned StringLen = RS.Length;
- if (StringLen < 2)
- return;
- // Create an IRSimilarityCandidate for instance of this subsequence \p RS.
- for (const unsigned &StartIdx : RS.StartIndices) {
- unsigned EndIdx = StartIdx + StringLen - 1;
- // Check that this subsequence does not contain an illegal instruction.
- bool ContainsIllegal = false;
- for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
- unsigned Key = IntegerMapping[CurrIdx];
- if (Key > Mapper.IllegalInstrNumber) {
- ContainsIllegal = true;
- break;
- }
- }
- // If we have an illegal instruction, we should not create an
- // IRSimilarityCandidate for this region.
- if (ContainsIllegal)
- continue;
- // We are getting iterators to the instructions in this region of code
- // by advancing the start and end indices from the start of the
- // InstrList.
- std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin();
- std::advance(StartIt, StartIdx);
- std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin();
- std::advance(EndIt, EndIdx);
- CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
- }
- }
- void IRSimilarityCandidate::createCanonicalRelationFrom(
- IRSimilarityCandidate &SourceCand,
- DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
- DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {
- assert(SourceCand.CanonNumToNumber.size() != 0 &&
- "Base canonical relationship is empty!");
- assert(SourceCand.NumberToCanonNum.size() != 0 &&
- "Base canonical relationship is empty!");
- assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");
- assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");
- DenseSet<unsigned> UsedGVNs;
- // Iterate over the mappings provided from this candidate to SourceCand. We
- // are then able to map the GVN in this candidate to the same canonical number
- // given to the corresponding GVN in SourceCand.
- for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) {
- unsigned SourceGVN = GVNMapping.first;
- assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");
- unsigned ResultGVN;
- // We need special handling if we have more than one potential value. This
- // means that there are at least two GVNs that could correspond to this GVN.
- // This could lead to potential swapping later on, so we make a decision
- // here to ensure a one-to-one mapping.
- if (GVNMapping.second.size() > 1) {
- bool Found = false;
- for (unsigned Val : GVNMapping.second) {
- // We make sure the target value number hasn't already been reserved.
- if (UsedGVNs.contains(Val))
- continue;
- // We make sure that the opposite mapping is still consistent.
- DenseMap<unsigned, DenseSet<unsigned>>::iterator It =
- FromSourceMapping.find(Val);
- if (!It->second.contains(SourceGVN))
- continue;
- // We pick the first item that satisfies these conditions.
- Found = true;
- ResultGVN = Val;
- break;
- }
- assert(Found && "Could not find matching value for source GVN");
- (void)Found;
- } else
- ResultGVN = *GVNMapping.second.begin();
- // Whatever GVN is found, we mark it as used.
- UsedGVNs.insert(ResultGVN);
- unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);
- CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
- NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
- }
- DenseSet<BasicBlock *> BBSet;
- getBasicBlocks(BBSet);
- // Find canonical numbers for the BasicBlocks in the current candidate.
- // This is done by finding the corresponding value for the first instruction
- // in the block in the current candidate, finding the matching value in the
- // source candidate. Then by finding the parent of this value, use the
- // canonical number of the block in the source candidate for the canonical
- // number in the current candidate.
- for (BasicBlock *BB : BBSet) {
- unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second;
- // We can skip the BasicBlock if the canonical numbering has already been
- // found in a separate instruction.
- if (NumberToCanonNum.find(BBGVNForCurrCand) != NumberToCanonNum.end())
- continue;
- // If the basic block is the starting block, then the shared instruction may
- // not be the first instruction in the block, it will be the first
- // instruction in the similarity region.
- Value *FirstOutlineInst = BB == getStartBB()
- ? frontInstruction()
- : &*BB->instructionsWithoutDebug().begin();
- unsigned FirstInstGVN = *getGVN(FirstOutlineInst);
- unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN);
- unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum);
- Value *SourceV = *SourceCand.fromGVN(SourceGVN);
- BasicBlock *SourceBB = cast<Instruction>(SourceV)->getParent();
- unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB);
- unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN);
- CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand));
- NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN));
- }
- }
- void IRSimilarityCandidate::createCanonicalMappingFor(
- IRSimilarityCandidate &CurrCand) {
- assert(CurrCand.CanonNumToNumber.size() == 0 &&
- "Canonical Relationship is non-empty");
- assert(CurrCand.NumberToCanonNum.size() == 0 &&
- "Canonical Relationship is non-empty");
- unsigned CanonNum = 0;
- // Iterate over the value numbers found, the order does not matter in this
- // case.
- for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
- CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));
- CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));
- CanonNum++;
- }
- }
- /// From the list of IRSimilarityCandidates, perform a comparison between each
- /// IRSimilarityCandidate to determine if there are overlapping
- /// IRInstructionData, or if they do not have the same structure.
- ///
- /// \param [in] CandsForRepSubstring - The vector containing the
- /// IRSimilarityCandidates.
- /// \param [out] StructuralGroups - the mapping of unsigned integers to vector
- /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
- /// vector are structurally similar to one another.
- static void findCandidateStructures(
- std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
- DenseMap<unsigned, SimilarityGroup> &StructuralGroups) {
- std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
- InnerCandEndIt;
- // IRSimilarityCandidates each have a structure for operand use. It is
- // possible that two instances of the same subsequences have different
- // structure. Each type of structure found is assigned a number. This
- // DenseMap maps an IRSimilarityCandidate to which type of similarity
- // discovered it fits within.
- DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
- // Find the compatibility from each candidate to the others to determine
- // which candidates overlap and which have the same structure by mapping
- // each structure to a different group.
- bool SameStructure;
- bool Inserted;
- unsigned CurrentGroupNum = 0;
- unsigned OuterGroupNum;
- DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt;
- DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner;
- DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair;
- // Iterate over the candidates to determine its structural and overlapping
- // compatibility with other instructions
- DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
- DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
- for (CandIt = CandsForRepSubstring.begin(),
- CandEndIt = CandsForRepSubstring.end();
- CandIt != CandEndIt; CandIt++) {
- // Determine if it has an assigned structural group already.
- CandToGroupIt = CandToGroup.find(&*CandIt);
- if (CandToGroupIt == CandToGroup.end()) {
- // If not, we assign it one, and add it to our mapping.
- std::tie(CandToGroupIt, Inserted) =
- CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++));
- }
- // Get the structural group number from the iterator.
- OuterGroupNum = CandToGroupIt->second;
- // Check if we already have a list of IRSimilarityCandidates for the current
- // structural group. Create one if one does not exist.
- CurrentGroupPair = StructuralGroups.find(OuterGroupNum);
- if (CurrentGroupPair == StructuralGroups.end()) {
- IRSimilarityCandidate::createCanonicalMappingFor(*CandIt);
- std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(
- std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));
- }
- // Iterate over the IRSimilarityCandidates following the current
- // IRSimilarityCandidate in the list to determine whether the two
- // IRSimilarityCandidates are compatible. This is so we do not repeat pairs
- // of IRSimilarityCandidates.
- for (InnerCandIt = std::next(CandIt),
- InnerCandEndIt = CandsForRepSubstring.end();
- InnerCandIt != InnerCandEndIt; InnerCandIt++) {
- // We check if the inner item has a group already, if it does, we skip it.
- CandToGroupItInner = CandToGroup.find(&*InnerCandIt);
- if (CandToGroupItInner != CandToGroup.end())
- continue;
- // Otherwise we determine if they have the same structure and add it to
- // vector if they match.
- ValueNumberMappingA.clear();
- ValueNumberMappingB.clear();
- SameStructure = IRSimilarityCandidate::compareStructure(
- *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
- if (!SameStructure)
- continue;
- InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
- ValueNumberMappingB);
- CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
- CurrentGroupPair->second.push_back(*InnerCandIt);
- }
- }
- }
- void IRSimilarityIdentifier::findCandidates(
- std::vector<IRInstructionData *> &InstrList,
- std::vector<unsigned> &IntegerMapping) {
- SuffixTree ST(IntegerMapping);
- std::vector<IRSimilarityCandidate> CandsForRepSubstring;
- std::vector<SimilarityGroup> NewCandidateGroups;
- DenseMap<unsigned, SimilarityGroup> StructuralGroups;
- // Iterate over the subsequences found by the Suffix Tree to create
- // IRSimilarityCandidates for each repeated subsequence and determine which
- // instances are structurally similar to one another.
- for (SuffixTree::RepeatedSubstring &RS : ST) {
- createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
- CandsForRepSubstring);
- if (CandsForRepSubstring.size() < 2)
- continue;
- findCandidateStructures(CandsForRepSubstring, StructuralGroups);
- for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups)
- // We only add the group if it contains more than one
- // IRSimilarityCandidate. If there is only one, that means there is no
- // other repeated subsequence with the same structure.
- if (Group.second.size() > 1)
- SimilarityCandidates->push_back(Group.second);
- CandsForRepSubstring.clear();
- StructuralGroups.clear();
- NewCandidateGroups.clear();
- }
- }
- SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
- ArrayRef<std::unique_ptr<Module>> Modules) {
- resetSimilarityCandidates();
- std::vector<IRInstructionData *> InstrList;
- std::vector<unsigned> IntegerMapping;
- Mapper.InstClassifier.EnableBranches = this->EnableBranches;
- Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
- Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
- Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
- Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
- populateMapper(Modules, InstrList, IntegerMapping);
- findCandidates(InstrList, IntegerMapping);
- return *SimilarityCandidates;
- }
- SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
- resetSimilarityCandidates();
- Mapper.InstClassifier.EnableBranches = this->EnableBranches;
- Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
- Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
- Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
- Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
- std::vector<IRInstructionData *> InstrList;
- std::vector<unsigned> IntegerMapping;
- populateMapper(M, InstrList, IntegerMapping);
- findCandidates(InstrList, IntegerMapping);
- return *SimilarityCandidates;
- }
- INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier",
- "ir-similarity-identifier", false, true)
- IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
- : ModulePass(ID) {
- initializeIRSimilarityIdentifierWrapperPassPass(
- *PassRegistry::getPassRegistry());
- }
- bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
- IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
- MatchCallsByName, !DisableIntrinsics,
- false));
- return false;
- }
- bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) {
- IRSI.reset();
- return false;
- }
- bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) {
- IRSI->findSimilarity(M);
- return false;
- }
- AnalysisKey IRSimilarityAnalysis::Key;
- IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
- ModuleAnalysisManager &) {
- auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
- MatchCallsByName, !DisableIntrinsics,
- false);
- IRSI.findSimilarity(M);
- return IRSI;
- }
- PreservedAnalyses
- IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) {
- IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M);
- std::optional<SimilarityGroupList> &SimilarityCandidatesOpt =
- IRSI.getSimilarity();
- for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {
- OS << CandVec.size() << " candidates of length "
- << CandVec.begin()->getLength() << ". Found in: \n";
- for (IRSimilarityCandidate &Cand : CandVec) {
- OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str()
- << ", Basic Block: ";
- if (Cand.front()->Inst->getParent()->getName().str() == "")
- OS << "(unnamed)";
- else
- OS << Cand.front()->Inst->getParent()->getName().str();
- OS << "\n Start Instruction: ";
- Cand.frontInstruction()->print(OS);
- OS << "\n End Instruction: ";
- Cand.backInstruction()->print(OS);
- OS << "\n";
- }
- }
- return PreservedAnalyses::all();
- }
- char IRSimilarityIdentifierWrapperPass::ID = 0;
|