123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800 |
- #pragma once
- #ifdef __GNUC__
- #pragma GCC diagnostic push
- #pragma GCC diagnostic ignored "-Wunused-parameter"
- #endif
- //===- IRSimilarityIdentifier.h - Find similarity in a module --------------==//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // \file
- // Interface file for the IRSimilarityIdentifier for identifying similarities in
- // IR including the IRInstructionMapper, which maps an Instruction to unsigned
- // integers.
- //
- // Two sequences of instructions are called "similar" if they perform the same
- // series of operations for all inputs.
- //
- // \code
- // %1 = add i32 %a, 10
- // %2 = add i32 %a, %1
- // %3 = icmp slt icmp %1, %2
- // \endcode
- //
- // and
- //
- // \code
- // %1 = add i32 11, %a
- // %2 = sub i32 %a, %1
- // %3 = icmp sgt icmp %2, %1
- // \endcode
- //
- // ultimately have the same result, even if the inputs, and structure are
- // slightly different.
- //
- // For instructions, we do not worry about operands that do not have fixed
- // semantic meaning to the program. We consider the opcode that the instruction
- // has, the types, parameters, and extra information such as the function name,
- // or comparison predicate. These are used to create a hash to map instructions
- // to integers to be used in similarity matching in sequences of instructions
- //
- // Terminology:
- // An IRSimilarityCandidate is a region of IRInstructionData (wrapped
- // Instructions), usually used to denote a region of similarity has been found.
- //
- // A SimilarityGroup is a set of IRSimilarityCandidates that are structurally
- // similar to one another.
- //
- //===----------------------------------------------------------------------===//
- #ifndef LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
- #define LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
- #include "llvm/IR/InstVisitor.h"
- #include "llvm/IR/Instructions.h"
- #include "llvm/IR/Module.h"
- #include "llvm/IR/PassManager.h"
- #include "llvm/Pass.h"
- #include "llvm/Support/Allocator.h"
- namespace llvm {
- namespace IRSimilarity {
- struct IRInstructionDataList;
- /// This represents what is and is not supported when finding similarity in
- /// Instructions.
- ///
- /// Legal Instructions are considered when looking at similarity between
- /// Instructions.
- ///
- /// Illegal Instructions cannot be considered when looking for similarity
- /// between Instructions. They act as boundaries between similarity regions.
- ///
- /// Invisible Instructions are skipped over during analysis.
- // TODO: Shared with MachineOutliner
- enum InstrType { Legal, Illegal, Invisible };
- /// This provides the utilities for hashing an Instruction to an unsigned
- /// integer. Two IRInstructionDatas produce the same hash value when their
- /// underlying Instructions perform the same operation (even if they don't have
- /// the same input operands.)
- /// As a more concrete example, consider the following:
- ///
- /// \code
- /// %add1 = add i32 %a, %b
- /// %add2 = add i32 %c, %d
- /// %add3 = add i64 %e, %f
- /// \endcode
- ///
- // Then the IRInstructionData wrappers for these Instructions may be hashed like
- /// so:
- ///
- /// \code
- /// ; These two adds have the same types and operand types, so they hash to the
- /// ; same number.
- /// %add1 = add i32 %a, %b ; Hash: 1
- /// %add2 = add i32 %c, %d ; Hash: 1
- /// ; This add produces an i64. This differentiates it from %add1 and %add2. So,
- /// ; it hashes to a different number.
- /// %add3 = add i64 %e, %f; Hash: 2
- /// \endcode
- ///
- ///
- /// This hashing scheme will be used to represent the program as a very long
- /// string. This string can then be placed in a data structure which can be used
- /// for similarity queries.
- ///
- /// TODO: Handle types of Instructions which can be equal even with different
- /// operands. (E.g. comparisons with swapped predicates.)
- /// TODO: Handle CallInsts, which are only checked for function type
- /// by \ref isSameOperationAs.
- /// TODO: Handle GetElementPtrInsts, as some of the operands have to be the
- /// exact same, and some do not.
- struct IRInstructionData : ilist_node<IRInstructionData> {
- /// The source Instruction that is being wrapped.
- Instruction *Inst = nullptr;
- /// The values of the operands in the Instruction.
- SmallVector<Value *, 4> OperVals;
- /// The legality of the wrapped instruction. This is informed by InstrType,
- /// and is used when checking when two instructions are considered similar.
- /// If either instruction is not legal, the instructions are automatically not
- /// considered similar.
- bool Legal;
- /// This is only relevant if we are wrapping a CmpInst where we needed to
- /// change the predicate of a compare instruction from a greater than form
- /// to a less than form. It is None otherwise.
- Optional<CmpInst::Predicate> RevisedPredicate;
- /// Gather the information that is difficult to gather for an Instruction, or
- /// is changed. i.e. the operands of an Instruction and the Types of those
- /// operands. This extra information allows for similarity matching to make
- /// assertions that allow for more flexibility when checking for whether an
- /// Instruction performs the same operation.
- IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL);
- /// Get the predicate that the compare instruction is using for hashing the
- /// instruction. the IRInstructionData must be wrapping a CmpInst.
- CmpInst::Predicate getPredicate() const;
- /// A function that swaps the predicates to their less than form if they are
- /// in a greater than form. Otherwise, the predicate is unchanged.
- ///
- /// \param CI - The comparison operation to find a consistent preidcate for.
- /// \return the consistent comparison predicate.
- static CmpInst::Predicate predicateForConsistency(CmpInst *CI);
- /// Hashes \p Value based on its opcode, types, and operand types.
- /// Two IRInstructionData instances produce the same hash when they perform
- /// the same operation.
- ///
- /// As a simple example, consider the following instructions.
- ///
- /// \code
- /// %add1 = add i32 %x1, %y1
- /// %add2 = add i32 %x2, %y2
- ///
- /// %sub = sub i32 %x1, %y1
- ///
- /// %add_i64 = add i64 %x2, %y2
- /// \endcode
- ///
- /// Because the first two adds operate the same types, and are performing the
- /// same action, they will be hashed to the same value.
- ///
- /// However, the subtraction instruction is not the same as an addition, and
- /// will be hashed to a different value.
- ///
- /// Finally, the last add has a different type compared to the first two add
- /// instructions, so it will also be hashed to a different value that any of
- /// the previous instructions.
- ///
- /// \param [in] ID - The IRInstructionData instance to be hashed.
- /// \returns A hash_value of the IRInstructionData.
- friend hash_code hash_value(const IRInstructionData &ID) {
- SmallVector<Type *, 4> OperTypes;
- for (Value *V : ID.OperVals)
- OperTypes.push_back(V->getType());
- if (isa<CmpInst>(ID.Inst))
- return llvm::hash_combine(
- llvm::hash_value(ID.Inst->getOpcode()),
- llvm::hash_value(ID.Inst->getType()),
- llvm::hash_value(ID.getPredicate()),
- llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
- else if (CallInst *CI = dyn_cast<CallInst>(ID.Inst))
- return llvm::hash_combine(
- llvm::hash_value(ID.Inst->getOpcode()),
- llvm::hash_value(ID.Inst->getType()),
- llvm::hash_value(CI->getCalledFunction()->getName().str()),
- llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
- return llvm::hash_combine(
- llvm::hash_value(ID.Inst->getOpcode()),
- llvm::hash_value(ID.Inst->getType()),
- llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
- }
- IRInstructionDataList *IDL = nullptr;
- };
- struct IRInstructionDataList : simple_ilist<IRInstructionData> {};
- /// Compare one IRInstructionData class to another IRInstructionData class for
- /// whether they are performing a the same operation, and can mapped to the
- /// same value. For regular instructions if the hash value is the same, then
- /// they will also be close.
- ///
- /// \param A - The first IRInstructionData class to compare
- /// \param B - The second IRInstructionData class to compare
- /// \returns true if \p A and \p B are similar enough to be mapped to the same
- /// value.
- bool isClose(const IRInstructionData &A, const IRInstructionData &B);
- struct IRInstructionDataTraits : DenseMapInfo<IRInstructionData *> {
- static inline IRInstructionData *getEmptyKey() { return nullptr; }
- static inline IRInstructionData *getTombstoneKey() {
- return reinterpret_cast<IRInstructionData *>(-1);
- }
- static unsigned getHashValue(const IRInstructionData *E) {
- using llvm::hash_value;
- assert(E && "IRInstructionData is a nullptr?");
- return hash_value(*E);
- }
- static bool isEqual(const IRInstructionData *LHS,
- const IRInstructionData *RHS) {
- if (RHS == getEmptyKey() || RHS == getTombstoneKey() ||
- LHS == getEmptyKey() || LHS == getTombstoneKey())
- return LHS == RHS;
- assert(LHS && RHS && "nullptr should have been caught by getEmptyKey?");
- return isClose(*LHS, *RHS);
- }
- };
- /// Helper struct for converting the Instructions in a Module into a vector of
- /// unsigned integers. This vector of unsigned integers can be thought of as a
- /// "numeric string". This numeric string can then be queried by, for example,
- /// data structures that find repeated substrings.
- ///
- /// This hashing is done per BasicBlock in the module. To hash Instructions
- /// based off of their operations, each Instruction is wrapped in an
- /// IRInstructionData struct. The unsigned integer for an IRInstructionData
- /// depends on:
- /// - The hash provided by the IRInstructionData.
- /// - Which member of InstrType the IRInstructionData is classified as.
- // See InstrType for more details on the possible classifications, and how they
- // manifest in the numeric string.
- ///
- /// The numeric string for an individual BasicBlock is terminated by an unique
- /// unsigned integer. This prevents data structures which rely on repetition
- /// from matching across BasicBlocks. (For example, the SuffixTree.)
- /// As a concrete example, if we have the following two BasicBlocks:
- /// \code
- /// bb0:
- /// %add1 = add i32 %a, %b
- /// %add2 = add i32 %c, %d
- /// %add3 = add i64 %e, %f
- /// bb1:
- /// %sub = sub i32 %c, %d
- /// \endcode
- /// We may hash the Instructions like this (via IRInstructionData):
- /// \code
- /// bb0:
- /// %add1 = add i32 %a, %b ; Hash: 1
- /// %add2 = add i32 %c, %d; Hash: 1
- /// %add3 = add i64 %e, %f; Hash: 2
- /// bb1:
- /// %sub = sub i32 %c, %d; Hash: 3
- /// %add4 = add i32 %c, %d ; Hash: 1
- /// \endcode
- /// And produce a "numeric string representation" like so:
- /// 1, 1, 2, unique_integer_1, 3, 1, unique_integer_2
- ///
- /// TODO: This is very similar to the MachineOutliner, and should be
- /// consolidated into the same interface.
- struct IRInstructionMapper {
- /// The starting illegal instruction number to map to.
- ///
- /// Set to -3 for compatibility with DenseMapInfo<unsigned>.
- unsigned IllegalInstrNumber = static_cast<unsigned>(-3);
- /// The next available integer to assign to a legal Instruction to.
- unsigned LegalInstrNumber = 0;
- /// Correspondence from IRInstructionData to unsigned integers.
- DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>
- InstructionIntegerMap;
- /// Set if we added an illegal number in the previous step.
- /// Since each illegal number is unique, we only need one of them between
- /// each range of legal numbers. This lets us make sure we don't add more
- /// than one illegal number per range.
- bool AddedIllegalLastTime = false;
- /// Marks whether we found a illegal instruction in the previous step.
- bool CanCombineWithPrevInstr = false;
- /// Marks whether we have found a set of instructions that is long enough
- /// to be considered for similarity.
- bool HaveLegalRange = false;
- /// This allocator pointer is in charge of holding on to the IRInstructionData
- /// so it is not deallocated until whatever external tool is using it is done
- /// with the information.
- SpecificBumpPtrAllocator<IRInstructionData> *InstDataAllocator = nullptr;
- /// This allocator pointer is in charge of creating the IRInstructionDataList
- /// so it is not deallocated until whatever external tool is using it is done
- /// with the information.
- SpecificBumpPtrAllocator<IRInstructionDataList> *IDLAllocator = nullptr;
- /// Get an allocated IRInstructionData struct using the InstDataAllocator.
- ///
- /// \param I - The Instruction to wrap with IRInstructionData.
- /// \param Legality - A boolean value that is true if the instruction is to
- /// be considered for similarity, and false if not.
- /// \param IDL - The InstructionDataList that the IRInstructionData is
- /// inserted into.
- /// \returns An allocated IRInstructionData struct.
- IRInstructionData *allocateIRInstructionData(Instruction &I, bool Legality,
- IRInstructionDataList &IDL);
- /// Get an allocated IRInstructionDataList object using the IDLAllocator.
- ///
- /// \returns An allocated IRInstructionDataList object.
- IRInstructionDataList *allocateIRInstructionDataList();
- IRInstructionDataList *IDL = nullptr;
- /// Maps the Instructions in a BasicBlock \p BB to legal or illegal integers
- /// determined by \p InstrType. Two Instructions are mapped to the same value
- /// if they are close as defined by the InstructionData class above.
- ///
- /// \param [in] BB - The BasicBlock to be mapped to integers.
- /// \param [in,out] InstrList - Vector of IRInstructionData to append to.
- /// \param [in,out] IntegerMapping - Vector of unsigned integers to append to.
- void convertToUnsignedVec(BasicBlock &BB,
- std::vector<IRInstructionData *> &InstrList,
- std::vector<unsigned> &IntegerMapping);
- /// Maps an Instruction to a legal integer.
- ///
- /// \param [in] It - The Instruction to be mapped to an integer.
- /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to
- /// append to.
- /// \param [in,out] InstrListForBB - Vector of InstructionData to append to.
- /// \returns The integer \p It was mapped to.
- unsigned mapToLegalUnsigned(BasicBlock::iterator &It,
- std::vector<unsigned> &IntegerMappingForBB,
- std::vector<IRInstructionData *> &InstrListForBB);
- /// Maps an Instruction to an illegal integer.
- ///
- /// \param [in] It - The \p Instruction to be mapped to an integer.
- /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to
- /// append to.
- /// \param [in,out] InstrListForBB - Vector of IRInstructionData to append to.
- /// \param End - true if creating a dummy IRInstructionData at the end of a
- /// basic block.
- /// \returns The integer \p It was mapped to.
- unsigned mapToIllegalUnsigned(
- BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
- std::vector<IRInstructionData *> &InstrListForBB, bool End = false);
- IRInstructionMapper(SpecificBumpPtrAllocator<IRInstructionData> *IDA,
- SpecificBumpPtrAllocator<IRInstructionDataList> *IDLA)
- : InstDataAllocator(IDA), IDLAllocator(IDLA) {
- // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
- // changed.
- assert(DenseMapInfo<unsigned>::getEmptyKey() == static_cast<unsigned>(-1) &&
- "DenseMapInfo<unsigned>'s empty key isn't -1!");
- assert(DenseMapInfo<unsigned>::getTombstoneKey() ==
- static_cast<unsigned>(-2) &&
- "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
- IDL = new (IDLAllocator->Allocate())
- IRInstructionDataList();
- }
- /// Custom InstVisitor to classify different instructions for whether it can
- /// be analyzed for similarity.
- struct InstructionClassification
- : public InstVisitor<InstructionClassification, InstrType> {
- InstructionClassification() {}
- // TODO: Determine a scheme to resolve when the label is similar enough.
- InstrType visitBranchInst(BranchInst &BI) { return Illegal; }
- // TODO: Determine a scheme to resolve when the labels are similar enough.
- InstrType visitPHINode(PHINode &PN) { return Illegal; }
- // TODO: Handle allocas.
- InstrType visitAllocaInst(AllocaInst &AI) { return Illegal; }
- // We exclude variable argument instructions since variable arguments
- // requires extra checking of the argument list.
- InstrType visitVAArgInst(VAArgInst &VI) { return Illegal; }
- // We exclude all exception handling cases since they are so context
- // dependent.
- InstrType visitLandingPadInst(LandingPadInst &LPI) { return Illegal; }
- InstrType visitFuncletPadInst(FuncletPadInst &FPI) { return Illegal; }
- // DebugInfo should be included in the regions, but should not be
- // analyzed for similarity as it has no bearing on the outcome of the
- // program.
- InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; }
- // TODO: Handle specific intrinsics.
- InstrType visitIntrinsicInst(IntrinsicInst &II) { return Illegal; }
- // We only allow call instructions where the function has a name and
- // is not an indirect call.
- InstrType visitCallInst(CallInst &CI) {
- Function *F = CI.getCalledFunction();
- if (!F || CI.isIndirectCall() || !F->hasName())
- return Illegal;
- return Legal;
- }
- // TODO: We do not current handle similarity that changes the control flow.
- InstrType visitInvokeInst(InvokeInst &II) { return Illegal; }
- // TODO: We do not current handle similarity that changes the control flow.
- InstrType visitCallBrInst(CallBrInst &CBI) { return Illegal; }
- // TODO: Handle interblock similarity.
- InstrType visitTerminator(Instruction &I) { return Illegal; }
- InstrType visitInstruction(Instruction &I) { return Legal; }
- };
- /// Maps an Instruction to a member of InstrType.
- InstructionClassification InstClassifier;
- };
- /// This is a class that wraps a range of IRInstructionData from one point to
- /// another in the vector of IRInstructionData, which is a region of the
- /// program. It is also responsible for defining the structure within this
- /// region of instructions.
- ///
- /// The structure of a region is defined through a value numbering system
- /// assigned to each unique value in a region at the creation of the
- /// IRSimilarityCandidate.
- ///
- /// For example, for each Instruction we add a mapping for each new
- /// value seen in that Instruction.
- /// IR: Mapping Added:
- /// %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2
- /// %add2 = add i32 %a, %1 %add2 -> 4
- /// %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5
- ///
- /// We can compare IRSimilarityCandidates against one another.
- /// The \ref isSimilar function compares each IRInstructionData against one
- /// another and if we have the same sequences of IRInstructionData that would
- /// create the same hash, we have similar IRSimilarityCandidates.
- ///
- /// We can also compare the structure of IRSimilarityCandidates. If we can
- /// create a mapping of registers in the region contained by one
- /// IRSimilarityCandidate to the region contained by different
- /// IRSimilarityCandidate, they can be considered structurally similar.
- ///
- /// IRSimilarityCandidate1: IRSimilarityCandidate2:
- /// %add1 = add i32 %a, %b %add1 = add i32 %d, %e
- /// %add2 = add i32 %a, %c %add2 = add i32 %d, %f
- /// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4
- ///
- /// Can have the following mapping from candidate to candidate of:
- /// %a -> %d, %b -> %e, %c -> %f, c1 -> c3, c2 -> c4
- /// and can be considered similar.
- ///
- /// IRSimilarityCandidate1: IRSimilarityCandidate2:
- /// %add1 = add i32 %a, %b %add1 = add i32 %d, c4
- /// %add2 = add i32 %a, %c %add2 = add i32 %d, %f
- /// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4
- ///
- /// We cannot create the same mapping since the use of c4 is not used in the
- /// same way as %b or c2.
- class IRSimilarityCandidate {
- private:
- /// The start index of this IRSimilarityCandidate in the instruction list.
- unsigned StartIdx = 0;
- /// The number of instructions in this IRSimilarityCandidate.
- unsigned Len = 0;
- /// The first instruction in this IRSimilarityCandidate.
- IRInstructionData *FirstInst = nullptr;
- /// The last instruction in this IRSimilarityCandidate.
- IRInstructionData *LastInst = nullptr;
- /// Global Value Numbering structures
- /// @{
- /// Stores the mapping of the value to the number assigned to it in the
- /// IRSimilarityCandidate.
- DenseMap<Value *, unsigned> ValueToNumber;
- /// Stores the mapping of the number to the value assigned this number.
- DenseMap<unsigned, Value *> NumberToValue;
- /// @}
- public:
- /// \param StartIdx - The starting location of the region.
- /// \param Len - The length of the region.
- /// \param FirstInstIt - The starting IRInstructionData of the region.
- /// \param LastInstIt - The ending IRInstructionData of the region.
- IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
- IRInstructionData *FirstInstIt,
- IRInstructionData *LastInstIt);
- /// \param A - The first IRInstructionCandidate to compare.
- /// \param B - The second IRInstructionCandidate to compare.
- /// \returns True when every IRInstructionData in \p A is similar to every
- /// IRInstructionData in \p B.
- static bool isSimilar(const IRSimilarityCandidate &A,
- const IRSimilarityCandidate &B);
- /// \param A - The first IRInstructionCandidate to compare.
- /// \param B - The second IRInstructionCandidate to compare.
- /// \returns True when every IRInstructionData in \p A is structurally similar
- /// to \p B.
- static bool compareStructure(const IRSimilarityCandidate &A,
- const IRSimilarityCandidate &B);
- struct OperandMapping {
- /// The IRSimilarityCandidate that holds the instruction the OperVals were
- /// pulled from.
- const IRSimilarityCandidate &IRSC;
- /// The operand values to be analyzed.
- ArrayRef<Value *> &OperVals;
- /// The current mapping of global value numbers from one IRSimilarityCandidate
- /// to another IRSimilarityCandidate.
- DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMapping;
- };
- /// Compare the operands in \p A and \p B and check that the current mapping
- /// of global value numbers from \p A to \p B and \p B to \A is consistent.
- ///
- /// \param A - The first IRInstructionCandidate, operand values, and current
- /// operand mappings to compare.
- /// \param B - The second IRInstructionCandidate, operand values, and current
- /// operand mappings to compare.
- /// \returns true if the IRSimilarityCandidates operands are compatible.
- static bool compareNonCommutativeOperandMapping(OperandMapping A,
- OperandMapping B);
- /// Compare the operands in \p A and \p B and check that the current mapping
- /// of global value numbers from \p A to \p B and \p B to \A is consistent
- /// given that the operands are commutative.
- ///
- /// \param A - The first IRInstructionCandidate, operand values, and current
- /// operand mappings to compare.
- /// \param B - The second IRInstructionCandidate, operand values, and current
- /// operand mappings to compare.
- /// \returns true if the IRSimilarityCandidates operands are compatible.
- static bool compareCommutativeOperandMapping(OperandMapping A,
- OperandMapping B);
- /// Compare the start and end indices of the two IRSimilarityCandidates for
- /// whether they overlap. If the start instruction of one
- /// IRSimilarityCandidate is less than the end instruction of the other, and
- /// the start instruction of one is greater than the start instruction of the
- /// other, they overlap.
- ///
- /// \returns true if the IRSimilarityCandidates do not have overlapping
- /// instructions.
- static bool overlap(const IRSimilarityCandidate &A,
- const IRSimilarityCandidate &B);
- /// \returns the number of instructions in this Candidate.
- unsigned getLength() const { return Len; }
- /// \returns the start index of this IRSimilarityCandidate.
- unsigned getStartIdx() const { return StartIdx; }
- /// \returns the end index of this IRSimilarityCandidate.
- unsigned getEndIdx() const { return StartIdx + Len - 1; }
- /// \returns The first IRInstructionData.
- IRInstructionData *front() const { return FirstInst; }
- /// \returns The last IRInstructionData.
- IRInstructionData *back() const { return LastInst; }
- /// \returns The first Instruction.
- Instruction *frontInstruction() { return FirstInst->Inst; }
- /// \returns The last Instruction
- Instruction *backInstruction() { return LastInst->Inst; }
- /// \returns The BasicBlock the IRSimilarityCandidate starts in.
- BasicBlock *getStartBB() { return FirstInst->Inst->getParent(); }
- /// \returns The BasicBlock the IRSimilarityCandidate ends in.
- BasicBlock *getEndBB() { return LastInst->Inst->getParent(); }
- /// \returns The Function that the IRSimilarityCandidate is located in.
- Function *getFunction() { return getStartBB()->getParent(); }
- /// Finds the positive number associated with \p V if it has been mapped.
- /// \param [in] V - the Value to find.
- /// \returns The positive number corresponding to the value.
- /// \returns None if not present.
- Optional<unsigned> getGVN(Value *V) {
- assert(V != nullptr && "Value is a nullptr?");
- DenseMap<Value *, unsigned>::iterator VNIt = ValueToNumber.find(V);
- if (VNIt == ValueToNumber.end())
- return None;
- return VNIt->second;
- }
- /// Finds the Value associate with \p Num if it exists.
- /// \param [in] Num - the number to find.
- /// \returns The Value associated with the number.
- /// \returns None if not present.
- Optional<Value *> fromGVN(unsigned Num) {
- DenseMap<unsigned, Value *>::iterator VNIt = NumberToValue.find(Num);
- if (VNIt == NumberToValue.end())
- return None;
- assert(VNIt->second != nullptr && "Found value is a nullptr!");
- return VNIt->second;
- }
- /// \param RHS -The IRSimilarityCandidate to compare against
- /// \returns true if the IRSimilarityCandidate is occurs after the
- /// IRSimilarityCandidate in the program.
- bool operator<(const IRSimilarityCandidate &RHS) const {
- return getStartIdx() > RHS.getStartIdx();
- }
- using iterator = IRInstructionDataList::iterator;
- iterator begin() const { return iterator(front()); }
- iterator end() const { return std::next(iterator(back())); }
- };
- typedef std::vector<IRSimilarityCandidate> SimilarityGroup;
- typedef std::vector<SimilarityGroup> SimilarityGroupList;
- /// This class puts all the pieces of the IRInstructionData,
- /// IRInstructionMapper, IRSimilarityCandidate together.
- ///
- /// It first feeds the Module or vector of Modules into the IRInstructionMapper,
- /// and puts all the mapped instructions into a single long list of
- /// IRInstructionData.
- ///
- /// The list of unsigned integers is given to the Suffix Tree or similar data
- /// structure to find repeated subsequences. We construct an
- /// IRSimilarityCandidate for each instance of the subsequence. We compare them
- /// against one another since These repeated subsequences can have different
- /// structure. For each different kind of structure found, we create a
- /// similarity group.
- ///
- /// If we had four IRSimilarityCandidates A, B, C, and D where A, B and D are
- /// structurally similar to one another, while C is different we would have two
- /// SimilarityGroups:
- ///
- /// SimilarityGroup 1: SimilarityGroup 2
- /// A, B, D C
- ///
- /// A list of the different similarity groups is then returned after
- /// analyzing the module.
- class IRSimilarityIdentifier {
- public:
- IRSimilarityIdentifier()
- : Mapper(&InstDataAllocator, &InstDataListAllocator) {}
- /// \param M the module to find similarity in.
- explicit IRSimilarityIdentifier(Module &M)
- : Mapper(&InstDataAllocator, &InstDataListAllocator) {
- findSimilarity(M);
- }
- private:
- /// Map the instructions in the module to unsigned integers, using mapping
- /// already present in the Mapper if possible.
- ///
- /// \param [in] M Module - To map to integers.
- /// \param [in,out] InstrList - The vector to append IRInstructionData to.
- /// \param [in,out] IntegerMapping - The vector to append integers to.
- void populateMapper(Module &M, std::vector<IRInstructionData *> &InstrList,
- std::vector<unsigned> &IntegerMapping);
- /// Map the instructions in the modules vector to unsigned integers, using
- /// mapping already present in the mapper if possible.
- ///
- /// \param [in] Modules - The list of modules to use to populate the mapper
- /// \param [in,out] InstrList - The vector to append IRInstructionData to.
- /// \param [in,out] IntegerMapping - The vector to append integers to.
- void populateMapper(ArrayRef<std::unique_ptr<Module>> &Modules,
- std::vector<IRInstructionData *> &InstrList,
- std::vector<unsigned> &IntegerMapping);
- /// Find the similarity candidates in \p InstrList and corresponding
- /// \p UnsignedVec
- ///
- /// \param [in,out] InstrList - The vector to append IRInstructionData to.
- /// \param [in,out] IntegerMapping - The vector to append integers to.
- /// candidates found in the program.
- void findCandidates(std::vector<IRInstructionData *> &InstrList,
- std::vector<unsigned> &IntegerMapping);
- public:
- // Find the IRSimilarityCandidates in the \p Modules and group by structural
- // similarity in a SimilarityGroup, each group is returned in a
- // SimilarityGroupList.
- //
- // \param [in] Modules - the modules to analyze.
- // \returns The groups of similarity ranges found in the modules.
- SimilarityGroupList &
- findSimilarity(ArrayRef<std::unique_ptr<Module>> Modules);
- // Find the IRSimilarityCandidates in the given Module grouped by structural
- // similarity in a SimilarityGroup, contained inside a SimilarityGroupList.
- //
- // \param [in] M - the module to analyze.
- // \returns The groups of similarity ranges found in the module.
- SimilarityGroupList &findSimilarity(Module &M);
- // Clears \ref SimilarityCandidates if it is already filled by a previous run.
- void resetSimilarityCandidates() {
- // If we've already analyzed a Module or set of Modules, so we must clear
- // the SimilarityCandidates to make sure we do not have only old values
- // hanging around.
- if (SimilarityCandidates.hasValue())
- SimilarityCandidates->clear();
- else
- SimilarityCandidates = SimilarityGroupList();
- }
- // \returns The groups of similarity ranges found in the most recently passed
- // set of modules.
- Optional<SimilarityGroupList> &getSimilarity() {
- return SimilarityCandidates;
- }
- private:
- /// The allocator for IRInstructionData.
- SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
- /// The allocator for IRInstructionDataLists.
- SpecificBumpPtrAllocator<IRInstructionDataList> InstDataListAllocator;
- /// Map Instructions to unsigned integers and wraps the Instruction in an
- /// instance of IRInstructionData.
- IRInstructionMapper Mapper;
- /// The SimilarityGroups found with the most recent run of \ref
- /// findSimilarity. None if there is no recent run.
- Optional<SimilarityGroupList> SimilarityCandidates;
- };
- } // end namespace IRSimilarity
- /// An analysis pass based on legacy pass manager that runs and returns
- /// IRSimilarityIdentifier run on the Module.
- class IRSimilarityIdentifierWrapperPass : public ModulePass {
- std::unique_ptr<IRSimilarity::IRSimilarityIdentifier> IRSI;
- public:
- static char ID;
- IRSimilarityIdentifierWrapperPass();
- IRSimilarity::IRSimilarityIdentifier &getIRSI() { return *IRSI; }
- const IRSimilarity::IRSimilarityIdentifier &getIRSI() const { return *IRSI; }
- bool doInitialization(Module &M) override;
- bool doFinalization(Module &M) override;
- bool runOnModule(Module &M) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
- };
- /// An analysis pass that runs and returns the IRSimilarityIdentifier run on the
- /// Module.
- class IRSimilarityAnalysis : public AnalysisInfoMixin<IRSimilarityAnalysis> {
- public:
- typedef IRSimilarity::IRSimilarityIdentifier Result;
- Result run(Module &M, ModuleAnalysisManager &);
- private:
- friend AnalysisInfoMixin<IRSimilarityAnalysis>;
- static AnalysisKey Key;
- };
- /// Printer pass that uses \c IRSimilarityAnalysis.
- class IRSimilarityAnalysisPrinterPass
- : public PassInfoMixin<IRSimilarityAnalysisPrinterPass> {
- raw_ostream &OS;
- public:
- explicit IRSimilarityAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
- PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
- };
- } // end namespace llvm
- #endif // LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
- #ifdef __GNUC__
- #pragma GCC diagnostic pop
- #endif
|