IRSimilarityIdentifier.h 46 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- IRSimilarityIdentifier.h - Find similarity in a module --------------==//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. //
  14. // \file
  15. // Interface file for the IRSimilarityIdentifier for identifying similarities in
  16. // IR including the IRInstructionMapper, which maps an Instruction to unsigned
  17. // integers.
  18. //
  19. // Two sequences of instructions are called "similar" if they perform the same
  20. // series of operations for all inputs.
  21. //
  22. // \code
  23. // %1 = add i32 %a, 10
  24. // %2 = add i32 %a, %1
  25. // %3 = icmp slt icmp %1, %2
  26. // \endcode
  27. //
  28. // and
  29. //
  30. // \code
  31. // %1 = add i32 11, %a
  32. // %2 = sub i32 %a, %1
  33. // %3 = icmp sgt icmp %2, %1
  34. // \endcode
  35. //
  36. // ultimately have the same result, even if the inputs, and structure are
  37. // slightly different.
  38. //
  39. // For instructions, we do not worry about operands that do not have fixed
  40. // semantic meaning to the program. We consider the opcode that the instruction
  41. // has, the types, parameters, and extra information such as the function name,
  42. // or comparison predicate. These are used to create a hash to map instructions
  43. // to integers to be used in similarity matching in sequences of instructions
  44. //
  45. // Terminology:
  46. // An IRSimilarityCandidate is a region of IRInstructionData (wrapped
  47. // Instructions), usually used to denote a region of similarity has been found.
  48. //
  49. // A SimilarityGroup is a set of IRSimilarityCandidates that are structurally
  50. // similar to one another.
  51. //
  52. //===----------------------------------------------------------------------===//
  53. #ifndef LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
  54. #define LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
  55. #include "llvm/IR/InstVisitor.h"
  56. #include "llvm/IR/Instructions.h"
  57. #include "llvm/IR/Module.h"
  58. #include "llvm/IR/PassManager.h"
  59. #include "llvm/Pass.h"
  60. #include "llvm/Support/Allocator.h"
  61. namespace llvm {
  62. namespace IRSimilarity {
  63. struct IRInstructionDataList;
  64. /// This represents what is and is not supported when finding similarity in
  65. /// Instructions.
  66. ///
  67. /// Legal Instructions are considered when looking at similarity between
  68. /// Instructions.
  69. ///
  70. /// Illegal Instructions cannot be considered when looking for similarity
  71. /// between Instructions. They act as boundaries between similarity regions.
  72. ///
  73. /// Invisible Instructions are skipped over during analysis.
  74. // TODO: Shared with MachineOutliner
  75. enum InstrType { Legal, Illegal, Invisible };
  76. /// This provides the utilities for hashing an Instruction to an unsigned
  77. /// integer. Two IRInstructionDatas produce the same hash value when their
  78. /// underlying Instructions perform the same operation (even if they don't have
  79. /// the same input operands.)
  80. /// As a more concrete example, consider the following:
  81. ///
  82. /// \code
  83. /// %add1 = add i32 %a, %b
  84. /// %add2 = add i32 %c, %d
  85. /// %add3 = add i64 %e, %f
  86. /// \endcode
  87. ///
  88. // Then the IRInstructionData wrappers for these Instructions may be hashed like
  89. /// so:
  90. ///
  91. /// \code
  92. /// ; These two adds have the same types and operand types, so they hash to the
  93. /// ; same number.
  94. /// %add1 = add i32 %a, %b ; Hash: 1
  95. /// %add2 = add i32 %c, %d ; Hash: 1
  96. /// ; This add produces an i64. This differentiates it from %add1 and %add2. So,
  97. /// ; it hashes to a different number.
  98. /// %add3 = add i64 %e, %f; Hash: 2
  99. /// \endcode
  100. ///
  101. ///
  102. /// This hashing scheme will be used to represent the program as a very long
  103. /// string. This string can then be placed in a data structure which can be used
  104. /// for similarity queries.
  105. ///
  106. /// TODO: Handle types of Instructions which can be equal even with different
  107. /// operands. (E.g. comparisons with swapped predicates.)
  108. /// TODO: Handle CallInsts, which are only checked for function type
  109. /// by \ref isSameOperationAs.
  110. /// TODO: Handle GetElementPtrInsts, as some of the operands have to be the
  111. /// exact same, and some do not.
  112. struct IRInstructionData
  113. : ilist_node<IRInstructionData, ilist_sentinel_tracking<true>> {
  114. /// The source Instruction that is being wrapped.
  115. Instruction *Inst = nullptr;
  116. /// The values of the operands in the Instruction.
  117. SmallVector<Value *, 4> OperVals;
  118. /// The legality of the wrapped instruction. This is informed by InstrType,
  119. /// and is used when checking when two instructions are considered similar.
  120. /// If either instruction is not legal, the instructions are automatically not
  121. /// considered similar.
  122. bool Legal = false;
  123. /// This is only relevant if we are wrapping a CmpInst where we needed to
  124. /// change the predicate of a compare instruction from a greater than form
  125. /// to a less than form. It is None otherwise.
  126. Optional<CmpInst::Predicate> RevisedPredicate;
  127. /// This is only relevant if we are wrapping a CallInst. If we are requiring
  128. /// that the function calls have matching names as well as types, and the
  129. /// call is not an indirect call, this will hold the name of the function. If
  130. /// it is an indirect string, it will be the empty string. However, if this
  131. /// requirement is not in place it will be the empty string regardless of the
  132. /// function call type. The value held here is used to create the hash of the
  133. /// instruction, and check to make sure two instructions are close to one
  134. /// another.
  135. Optional<std::string> CalleeName;
  136. /// This structure holds the distances of how far "ahead of" or "behind" the
  137. /// target blocks of a branch, or the incoming blocks of a phi nodes are.
  138. /// If the value is negative, it means that the block was registered before
  139. /// the block of this instruction in terms of blocks in the function.
  140. /// Code Example:
  141. /// \code
  142. /// block_1:
  143. /// br i1 %0, label %block_2, label %block_3
  144. /// block_2:
  145. /// br i1 %1, label %block_1, label %block_2
  146. /// block_3:
  147. /// br i1 %2, label %block_2, label %block_1
  148. /// ; Replacing the labels with relative values, this becomes:
  149. /// block_1:
  150. /// br i1 %0, distance 1, distance 2
  151. /// block_2:
  152. /// br i1 %1, distance -1, distance 0
  153. /// block_3:
  154. /// br i1 %2, distance -1, distance -2
  155. /// \endcode
  156. /// Taking block_2 as our example, block_1 is "behind" block_2, and block_2 is
  157. /// "ahead" of block_2.
  158. SmallVector<int, 4> RelativeBlockLocations;
  159. /// Gather the information that is difficult to gather for an Instruction, or
  160. /// is changed. i.e. the operands of an Instruction and the Types of those
  161. /// operands. This extra information allows for similarity matching to make
  162. /// assertions that allow for more flexibility when checking for whether an
  163. /// Instruction performs the same operation.
  164. IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL);
  165. IRInstructionData(IRInstructionDataList &IDL);
  166. /// Fills data stuctures for IRInstructionData when it is constructed from a
  167. // reference or a pointer.
  168. void initializeInstruction();
  169. /// Get the predicate that the compare instruction is using for hashing the
  170. /// instruction. the IRInstructionData must be wrapping a CmpInst.
  171. CmpInst::Predicate getPredicate() const;
  172. /// Get the callee name that the call instruction is using for hashing the
  173. /// instruction. The IRInstructionData must be wrapping a CallInst.
  174. StringRef getCalleeName() const;
  175. /// A function that swaps the predicates to their less than form if they are
  176. /// in a greater than form. Otherwise, the predicate is unchanged.
  177. ///
  178. /// \param CI - The comparison operation to find a consistent preidcate for.
  179. /// \return the consistent comparison predicate.
  180. static CmpInst::Predicate predicateForConsistency(CmpInst *CI);
  181. /// For an IRInstructionData containing a branch, finds the
  182. /// relative distances from the source basic block to the target by taking
  183. /// the difference of the number assigned to the current basic block and the
  184. /// target basic block of the branch.
  185. ///
  186. /// \param BasicBlockToInteger - The mapping of basic blocks to their location
  187. /// in the module.
  188. void
  189. setBranchSuccessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger);
  190. /// For an IRInstructionData containing a CallInst, set the function name
  191. /// appropriately. This will be an empty string if it is an indirect call,
  192. /// or we are not matching by name of the called function. It will be the
  193. /// name of the function if \p MatchByName is true and it is not an indirect
  194. /// call. We may decide not to match by name in order to expand the
  195. /// size of the regions we can match. If a function name has the same type
  196. /// signature, but the different name, the region of code is still almost the
  197. /// same. Since function names can be treated as constants, the name itself
  198. /// could be extrapolated away. However, matching by name provides a
  199. /// specificity and more "identical" code than not matching by name.
  200. ///
  201. /// \param MatchByName - A flag to mark whether we are using the called
  202. /// function name as a differentiating parameter.
  203. void setCalleeName(bool MatchByName = true);
  204. /// For an IRInstructionData containing a PHINode, finds the
  205. /// relative distances from the incoming basic block to the current block by
  206. /// taking the difference of the number assigned to the current basic block
  207. /// and the incoming basic block of the branch.
  208. ///
  209. /// \param BasicBlockToInteger - The mapping of basic blocks to their location
  210. /// in the module.
  211. void
  212. setPHIPredecessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger);
  213. /// Hashes \p Value based on its opcode, types, and operand types.
  214. /// Two IRInstructionData instances produce the same hash when they perform
  215. /// the same operation.
  216. ///
  217. /// As a simple example, consider the following instructions.
  218. ///
  219. /// \code
  220. /// %add1 = add i32 %x1, %y1
  221. /// %add2 = add i32 %x2, %y2
  222. ///
  223. /// %sub = sub i32 %x1, %y1
  224. ///
  225. /// %add_i64 = add i64 %x2, %y2
  226. /// \endcode
  227. ///
  228. /// Because the first two adds operate the same types, and are performing the
  229. /// same action, they will be hashed to the same value.
  230. ///
  231. /// However, the subtraction instruction is not the same as an addition, and
  232. /// will be hashed to a different value.
  233. ///
  234. /// Finally, the last add has a different type compared to the first two add
  235. /// instructions, so it will also be hashed to a different value that any of
  236. /// the previous instructions.
  237. ///
  238. /// \param [in] ID - The IRInstructionData instance to be hashed.
  239. /// \returns A hash_value of the IRInstructionData.
  240. friend hash_code hash_value(const IRInstructionData &ID) {
  241. SmallVector<Type *, 4> OperTypes;
  242. for (Value *V : ID.OperVals)
  243. OperTypes.push_back(V->getType());
  244. if (isa<CmpInst>(ID.Inst))
  245. return llvm::hash_combine(
  246. llvm::hash_value(ID.Inst->getOpcode()),
  247. llvm::hash_value(ID.Inst->getType()),
  248. llvm::hash_value(ID.getPredicate()),
  249. llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
  250. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(ID.Inst)) {
  251. // To hash intrinsics, we use the opcode, and types like the other
  252. // instructions, but also, the Intrinsic ID, and the Name of the
  253. // intrinsic.
  254. Intrinsic::ID IntrinsicID = II->getIntrinsicID();
  255. return llvm::hash_combine(
  256. llvm::hash_value(ID.Inst->getOpcode()),
  257. llvm::hash_value(ID.Inst->getType()), llvm::hash_value(IntrinsicID),
  258. llvm::hash_value(*ID.CalleeName),
  259. llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
  260. }
  261. if (isa<CallInst>(ID.Inst)) {
  262. std::string FunctionName = *ID.CalleeName;
  263. return llvm::hash_combine(
  264. llvm::hash_value(ID.Inst->getOpcode()),
  265. llvm::hash_value(ID.Inst->getType()),
  266. llvm::hash_value(ID.Inst->getType()), llvm::hash_value(FunctionName),
  267. llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
  268. }
  269. return llvm::hash_combine(
  270. llvm::hash_value(ID.Inst->getOpcode()),
  271. llvm::hash_value(ID.Inst->getType()),
  272. llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
  273. }
  274. IRInstructionDataList *IDL = nullptr;
  275. };
  276. struct IRInstructionDataList
  277. : simple_ilist<IRInstructionData, ilist_sentinel_tracking<true>> {};
  278. /// Compare one IRInstructionData class to another IRInstructionData class for
  279. /// whether they are performing a the same operation, and can mapped to the
  280. /// same value. For regular instructions if the hash value is the same, then
  281. /// they will also be close.
  282. ///
  283. /// \param A - The first IRInstructionData class to compare
  284. /// \param B - The second IRInstructionData class to compare
  285. /// \returns true if \p A and \p B are similar enough to be mapped to the same
  286. /// value.
  287. bool isClose(const IRInstructionData &A, const IRInstructionData &B);
  288. struct IRInstructionDataTraits : DenseMapInfo<IRInstructionData *> {
  289. static inline IRInstructionData *getEmptyKey() { return nullptr; }
  290. static inline IRInstructionData *getTombstoneKey() {
  291. return reinterpret_cast<IRInstructionData *>(-1);
  292. }
  293. static unsigned getHashValue(const IRInstructionData *E) {
  294. using llvm::hash_value;
  295. assert(E && "IRInstructionData is a nullptr?");
  296. return hash_value(*E);
  297. }
  298. static bool isEqual(const IRInstructionData *LHS,
  299. const IRInstructionData *RHS) {
  300. if (RHS == getEmptyKey() || RHS == getTombstoneKey() ||
  301. LHS == getEmptyKey() || LHS == getTombstoneKey())
  302. return LHS == RHS;
  303. assert(LHS && RHS && "nullptr should have been caught by getEmptyKey?");
  304. return isClose(*LHS, *RHS);
  305. }
  306. };
  307. /// Helper struct for converting the Instructions in a Module into a vector of
  308. /// unsigned integers. This vector of unsigned integers can be thought of as a
  309. /// "numeric string". This numeric string can then be queried by, for example,
  310. /// data structures that find repeated substrings.
  311. ///
  312. /// This hashing is done per BasicBlock in the module. To hash Instructions
  313. /// based off of their operations, each Instruction is wrapped in an
  314. /// IRInstructionData struct. The unsigned integer for an IRInstructionData
  315. /// depends on:
  316. /// - The hash provided by the IRInstructionData.
  317. /// - Which member of InstrType the IRInstructionData is classified as.
  318. // See InstrType for more details on the possible classifications, and how they
  319. // manifest in the numeric string.
  320. ///
  321. /// The numeric string for an individual BasicBlock is terminated by an unique
  322. /// unsigned integer. This prevents data structures which rely on repetition
  323. /// from matching across BasicBlocks. (For example, the SuffixTree.)
  324. /// As a concrete example, if we have the following two BasicBlocks:
  325. /// \code
  326. /// bb0:
  327. /// %add1 = add i32 %a, %b
  328. /// %add2 = add i32 %c, %d
  329. /// %add3 = add i64 %e, %f
  330. /// bb1:
  331. /// %sub = sub i32 %c, %d
  332. /// \endcode
  333. /// We may hash the Instructions like this (via IRInstructionData):
  334. /// \code
  335. /// bb0:
  336. /// %add1 = add i32 %a, %b ; Hash: 1
  337. /// %add2 = add i32 %c, %d; Hash: 1
  338. /// %add3 = add i64 %e, %f; Hash: 2
  339. /// bb1:
  340. /// %sub = sub i32 %c, %d; Hash: 3
  341. /// %add4 = add i32 %c, %d ; Hash: 1
  342. /// \endcode
  343. /// And produce a "numeric string representation" like so:
  344. /// 1, 1, 2, unique_integer_1, 3, 1, unique_integer_2
  345. ///
  346. /// TODO: This is very similar to the MachineOutliner, and should be
  347. /// consolidated into the same interface.
  348. struct IRInstructionMapper {
  349. /// The starting illegal instruction number to map to.
  350. ///
  351. /// Set to -3 for compatibility with DenseMapInfo<unsigned>.
  352. unsigned IllegalInstrNumber = static_cast<unsigned>(-3);
  353. /// The next available integer to assign to a legal Instruction to.
  354. unsigned LegalInstrNumber = 0;
  355. /// Correspondence from IRInstructionData to unsigned integers.
  356. DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>
  357. InstructionIntegerMap;
  358. /// A mapping for a basic block in a module to its assigned number/location
  359. /// in the module.
  360. DenseMap<BasicBlock *, unsigned> BasicBlockToInteger;
  361. /// Set if we added an illegal number in the previous step.
  362. /// Since each illegal number is unique, we only need one of them between
  363. /// each range of legal numbers. This lets us make sure we don't add more
  364. /// than one illegal number per range.
  365. bool AddedIllegalLastTime = false;
  366. /// Marks whether we found a illegal instruction in the previous step.
  367. bool CanCombineWithPrevInstr = false;
  368. /// Marks whether we have found a set of instructions that is long enough
  369. /// to be considered for similarity.
  370. bool HaveLegalRange = false;
  371. /// Marks whether we should use exact function names, as well as types to
  372. /// find similarity between calls.
  373. bool EnableMatchCallsByName = false;
  374. /// This allocator pointer is in charge of holding on to the IRInstructionData
  375. /// so it is not deallocated until whatever external tool is using it is done
  376. /// with the information.
  377. SpecificBumpPtrAllocator<IRInstructionData> *InstDataAllocator = nullptr;
  378. /// This allocator pointer is in charge of creating the IRInstructionDataList
  379. /// so it is not deallocated until whatever external tool is using it is done
  380. /// with the information.
  381. SpecificBumpPtrAllocator<IRInstructionDataList> *IDLAllocator = nullptr;
  382. /// Get an allocated IRInstructionData struct using the InstDataAllocator.
  383. ///
  384. /// \param I - The Instruction to wrap with IRInstructionData.
  385. /// \param Legality - A boolean value that is true if the instruction is to
  386. /// be considered for similarity, and false if not.
  387. /// \param IDL - The InstructionDataList that the IRInstructionData is
  388. /// inserted into.
  389. /// \returns An allocated IRInstructionData struct.
  390. IRInstructionData *allocateIRInstructionData(Instruction &I, bool Legality,
  391. IRInstructionDataList &IDL);
  392. /// Get an empty allocated IRInstructionData struct using the
  393. /// InstDataAllocator.
  394. ///
  395. /// \param IDL - The InstructionDataList that the IRInstructionData is
  396. /// inserted into.
  397. /// \returns An allocated IRInstructionData struct.
  398. IRInstructionData *allocateIRInstructionData(IRInstructionDataList &IDL);
  399. /// Get an allocated IRInstructionDataList object using the IDLAllocator.
  400. ///
  401. /// \returns An allocated IRInstructionDataList object.
  402. IRInstructionDataList *allocateIRInstructionDataList();
  403. IRInstructionDataList *IDL = nullptr;
  404. /// Assigns values to all the basic blocks in function \p F starting from
  405. /// integer \p BBNumber.
  406. ///
  407. /// \param F - The function containing the basic blocks to assign numbers to.
  408. /// \param BBNumber - The number to start from.
  409. void initializeForBBs(Function &F, unsigned &BBNumber) {
  410. for (BasicBlock &BB : F)
  411. BasicBlockToInteger.insert(std::make_pair(&BB, BBNumber++));
  412. }
  413. /// Assigns values to all the basic blocks in Module \p M.
  414. /// \param M - The module containing the basic blocks to assign numbers to.
  415. void initializeForBBs(Module &M) {
  416. unsigned BBNumber = 0;
  417. for (Function &F : M)
  418. initializeForBBs(F, BBNumber);
  419. }
  420. /// Maps the Instructions in a BasicBlock \p BB to legal or illegal integers
  421. /// determined by \p InstrType. Two Instructions are mapped to the same value
  422. /// if they are close as defined by the InstructionData class above.
  423. ///
  424. /// \param [in] BB - The BasicBlock to be mapped to integers.
  425. /// \param [in,out] InstrList - Vector of IRInstructionData to append to.
  426. /// \param [in,out] IntegerMapping - Vector of unsigned integers to append to.
  427. void convertToUnsignedVec(BasicBlock &BB,
  428. std::vector<IRInstructionData *> &InstrList,
  429. std::vector<unsigned> &IntegerMapping);
  430. /// Maps an Instruction to a legal integer.
  431. ///
  432. /// \param [in] It - The Instruction to be mapped to an integer.
  433. /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to
  434. /// append to.
  435. /// \param [in,out] InstrListForBB - Vector of InstructionData to append to.
  436. /// \returns The integer \p It was mapped to.
  437. unsigned mapToLegalUnsigned(BasicBlock::iterator &It,
  438. std::vector<unsigned> &IntegerMappingForBB,
  439. std::vector<IRInstructionData *> &InstrListForBB);
  440. /// Maps an Instruction to an illegal integer.
  441. ///
  442. /// \param [in] It - The \p Instruction to be mapped to an integer.
  443. /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to
  444. /// append to.
  445. /// \param [in,out] InstrListForBB - Vector of IRInstructionData to append to.
  446. /// \param End - true if creating a dummy IRInstructionData at the end of a
  447. /// basic block.
  448. /// \returns The integer \p It was mapped to.
  449. unsigned mapToIllegalUnsigned(
  450. BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
  451. std::vector<IRInstructionData *> &InstrListForBB, bool End = false);
  452. IRInstructionMapper(SpecificBumpPtrAllocator<IRInstructionData> *IDA,
  453. SpecificBumpPtrAllocator<IRInstructionDataList> *IDLA)
  454. : InstDataAllocator(IDA), IDLAllocator(IDLA) {
  455. // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
  456. // changed.
  457. assert(DenseMapInfo<unsigned>::getEmptyKey() == static_cast<unsigned>(-1) &&
  458. "DenseMapInfo<unsigned>'s empty key isn't -1!");
  459. assert(DenseMapInfo<unsigned>::getTombstoneKey() ==
  460. static_cast<unsigned>(-2) &&
  461. "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
  462. IDL = new (IDLAllocator->Allocate())
  463. IRInstructionDataList();
  464. }
  465. /// Custom InstVisitor to classify different instructions for whether it can
  466. /// be analyzed for similarity.
  467. struct InstructionClassification
  468. : public InstVisitor<InstructionClassification, InstrType> {
  469. InstructionClassification() = default;
  470. // TODO: Determine a scheme to resolve when the label is similar enough.
  471. InstrType visitBranchInst(BranchInst &BI) {
  472. if (EnableBranches)
  473. return Legal;
  474. return Illegal;
  475. }
  476. InstrType visitPHINode(PHINode &PN) {
  477. if (EnableBranches)
  478. return Legal;
  479. return Illegal;
  480. }
  481. // TODO: Handle allocas.
  482. InstrType visitAllocaInst(AllocaInst &AI) { return Illegal; }
  483. // We exclude variable argument instructions since variable arguments
  484. // requires extra checking of the argument list.
  485. InstrType visitVAArgInst(VAArgInst &VI) { return Illegal; }
  486. // We exclude all exception handling cases since they are so context
  487. // dependent.
  488. InstrType visitLandingPadInst(LandingPadInst &LPI) { return Illegal; }
  489. InstrType visitFuncletPadInst(FuncletPadInst &FPI) { return Illegal; }
  490. // DebugInfo should be included in the regions, but should not be
  491. // analyzed for similarity as it has no bearing on the outcome of the
  492. // program.
  493. InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; }
  494. InstrType visitIntrinsicInst(IntrinsicInst &II) {
  495. // These are disabled due to complications in the CodeExtractor when
  496. // outlining these instructions. For instance, It is unclear what we
  497. // should do when moving only the start or end lifetime instruction into
  498. // an outlined function. Also, assume-like intrinsics could be removed
  499. // from the region, removing arguments, causing discrepencies in the
  500. // number of inputs between different regions.
  501. if (II.isLifetimeStartOrEnd() || II.isAssumeLikeIntrinsic())
  502. return Illegal;
  503. return EnableIntrinsics ? Legal : Illegal;
  504. }
  505. // We only allow call instructions where the function has a name and
  506. // is not an indirect call.
  507. InstrType visitCallInst(CallInst &CI) {
  508. Function *F = CI.getCalledFunction();
  509. bool IsIndirectCall = CI.isIndirectCall();
  510. if (IsIndirectCall && !EnableIndirectCalls)
  511. return Illegal;
  512. if (!F && !IsIndirectCall)
  513. return Illegal;
  514. return Legal;
  515. }
  516. // TODO: We do not current handle similarity that changes the control flow.
  517. InstrType visitInvokeInst(InvokeInst &II) { return Illegal; }
  518. // TODO: We do not current handle similarity that changes the control flow.
  519. InstrType visitCallBrInst(CallBrInst &CBI) { return Illegal; }
  520. // TODO: Handle interblock similarity.
  521. InstrType visitTerminator(Instruction &I) { return Illegal; }
  522. InstrType visitInstruction(Instruction &I) { return Legal; }
  523. // The flag variable that lets the classifier know whether we should
  524. // allow branches to be checked for similarity.
  525. bool EnableBranches = false;
  526. // The flag variable that lets the classifier know whether we should
  527. // allow indirect calls to be considered legal instructions.
  528. bool EnableIndirectCalls = false;
  529. // Flag that lets the classifier know whether we should allow intrinsics to
  530. // be checked for similarity.
  531. bool EnableIntrinsics = false;
  532. };
  533. /// Maps an Instruction to a member of InstrType.
  534. InstructionClassification InstClassifier;
  535. };
  536. /// This is a class that wraps a range of IRInstructionData from one point to
  537. /// another in the vector of IRInstructionData, which is a region of the
  538. /// program. It is also responsible for defining the structure within this
  539. /// region of instructions.
  540. ///
  541. /// The structure of a region is defined through a value numbering system
  542. /// assigned to each unique value in a region at the creation of the
  543. /// IRSimilarityCandidate.
  544. ///
  545. /// For example, for each Instruction we add a mapping for each new
  546. /// value seen in that Instruction.
  547. /// IR: Mapping Added:
  548. /// %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2
  549. /// %add2 = add i32 %a, %1 %add2 -> 4
  550. /// %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5
  551. ///
  552. /// We can compare IRSimilarityCandidates against one another.
  553. /// The \ref isSimilar function compares each IRInstructionData against one
  554. /// another and if we have the same sequences of IRInstructionData that would
  555. /// create the same hash, we have similar IRSimilarityCandidates.
  556. ///
  557. /// We can also compare the structure of IRSimilarityCandidates. If we can
  558. /// create a mapping of registers in the region contained by one
  559. /// IRSimilarityCandidate to the region contained by different
  560. /// IRSimilarityCandidate, they can be considered structurally similar.
  561. ///
  562. /// IRSimilarityCandidate1: IRSimilarityCandidate2:
  563. /// %add1 = add i32 %a, %b %add1 = add i32 %d, %e
  564. /// %add2 = add i32 %a, %c %add2 = add i32 %d, %f
  565. /// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4
  566. ///
  567. /// Can have the following mapping from candidate to candidate of:
  568. /// %a -> %d, %b -> %e, %c -> %f, c1 -> c3, c2 -> c4
  569. /// and can be considered similar.
  570. ///
  571. /// IRSimilarityCandidate1: IRSimilarityCandidate2:
  572. /// %add1 = add i32 %a, %b %add1 = add i32 %d, c4
  573. /// %add2 = add i32 %a, %c %add2 = add i32 %d, %f
  574. /// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4
  575. ///
  576. /// We cannot create the same mapping since the use of c4 is not used in the
  577. /// same way as %b or c2.
  578. class IRSimilarityCandidate {
  579. private:
  580. /// The start index of this IRSimilarityCandidate in the instruction list.
  581. unsigned StartIdx = 0;
  582. /// The number of instructions in this IRSimilarityCandidate.
  583. unsigned Len = 0;
  584. /// The first instruction in this IRSimilarityCandidate.
  585. IRInstructionData *FirstInst = nullptr;
  586. /// The last instruction in this IRSimilarityCandidate.
  587. IRInstructionData *LastInst = nullptr;
  588. /// Global Value Numbering structures
  589. /// @{
  590. /// Stores the mapping of the value to the number assigned to it in the
  591. /// IRSimilarityCandidate.
  592. DenseMap<Value *, unsigned> ValueToNumber;
  593. /// Stores the mapping of the number to the value assigned this number.
  594. DenseMap<unsigned, Value *> NumberToValue;
  595. /// Stores the mapping of a value's number to canonical numbering in the
  596. /// candidate's respective similarity group.
  597. DenseMap<unsigned, unsigned> NumberToCanonNum;
  598. /// Stores the mapping of canonical number in the candidate's respective
  599. /// similarity group to a value number.
  600. DenseMap<unsigned, unsigned> CanonNumToNumber;
  601. /// @}
  602. public:
  603. /// \param StartIdx - The starting location of the region.
  604. /// \param Len - The length of the region.
  605. /// \param FirstInstIt - The starting IRInstructionData of the region.
  606. /// \param LastInstIt - The ending IRInstructionData of the region.
  607. IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
  608. IRInstructionData *FirstInstIt,
  609. IRInstructionData *LastInstIt);
  610. /// \param A - The first IRInstructionCandidate to compare.
  611. /// \param B - The second IRInstructionCandidate to compare.
  612. /// \returns True when every IRInstructionData in \p A is similar to every
  613. /// IRInstructionData in \p B.
  614. static bool isSimilar(const IRSimilarityCandidate &A,
  615. const IRSimilarityCandidate &B);
  616. /// \param [in] A - The first IRInstructionCandidate to compare.
  617. /// \param [in] B - The second IRInstructionCandidate to compare.
  618. /// \returns True when every IRInstructionData in \p A is structurally similar
  619. /// to \p B.
  620. static bool compareStructure(const IRSimilarityCandidate &A,
  621. const IRSimilarityCandidate &B);
  622. /// \param [in] A - The first IRInstructionCandidate to compare.
  623. /// \param [in] B - The second IRInstructionCandidate to compare.
  624. /// \param [in,out] ValueNumberMappingA - A mapping of value numbers from
  625. /// candidate \p A to candidate \B.
  626. /// \param [in,out] ValueNumberMappingB - A mapping of value numbers from
  627. /// candidate \p B to candidate \A.
  628. /// \returns True when every IRInstructionData in \p A is structurally similar
  629. /// to \p B.
  630. static bool
  631. compareStructure(const IRSimilarityCandidate &A,
  632. const IRSimilarityCandidate &B,
  633. DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
  634. DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB);
  635. struct OperandMapping {
  636. /// The IRSimilarityCandidate that holds the instruction the OperVals were
  637. /// pulled from.
  638. const IRSimilarityCandidate &IRSC;
  639. /// The operand values to be analyzed.
  640. ArrayRef<Value *> &OperVals;
  641. /// The current mapping of global value numbers from one IRSimilarityCandidate
  642. /// to another IRSimilarityCandidate.
  643. DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMapping;
  644. };
  645. /// A helper struct to hold the candidate, for a branch instruction, the
  646. /// relative location of a label, and the label itself. This is mostly to
  647. /// group the values together before passing them as a bundle to a function.
  648. struct RelativeLocMapping {
  649. /// The IRSimilarityCandidate that holds the instruction the relative
  650. /// location was pulled from.
  651. const IRSimilarityCandidate &IRSC;
  652. /// The relative location to be analyzed.
  653. int RelativeLocation;
  654. /// The corresponding value.
  655. Value *OperVal;
  656. };
  657. /// Compare the operands in \p A and \p B and check that the current mapping
  658. /// of global value numbers from \p A to \p B and \p B to \A is consistent.
  659. ///
  660. /// \param A - The first IRInstructionCandidate, operand values, and current
  661. /// operand mappings to compare.
  662. /// \param B - The second IRInstructionCandidate, operand values, and current
  663. /// operand mappings to compare.
  664. /// \returns true if the IRSimilarityCandidates operands are compatible.
  665. static bool compareNonCommutativeOperandMapping(OperandMapping A,
  666. OperandMapping B);
  667. /// Compare the operands in \p A and \p B and check that the current mapping
  668. /// of global value numbers from \p A to \p B and \p B to \A is consistent
  669. /// given that the operands are commutative.
  670. ///
  671. /// \param A - The first IRInstructionCandidate, operand values, and current
  672. /// operand mappings to compare.
  673. /// \param B - The second IRInstructionCandidate, operand values, and current
  674. /// operand mappings to compare.
  675. /// \returns true if the IRSimilarityCandidates operands are compatible.
  676. static bool compareCommutativeOperandMapping(OperandMapping A,
  677. OperandMapping B);
  678. /// Compare the relative locations in \p A and \p B and check that the
  679. /// distances match if both locations are contained in the region, and that
  680. /// the branches both point outside the region if they do not.
  681. /// Example Region:
  682. /// \code
  683. /// entry:
  684. /// br i1 %0, label %block_1, label %block_3
  685. /// block_0:
  686. /// br i1 %0, label %block_1, label %block_2
  687. /// block_1:
  688. /// br i1 %0, label %block_2, label %block_3
  689. /// block_2:
  690. /// br i1 %1, label %block_1, label %block_4
  691. /// block_3:
  692. /// br i1 %2, label %block_2, label %block_5
  693. /// \endcode
  694. /// If we compare the branches in block_0 and block_1 the relative values are
  695. /// 1 and 2 for both, so we consider this a match.
  696. ///
  697. /// If we compare the branches in entry and block_0 the relative values are
  698. /// 2 and 3, and 1 and 2 respectively. Since these are not the same we do not
  699. /// consider them a match.
  700. ///
  701. /// If we compare the branches in block_1 and block_2 the relative values are
  702. /// 1 and 2, and -1 and None respectively. As a result we do not consider
  703. /// these to be the same
  704. ///
  705. /// If we compare the branches in block_2 and block_3 the relative values are
  706. /// -1 and None for both. We do consider these to be a match.
  707. ///
  708. /// \param A - The first IRInstructionCandidate, relative location value,
  709. /// and incoming block.
  710. /// \param B - The second IRInstructionCandidate, relative location value,
  711. /// and incoming block.
  712. /// \returns true if the relative locations match.
  713. static bool checkRelativeLocations(RelativeLocMapping A,
  714. RelativeLocMapping B);
  715. /// Create a mapping from the value numbering to a different separate set of
  716. /// numbers. This will serve as a guide for relating one candidate to another.
  717. /// The canonical number gives use the ability identify which global value
  718. /// number in one candidate relates to the global value number in the other.
  719. ///
  720. /// \param [in, out] CurrCand - The IRSimilarityCandidate to create a
  721. /// canonical numbering for.
  722. static void createCanonicalMappingFor(IRSimilarityCandidate &CurrCand);
  723. /// Create a mapping for the value numbering of the calling
  724. /// IRSimilarityCandidate, to a different separate set of numbers, based on
  725. /// the canonical ordering in \p SourceCand. These are defined based on the
  726. /// found mappings in \p ToSourceMapping and \p FromSourceMapping. Both of
  727. /// these relationships should have the same information, just in opposite
  728. /// directions.
  729. ///
  730. /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a
  731. /// canonical numbering from.
  732. /// \param ToSourceMapping - The mapping of value numbers from this candidate
  733. /// to \p SourceCand.
  734. /// \param FromSourceMapping - The mapping of value numbers from \p SoureCand
  735. /// to this candidate.
  736. void createCanonicalRelationFrom(
  737. IRSimilarityCandidate &SourceCand,
  738. DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
  739. DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping);
  740. /// \param [in,out] BBSet - The set to track the basic blocks.
  741. void getBasicBlocks(DenseSet<BasicBlock *> &BBSet) const {
  742. for (IRInstructionData &ID : *this) {
  743. BasicBlock *BB = ID.Inst->getParent();
  744. if (BBSet.contains(BB))
  745. continue;
  746. BBSet.insert(BB);
  747. }
  748. }
  749. /// \param [in,out] BBSet - The set to track the basic blocks.
  750. /// \param [in,out] BBList - A list in order of use to track the basic blocks.
  751. void getBasicBlocks(DenseSet<BasicBlock *> &BBSet,
  752. SmallVector<BasicBlock *> &BBList) const {
  753. for (IRInstructionData &ID : *this) {
  754. BasicBlock *BB = ID.Inst->getParent();
  755. if (BBSet.contains(BB))
  756. continue;
  757. BBSet.insert(BB);
  758. BBList.push_back(BB);
  759. }
  760. }
  761. /// Compare the start and end indices of the two IRSimilarityCandidates for
  762. /// whether they overlap. If the start instruction of one
  763. /// IRSimilarityCandidate is less than the end instruction of the other, and
  764. /// the start instruction of one is greater than the start instruction of the
  765. /// other, they overlap.
  766. ///
  767. /// \returns true if the IRSimilarityCandidates do not have overlapping
  768. /// instructions.
  769. static bool overlap(const IRSimilarityCandidate &A,
  770. const IRSimilarityCandidate &B);
  771. /// \returns the number of instructions in this Candidate.
  772. unsigned getLength() const { return Len; }
  773. /// \returns the start index of this IRSimilarityCandidate.
  774. unsigned getStartIdx() const { return StartIdx; }
  775. /// \returns the end index of this IRSimilarityCandidate.
  776. unsigned getEndIdx() const { return StartIdx + Len - 1; }
  777. /// \returns The first IRInstructionData.
  778. IRInstructionData *front() const { return FirstInst; }
  779. /// \returns The last IRInstructionData.
  780. IRInstructionData *back() const { return LastInst; }
  781. /// \returns The first Instruction.
  782. Instruction *frontInstruction() { return FirstInst->Inst; }
  783. /// \returns The last Instruction
  784. Instruction *backInstruction() { return LastInst->Inst; }
  785. /// \returns The BasicBlock the IRSimilarityCandidate starts in.
  786. BasicBlock *getStartBB() { return FirstInst->Inst->getParent(); }
  787. /// \returns The BasicBlock the IRSimilarityCandidate ends in.
  788. BasicBlock *getEndBB() { return LastInst->Inst->getParent(); }
  789. /// \returns The Function that the IRSimilarityCandidate is located in.
  790. Function *getFunction() { return getStartBB()->getParent(); }
  791. /// Finds the positive number associated with \p V if it has been mapped.
  792. /// \param [in] V - the Value to find.
  793. /// \returns The positive number corresponding to the value.
  794. /// \returns None if not present.
  795. Optional<unsigned> getGVN(Value *V) {
  796. assert(V != nullptr && "Value is a nullptr?");
  797. DenseMap<Value *, unsigned>::iterator VNIt = ValueToNumber.find(V);
  798. if (VNIt == ValueToNumber.end())
  799. return None;
  800. return VNIt->second;
  801. }
  802. /// Finds the Value associate with \p Num if it exists.
  803. /// \param [in] Num - the number to find.
  804. /// \returns The Value associated with the number.
  805. /// \returns None if not present.
  806. Optional<Value *> fromGVN(unsigned Num) {
  807. DenseMap<unsigned, Value *>::iterator VNIt = NumberToValue.find(Num);
  808. if (VNIt == NumberToValue.end())
  809. return None;
  810. assert(VNIt->second != nullptr && "Found value is a nullptr!");
  811. return VNIt->second;
  812. }
  813. /// Find the canonical number from the global value number \p N stored in the
  814. /// candidate.
  815. ///
  816. /// \param N - The global value number to find the canonical number for.
  817. /// \returns An optional containing the value, and None if it could not be
  818. /// found.
  819. Optional<unsigned> getCanonicalNum(unsigned N) {
  820. DenseMap<unsigned, unsigned>::iterator NCIt = NumberToCanonNum.find(N);
  821. if (NCIt == NumberToCanonNum.end())
  822. return None;
  823. return NCIt->second;
  824. }
  825. /// Find the global value number from the canonical number \p N stored in the
  826. /// candidate.
  827. ///
  828. /// \param N - The canonical number to find the global vlaue number for.
  829. /// \returns An optional containing the value, and None if it could not be
  830. /// found.
  831. Optional<unsigned> fromCanonicalNum(unsigned N) {
  832. DenseMap<unsigned, unsigned>::iterator CNIt = CanonNumToNumber.find(N);
  833. if (CNIt == CanonNumToNumber.end())
  834. return None;
  835. return CNIt->second;
  836. }
  837. /// \param RHS -The IRSimilarityCandidate to compare against
  838. /// \returns true if the IRSimilarityCandidate is occurs after the
  839. /// IRSimilarityCandidate in the program.
  840. bool operator<(const IRSimilarityCandidate &RHS) const {
  841. return getStartIdx() > RHS.getStartIdx();
  842. }
  843. using iterator = IRInstructionDataList::iterator;
  844. iterator begin() const { return iterator(front()); }
  845. iterator end() const { return std::next(iterator(back())); }
  846. };
  847. typedef DenseMap<IRSimilarityCandidate *,
  848. DenseMap<unsigned, DenseSet<unsigned>>>
  849. CandidateGVNMapping;
  850. typedef std::vector<IRSimilarityCandidate> SimilarityGroup;
  851. typedef std::vector<SimilarityGroup> SimilarityGroupList;
  852. /// This class puts all the pieces of the IRInstructionData,
  853. /// IRInstructionMapper, IRSimilarityCandidate together.
  854. ///
  855. /// It first feeds the Module or vector of Modules into the IRInstructionMapper,
  856. /// and puts all the mapped instructions into a single long list of
  857. /// IRInstructionData.
  858. ///
  859. /// The list of unsigned integers is given to the Suffix Tree or similar data
  860. /// structure to find repeated subsequences. We construct an
  861. /// IRSimilarityCandidate for each instance of the subsequence. We compare them
  862. /// against one another since These repeated subsequences can have different
  863. /// structure. For each different kind of structure found, we create a
  864. /// similarity group.
  865. ///
  866. /// If we had four IRSimilarityCandidates A, B, C, and D where A, B and D are
  867. /// structurally similar to one another, while C is different we would have two
  868. /// SimilarityGroups:
  869. ///
  870. /// SimilarityGroup 1: SimilarityGroup 2
  871. /// A, B, D C
  872. ///
  873. /// A list of the different similarity groups is then returned after
  874. /// analyzing the module.
  875. class IRSimilarityIdentifier {
  876. public:
  877. IRSimilarityIdentifier(bool MatchBranches = true,
  878. bool MatchIndirectCalls = true,
  879. bool MatchCallsWithName = false,
  880. bool MatchIntrinsics = true)
  881. : Mapper(&InstDataAllocator, &InstDataListAllocator),
  882. EnableBranches(MatchBranches), EnableIndirectCalls(MatchIndirectCalls),
  883. EnableMatchingCallsByName(MatchCallsWithName),
  884. EnableIntrinsics(MatchIntrinsics) {}
  885. private:
  886. /// Map the instructions in the module to unsigned integers, using mapping
  887. /// already present in the Mapper if possible.
  888. ///
  889. /// \param [in] M Module - To map to integers.
  890. /// \param [in,out] InstrList - The vector to append IRInstructionData to.
  891. /// \param [in,out] IntegerMapping - The vector to append integers to.
  892. void populateMapper(Module &M, std::vector<IRInstructionData *> &InstrList,
  893. std::vector<unsigned> &IntegerMapping);
  894. /// Map the instructions in the modules vector to unsigned integers, using
  895. /// mapping already present in the mapper if possible.
  896. ///
  897. /// \param [in] Modules - The list of modules to use to populate the mapper
  898. /// \param [in,out] InstrList - The vector to append IRInstructionData to.
  899. /// \param [in,out] IntegerMapping - The vector to append integers to.
  900. void populateMapper(ArrayRef<std::unique_ptr<Module>> &Modules,
  901. std::vector<IRInstructionData *> &InstrList,
  902. std::vector<unsigned> &IntegerMapping);
  903. /// Find the similarity candidates in \p InstrList and corresponding
  904. /// \p UnsignedVec
  905. ///
  906. /// \param [in,out] InstrList - The vector to append IRInstructionData to.
  907. /// \param [in,out] IntegerMapping - The vector to append integers to.
  908. /// candidates found in the program.
  909. void findCandidates(std::vector<IRInstructionData *> &InstrList,
  910. std::vector<unsigned> &IntegerMapping);
  911. public:
  912. // Find the IRSimilarityCandidates in the \p Modules and group by structural
  913. // similarity in a SimilarityGroup, each group is returned in a
  914. // SimilarityGroupList.
  915. //
  916. // \param [in] Modules - the modules to analyze.
  917. // \returns The groups of similarity ranges found in the modules.
  918. SimilarityGroupList &
  919. findSimilarity(ArrayRef<std::unique_ptr<Module>> Modules);
  920. // Find the IRSimilarityCandidates in the given Module grouped by structural
  921. // similarity in a SimilarityGroup, contained inside a SimilarityGroupList.
  922. //
  923. // \param [in] M - the module to analyze.
  924. // \returns The groups of similarity ranges found in the module.
  925. SimilarityGroupList &findSimilarity(Module &M);
  926. // Clears \ref SimilarityCandidates if it is already filled by a previous run.
  927. void resetSimilarityCandidates() {
  928. // If we've already analyzed a Module or set of Modules, so we must clear
  929. // the SimilarityCandidates to make sure we do not have only old values
  930. // hanging around.
  931. if (SimilarityCandidates.hasValue())
  932. SimilarityCandidates->clear();
  933. else
  934. SimilarityCandidates = SimilarityGroupList();
  935. }
  936. // \returns The groups of similarity ranges found in the most recently passed
  937. // set of modules.
  938. Optional<SimilarityGroupList> &getSimilarity() {
  939. return SimilarityCandidates;
  940. }
  941. private:
  942. /// The allocator for IRInstructionData.
  943. SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
  944. /// The allocator for IRInstructionDataLists.
  945. SpecificBumpPtrAllocator<IRInstructionDataList> InstDataListAllocator;
  946. /// Map Instructions to unsigned integers and wraps the Instruction in an
  947. /// instance of IRInstructionData.
  948. IRInstructionMapper Mapper;
  949. /// The flag variable that marks whether we should check branches for
  950. /// similarity, or only look within basic blocks.
  951. bool EnableBranches = true;
  952. /// The flag variable that marks whether we allow indirect calls to be checked
  953. /// for similarity, or exclude them as a legal instruction.
  954. bool EnableIndirectCalls = true;
  955. /// The flag variable that marks whether we allow calls to be marked as
  956. /// similar if they do not have the same name, only the same calling
  957. /// convention, attributes and type signature.
  958. bool EnableMatchingCallsByName = true;
  959. /// The flag variable that marks whether we should check intrinsics for
  960. /// similarity.
  961. bool EnableIntrinsics = true;
  962. /// The SimilarityGroups found with the most recent run of \ref
  963. /// findSimilarity. None if there is no recent run.
  964. Optional<SimilarityGroupList> SimilarityCandidates;
  965. };
  966. } // end namespace IRSimilarity
  967. /// An analysis pass based on legacy pass manager that runs and returns
  968. /// IRSimilarityIdentifier run on the Module.
  969. class IRSimilarityIdentifierWrapperPass : public ModulePass {
  970. std::unique_ptr<IRSimilarity::IRSimilarityIdentifier> IRSI;
  971. public:
  972. static char ID;
  973. IRSimilarityIdentifierWrapperPass();
  974. IRSimilarity::IRSimilarityIdentifier &getIRSI() { return *IRSI; }
  975. const IRSimilarity::IRSimilarityIdentifier &getIRSI() const { return *IRSI; }
  976. bool doInitialization(Module &M) override;
  977. bool doFinalization(Module &M) override;
  978. bool runOnModule(Module &M) override;
  979. void getAnalysisUsage(AnalysisUsage &AU) const override {
  980. AU.setPreservesAll();
  981. }
  982. };
  983. /// An analysis pass that runs and returns the IRSimilarityIdentifier run on the
  984. /// Module.
  985. class IRSimilarityAnalysis : public AnalysisInfoMixin<IRSimilarityAnalysis> {
  986. public:
  987. typedef IRSimilarity::IRSimilarityIdentifier Result;
  988. Result run(Module &M, ModuleAnalysisManager &);
  989. private:
  990. friend AnalysisInfoMixin<IRSimilarityAnalysis>;
  991. static AnalysisKey Key;
  992. };
  993. /// Printer pass that uses \c IRSimilarityAnalysis.
  994. class IRSimilarityAnalysisPrinterPass
  995. : public PassInfoMixin<IRSimilarityAnalysisPrinterPass> {
  996. raw_ostream &OS;
  997. public:
  998. explicit IRSimilarityAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
  999. PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
  1000. };
  1001. } // end namespace llvm
  1002. #endif // LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
  1003. #ifdef __GNUC__
  1004. #pragma GCC diagnostic pop
  1005. #endif