IROutliner.h 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- IROutliner.h - Extract similar IR regions into functions --*- C++ -*-==//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. //
  14. // \file
  15. // The interface file for the IROutliner which is used by the IROutliner Pass.
  16. //
  17. // The outliner uses the IRSimilarityIdentifier to identify the similar regions
  18. // of code. It evaluates each set of IRSimilarityCandidates with an estimate of
  19. // whether it will provide code size reduction. Each region is extracted using
  20. // the code extractor. These extracted functions are consolidated into a single
  21. // function and called from the extracted call site.
  22. //
  23. // For example:
  24. // \code
  25. // %1 = add i32 %a, %b
  26. // %2 = add i32 %b, %a
  27. // %3 = add i32 %b, %a
  28. // %4 = add i32 %a, %b
  29. // \endcode
  30. // would become function
  31. // \code
  32. // define internal void outlined_ir_function(i32 %0, i32 %1) {
  33. // %1 = add i32 %0, %1
  34. // %2 = add i32 %1, %0
  35. // ret void
  36. // }
  37. // \endcode
  38. // with calls:
  39. // \code
  40. // call void outlined_ir_function(i32 %a, i32 %b)
  41. // call void outlined_ir_function(i32 %b, i32 %a)
  42. // \endcode
  43. //
  44. //===----------------------------------------------------------------------===//
  45. #ifndef LLVM_TRANSFORMS_IPO_IROUTLINER_H
  46. #define LLVM_TRANSFORMS_IPO_IROUTLINER_H
  47. #include "llvm/Analysis/IRSimilarityIdentifier.h"
  48. #include "llvm/IR/PassManager.h"
  49. #include "llvm/Support/InstructionCost.h"
  50. #include "llvm/Transforms/Utils/CodeExtractor.h"
  51. struct OutlinableGroup;
  52. namespace llvm {
  53. using namespace CallingConv;
  54. using namespace IRSimilarity;
  55. class Module;
  56. class TargetTransformInfo;
  57. class OptimizationRemarkEmitter;
  58. /// The OutlinableRegion holds all the information for a specific region, or
  59. /// sequence of instructions. This includes what values need to be hoisted to
  60. /// arguments from the extracted function, inputs and outputs to the region, and
  61. /// mapping from the extracted function arguments to overall function arguments.
  62. struct OutlinableRegion {
  63. /// Describes the region of code.
  64. IRSimilarityCandidate *Candidate = nullptr;
  65. /// If this region is outlined, the front and back IRInstructionData could
  66. /// potentially become invalidated if the only new instruction is a call.
  67. /// This ensures that we replace in the instruction in the IRInstructionData.
  68. IRInstructionData *NewFront = nullptr;
  69. IRInstructionData *NewBack = nullptr;
  70. /// The number of extracted inputs from the CodeExtractor.
  71. unsigned NumExtractedInputs = 0;
  72. /// The corresponding BasicBlock with the appropriate stores for this
  73. /// OutlinableRegion in the overall function.
  74. unsigned OutputBlockNum = -1;
  75. /// Mapping the extracted argument number to the argument number in the
  76. /// overall function. Since there will be inputs, such as elevated constants
  77. /// that are not the same in each region in a SimilarityGroup, or values that
  78. /// cannot be sunk into the extracted section in every region, we must keep
  79. /// track of which extracted argument maps to which overall argument.
  80. DenseMap<unsigned, unsigned> ExtractedArgToAgg;
  81. DenseMap<unsigned, unsigned> AggArgToExtracted;
  82. /// Values in the outlined functions will often be replaced by arguments. When
  83. /// finding corresponding values from one region to another, the found value
  84. /// will be the value the argument previously replaced. This structure maps
  85. /// any replaced values for the region to the aggregate aggregate argument
  86. /// in the overall function.
  87. DenseMap<Value *, Value *> RemappedArguments;
  88. /// Marks whether we need to change the order of the arguments when mapping
  89. /// the old extracted function call to the new aggregate outlined function
  90. /// call.
  91. bool ChangedArgOrder = false;
  92. /// Marks whether this region ends in a branch, there is special handling
  93. /// required for the following basic blocks in this case.
  94. bool EndsInBranch = false;
  95. /// The PHIBlocks with their corresponding return block based on the return
  96. /// value as the key.
  97. DenseMap<Value *, BasicBlock *> PHIBlocks;
  98. /// Mapping of the argument number in the deduplicated function
  99. /// to a given constant, which is used when creating the arguments to the call
  100. /// to the newly created deduplicated function. This is handled separately
  101. /// since the CodeExtractor does not recognize constants.
  102. DenseMap<unsigned, Constant *> AggArgToConstant;
  103. /// The global value numbers that are used as outputs for this section. Once
  104. /// extracted, each output will be stored to an output register. This
  105. /// documents the global value numbers that are used in this pattern.
  106. SmallVector<unsigned, 4> GVNStores;
  107. /// Used to create an outlined function.
  108. CodeExtractor *CE = nullptr;
  109. /// The call site of the extracted region.
  110. CallInst *Call = nullptr;
  111. /// The function for the extracted region.
  112. Function *ExtractedFunction = nullptr;
  113. /// Flag for whether we have split out the IRSimilarityCanidate. That is,
  114. /// make the region contained the IRSimilarityCandidate its own BasicBlock.
  115. bool CandidateSplit = false;
  116. /// Flag for whether we should not consider this region for extraction.
  117. bool IgnoreRegion = false;
  118. /// The BasicBlock that is before the start of the region BasicBlock,
  119. /// only defined when the region has been split.
  120. BasicBlock *PrevBB = nullptr;
  121. /// The BasicBlock that contains the starting instruction of the region.
  122. BasicBlock *StartBB = nullptr;
  123. /// The BasicBlock that contains the ending instruction of the region.
  124. BasicBlock *EndBB = nullptr;
  125. /// The BasicBlock that is after the start of the region BasicBlock,
  126. /// only defined when the region has been split.
  127. BasicBlock *FollowBB = nullptr;
  128. /// The Outlinable Group that contains this region and structurally similar
  129. /// regions to this region.
  130. OutlinableGroup *Parent = nullptr;
  131. OutlinableRegion(IRSimilarityCandidate &C, OutlinableGroup &Group)
  132. : Candidate(&C), Parent(&Group) {
  133. StartBB = C.getStartBB();
  134. EndBB = C.getEndBB();
  135. }
  136. /// For the contained region, split the parent BasicBlock at the starting and
  137. /// ending instructions of the contained IRSimilarityCandidate.
  138. void splitCandidate();
  139. /// For the contained region, reattach the BasicBlock at the starting and
  140. /// ending instructions of the contained IRSimilarityCandidate, or if the
  141. /// function has been extracted, the start and end of the BasicBlock
  142. /// containing the called function.
  143. void reattachCandidate();
  144. /// Find a corresponding value for \p V in similar OutlinableRegion \p Other.
  145. ///
  146. /// \param Other [in] - The OutlinableRegion to find the corresponding Value
  147. /// in.
  148. /// \param V [in] - The Value to look for in the other region.
  149. /// \return The corresponding Value to \p V if it exists, otherwise nullptr.
  150. Value *findCorrespondingValueIn(const OutlinableRegion &Other, Value *V);
  151. /// Find a corresponding BasicBlock for \p BB in similar OutlinableRegion \p Other.
  152. ///
  153. /// \param Other [in] - The OutlinableRegion to find the corresponding
  154. /// BasicBlock in.
  155. /// \param BB [in] - The BasicBlock to look for in the other region.
  156. /// \return The corresponding Value to \p V if it exists, otherwise nullptr.
  157. BasicBlock *findCorrespondingBlockIn(const OutlinableRegion &Other,
  158. BasicBlock *BB);
  159. /// Get the size of the code removed from the region.
  160. ///
  161. /// \param [in] TTI - The TargetTransformInfo for the parent function.
  162. /// \returns the code size of the region
  163. InstructionCost getBenefit(TargetTransformInfo &TTI);
  164. };
  165. /// This class is a pass that identifies similarity in a Module, extracts
  166. /// instances of the similarity, and then consolidating the similar regions
  167. /// in an effort to reduce code size. It uses the IRSimilarityIdentifier pass
  168. /// to identify the similar regions of code, and then extracts the similar
  169. /// sections into a single function. See the above for an example as to
  170. /// how code is extracted and consolidated into a single function.
  171. class IROutliner {
  172. public:
  173. IROutliner(function_ref<TargetTransformInfo &(Function &)> GTTI,
  174. function_ref<IRSimilarityIdentifier &(Module &)> GIRSI,
  175. function_ref<OptimizationRemarkEmitter &(Function &)> GORE)
  176. : getTTI(GTTI), getIRSI(GIRSI), getORE(GORE) {
  177. // Check that the DenseMap implementation has not changed.
  178. assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 &&
  179. "DenseMapInfo<unsigned>'s empty key isn't -1!");
  180. assert(DenseMapInfo<unsigned>::getTombstoneKey() == (unsigned)-2 &&
  181. "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
  182. }
  183. bool run(Module &M);
  184. private:
  185. /// Find repeated similar code sequences in \p M and outline them into new
  186. /// Functions.
  187. ///
  188. /// \param [in] M - The module to outline from.
  189. /// \returns The number of Functions created.
  190. unsigned doOutline(Module &M);
  191. /// Check whether an OutlinableRegion is incompatible with code already
  192. /// outlined. OutlinableRegions are incomptaible when there are overlapping
  193. /// instructions, or code that has not been recorded has been added to the
  194. /// instructions.
  195. ///
  196. /// \param [in] Region - The OutlinableRegion to check for conflicts with
  197. /// already outlined code.
  198. /// \returns whether the region can safely be outlined.
  199. bool isCompatibleWithAlreadyOutlinedCode(const OutlinableRegion &Region);
  200. /// Remove all the IRSimilarityCandidates from \p CandidateVec that have
  201. /// instructions contained in a previously outlined region and put the
  202. /// remaining regions in \p CurrentGroup.
  203. ///
  204. /// \param [in] CandidateVec - List of similarity candidates for regions with
  205. /// the same similarity structure.
  206. /// \param [in,out] CurrentGroup - Contains the potential sections to
  207. /// be outlined.
  208. void
  209. pruneIncompatibleRegions(std::vector<IRSimilarityCandidate> &CandidateVec,
  210. OutlinableGroup &CurrentGroup);
  211. /// Create the function based on the overall types found in the current
  212. /// regions being outlined.
  213. ///
  214. /// \param M - The module to outline from.
  215. /// \param [in,out] CG - The OutlinableGroup for the regions to be outlined.
  216. /// \param [in] FunctionNameSuffix - How many functions have we previously
  217. /// created.
  218. /// \returns the newly created function.
  219. Function *createFunction(Module &M, OutlinableGroup &CG,
  220. unsigned FunctionNameSuffix);
  221. /// Identify the needed extracted inputs in a section, and add to the overall
  222. /// function if needed.
  223. ///
  224. /// \param [in] M - The module to outline from.
  225. /// \param [in,out] Region - The region to be extracted.
  226. /// \param [in] NotSame - The global value numbers of the Values in the region
  227. /// that do not have the same Constant in each strucutrally similar region.
  228. void findAddInputsOutputs(Module &M, OutlinableRegion &Region,
  229. DenseSet<unsigned> &NotSame);
  230. /// Find the number of instructions that will be removed by extracting the
  231. /// OutlinableRegions in \p CurrentGroup.
  232. ///
  233. /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
  234. /// analyzed.
  235. /// \returns the number of outlined instructions across all regions.
  236. InstructionCost findBenefitFromAllRegions(OutlinableGroup &CurrentGroup);
  237. /// Find the number of instructions that will be added by reloading arguments.
  238. ///
  239. /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
  240. /// analyzed.
  241. /// \returns the number of added reload instructions across all regions.
  242. InstructionCost findCostOutputReloads(OutlinableGroup &CurrentGroup);
  243. /// Find the cost and the benefit of \p CurrentGroup and save it back to
  244. /// \p CurrentGroup.
  245. ///
  246. /// \param [in] M - The module being analyzed
  247. /// \param [in,out] CurrentGroup - The overall outlined section
  248. void findCostBenefit(Module &M, OutlinableGroup &CurrentGroup);
  249. /// Update the output mapping based on the load instruction, and the outputs
  250. /// of the extracted function.
  251. ///
  252. /// \param Region - The region extracted
  253. /// \param Outputs - The outputs from the extracted function.
  254. /// \param LI - The load instruction used to update the mapping.
  255. void updateOutputMapping(OutlinableRegion &Region,
  256. ArrayRef<Value *> Outputs, LoadInst *LI);
  257. /// Extract \p Region into its own function.
  258. ///
  259. /// \param [in] Region - The region to be extracted into its own function.
  260. /// \returns True if it was successfully outlined.
  261. bool extractSection(OutlinableRegion &Region);
  262. /// For the similarities found, and the extracted sections, create a single
  263. /// outlined function with appropriate output blocks as necessary.
  264. ///
  265. /// \param [in] M - The module to outline from
  266. /// \param [in] CurrentGroup - The set of extracted sections to consolidate.
  267. /// \param [in,out] FuncsToRemove - List of functions to remove from the
  268. /// module after outlining is completed.
  269. /// \param [in,out] OutlinedFunctionNum - the number of new outlined
  270. /// functions.
  271. void deduplicateExtractedSections(Module &M, OutlinableGroup &CurrentGroup,
  272. std::vector<Function *> &FuncsToRemove,
  273. unsigned &OutlinedFunctionNum);
  274. /// If true, enables us to outline from functions that have LinkOnceFromODR
  275. /// linkages.
  276. bool OutlineFromLinkODRs = false;
  277. /// If false, we do not worry if the cost is greater than the benefit. This
  278. /// is for debugging and testing, so that we can test small cases to ensure
  279. /// that the outlining is being done correctly.
  280. bool CostModel = true;
  281. /// The set of outlined Instructions, identified by their location in the
  282. /// sequential ordering of instructions in a Module.
  283. DenseSet<unsigned> Outlined;
  284. /// TargetTransformInfo lambda for target specific information.
  285. function_ref<TargetTransformInfo &(Function &)> getTTI;
  286. /// A mapping from newly created reloaded output values to the original value.
  287. /// If an value is replace by an output from an outlined region, this maps
  288. /// that Value, back to its original Value.
  289. DenseMap<Value *, Value *> OutputMappings;
  290. /// IRSimilarityIdentifier lambda to retrieve IRSimilarityIdentifier.
  291. function_ref<IRSimilarityIdentifier &(Module &)> getIRSI;
  292. /// The optimization remark emitter for the pass.
  293. function_ref<OptimizationRemarkEmitter &(Function &)> getORE;
  294. /// The memory allocator used to allocate the CodeExtractors.
  295. SpecificBumpPtrAllocator<CodeExtractor> ExtractorAllocator;
  296. /// The memory allocator used to allocate the OutlinableRegions.
  297. SpecificBumpPtrAllocator<OutlinableRegion> RegionAllocator;
  298. /// The memory allocator used to allocate new IRInstructionData.
  299. SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
  300. /// Custom InstVisitor to classify different instructions for whether it can
  301. /// be analyzed for similarity. This is needed as there may be instruction we
  302. /// can identify as having similarity, but are more complicated to outline.
  303. struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> {
  304. InstructionAllowed() = default;
  305. bool visitBranchInst(BranchInst &BI) { return EnableBranches; }
  306. bool visitPHINode(PHINode &PN) { return EnableBranches; }
  307. // TODO: Handle allocas.
  308. bool visitAllocaInst(AllocaInst &AI) { return false; }
  309. // VAArg instructions are not allowed since this could cause difficulty when
  310. // differentiating between different sets of variable instructions in
  311. // the deduplicated outlined regions.
  312. bool visitVAArgInst(VAArgInst &VI) { return false; }
  313. // We exclude all exception handling cases since they are so context
  314. // dependent.
  315. bool visitLandingPadInst(LandingPadInst &LPI) { return false; }
  316. bool visitFuncletPadInst(FuncletPadInst &FPI) { return false; }
  317. // DebugInfo should be included in the regions, but should not be
  318. // analyzed for similarity as it has no bearing on the outcome of the
  319. // program.
  320. bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; }
  321. // TODO: Handle specific intrinsics individually from those that can be
  322. // handled.
  323. bool IntrinsicInst(IntrinsicInst &II) { return EnableIntrinsics; }
  324. // We only handle CallInsts that are not indirect, since we cannot guarantee
  325. // that they have a name in these cases.
  326. bool visitCallInst(CallInst &CI) {
  327. Function *F = CI.getCalledFunction();
  328. bool IsIndirectCall = CI.isIndirectCall();
  329. if (IsIndirectCall && !EnableIndirectCalls)
  330. return false;
  331. if (!F && !IsIndirectCall)
  332. return false;
  333. // Returning twice can cause issues with the state of the function call
  334. // that were not expected when the function was used, so we do not include
  335. // the call in outlined functions.
  336. if (CI.canReturnTwice())
  337. return false;
  338. // TODO: Update the outliner to capture whether the outlined function
  339. // needs these extra attributes.
  340. // Functions marked with the swifttailcc and tailcc calling conventions
  341. // require special handling when outlining musttail functions. The
  342. // calling convention must be passed down to the outlined function as
  343. // well. Further, there is special handling for musttail calls as well,
  344. // requiring a return call directly after. For now, the outliner does not
  345. // support this.
  346. bool IsTailCC = CI.getCallingConv() == CallingConv::SwiftTail ||
  347. CI.getCallingConv() == CallingConv::Tail;
  348. if (IsTailCC && !EnableMustTailCalls)
  349. return false;
  350. if (CI.isMustTailCall() && !EnableMustTailCalls)
  351. return false;
  352. // The outliner can only handle musttail items if it is also accompanied
  353. // by the tailcc or swifttailcc calling convention.
  354. if (CI.isMustTailCall() && !IsTailCC)
  355. return false;
  356. return true;
  357. }
  358. // TODO: Handle FreezeInsts. Since a frozen value could be frozen inside
  359. // the outlined region, and then returned as an output, this will have to be
  360. // handled differently.
  361. bool visitFreezeInst(FreezeInst &CI) { return false; }
  362. // TODO: We do not current handle similarity that changes the control flow.
  363. bool visitInvokeInst(InvokeInst &II) { return false; }
  364. // TODO: We do not current handle similarity that changes the control flow.
  365. bool visitCallBrInst(CallBrInst &CBI) { return false; }
  366. // TODO: Handle interblock similarity.
  367. bool visitTerminator(Instruction &I) { return false; }
  368. bool visitInstruction(Instruction &I) { return true; }
  369. // The flag variable that marks whether we should allow branch instructions
  370. // to be outlined.
  371. bool EnableBranches = false;
  372. // The flag variable that marks whether we should allow indirect calls
  373. // to be outlined.
  374. bool EnableIndirectCalls = true;
  375. // The flag variable that marks whether we should allow intrinsics
  376. // instructions to be outlined.
  377. bool EnableIntrinsics = false;
  378. // The flag variable that marks whether we should allow musttail calls.
  379. bool EnableMustTailCalls = false;
  380. };
  381. /// A InstVisitor used to exclude certain instructions from being outlined.
  382. InstructionAllowed InstructionClassifier;
  383. };
  384. /// Pass to outline similar regions.
  385. class IROutlinerPass : public PassInfoMixin<IROutlinerPass> {
  386. public:
  387. PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
  388. };
  389. } // end namespace llvm
  390. #endif // LLVM_TRANSFORMS_IPO_IROUTLINER_H
  391. #ifdef __GNUC__
  392. #pragma GCC diagnostic pop
  393. #endif