  1. //===- PartialInlining.cpp - Inline parts of functions --------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass performs partial inlining, typically by inlining an if statement
  10. // that surrounds the body of the function.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/Transforms/IPO/PartialInlining.h"
  14. #include "llvm/ADT/DenseMap.h"
  15. #include "llvm/ADT/DenseSet.h"
  16. #include "llvm/ADT/None.h"
  17. #include "llvm/ADT/Optional.h"
  18. #include "llvm/ADT/STLExtras.h"
  19. #include "llvm/ADT/SmallVector.h"
  20. #include "llvm/ADT/Statistic.h"
  21. #include "llvm/Analysis/BlockFrequencyInfo.h"
  22. #include "llvm/Analysis/BranchProbabilityInfo.h"
  23. #include "llvm/Analysis/InlineCost.h"
  24. #include "llvm/Analysis/LoopInfo.h"
  25. #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  26. #include "llvm/Analysis/ProfileSummaryInfo.h"
  27. #include "llvm/Analysis/TargetLibraryInfo.h"
  28. #include "llvm/Analysis/TargetTransformInfo.h"
  29. #include "llvm/IR/Attributes.h"
  30. #include "llvm/IR/BasicBlock.h"
  31. #include "llvm/IR/CFG.h"
  32. #include "llvm/IR/DebugLoc.h"
  33. #include "llvm/IR/DiagnosticInfo.h"
  34. #include "llvm/IR/Dominators.h"
  35. #include "llvm/IR/Function.h"
  36. #include "llvm/IR/InstrTypes.h"
  37. #include "llvm/IR/Instruction.h"
  38. #include "llvm/IR/Instructions.h"
  39. #include "llvm/IR/IntrinsicInst.h"
  40. #include "llvm/IR/Intrinsics.h"
  41. #include "llvm/IR/Module.h"
  42. #include "llvm/IR/User.h"
  43. #include "llvm/InitializePasses.h"
  44. #include "llvm/Pass.h"
  45. #include "llvm/Support/BlockFrequency.h"
  46. #include "llvm/Support/BranchProbability.h"
  47. #include "llvm/Support/Casting.h"
  48. #include "llvm/Support/CommandLine.h"
  49. #include "llvm/Support/ErrorHandling.h"
  50. #include "llvm/Transforms/IPO.h"
  51. #include "llvm/Transforms/Utils/Cloning.h"
  52. #include "llvm/Transforms/Utils/CodeExtractor.h"
  53. #include "llvm/Transforms/Utils/ValueMapper.h"
  54. #include <algorithm>
  55. #include <cassert>
  56. #include <cstdint>
  57. #include <functional>
  58. #include <iterator>
  59. #include <memory>
  60. #include <tuple>
  61. #include <vector>
using namespace llvm;

#define DEBUG_TYPE "partial-inlining"

STATISTIC(NumPartialInlined,
          "Number of callsites functions partially inlined into.");
STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with "
                                        "cold outlined regions were partially "
                                        "inlined into its caller(s).");
STATISTIC(NumColdRegionsFound,
          "Number of cold single entry/exit regions found.");
STATISTIC(NumColdRegionsOutlined,
          "Number of cold single entry/exit regions outlined.");

// Command line option to disable partial-inlining. The default is false:
static cl::opt<bool>
    DisablePartialInlining("disable-partial-inlining", cl::init(false),
                           cl::Hidden, cl::desc("Disable partial inlining"));

// Command line option to disable multi-region partial-inlining. The default is
// false:
static cl::opt<bool> DisableMultiRegionPartialInline(
    "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
    cl::desc("Disable multi-region partial inlining"));

// Command line option to force outlining in regions with live exit variables.
// The default is false:
static cl::opt<bool>
    ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
                  cl::desc("Force outline regions with live exits"));

// Command line option to enable marking outline functions with Cold Calling
// Convention. The default is false:
static cl::opt<bool>
    MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
                       cl::desc("Mark outline function calls with ColdCC"));

// This is an option used by testing: it bypasses the profitability
// comparisons below so tests can exercise the transformation directly.
static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
                                      cl::init(false), cl::ZeroOrMore,
                                      cl::ReallyHidden,
                                      cl::desc("Skip Cost Analysis"));

// Used to determine if a cold region is worth outlining based on
// its inlining cost compared to the original function. Default is set at 10%.
// ie. if the cold region reduces the inlining cost of the original function by
// at least 10%.
static cl::opt<float> MinRegionSizeRatio(
    "min-region-size-ratio", cl::init(0.1), cl::Hidden,
    cl::desc("Minimum ratio comparing relative sizes of each "
             "outline candidate and original function"));

// Used to tune the minimum number of execution counts needed in the predecessor
// block to the cold edge. ie. confidence interval.
static cl::opt<unsigned>
    MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
                             cl::desc("Minimum block executions to consider "
                                      "its BranchProbabilityInfo valid"));

// Used to determine when an edge is considered cold. Default is set to 10%. ie.
// if the branch probability is 10% or less, then it is deemed as 'cold'.
static cl::opt<float> ColdBranchRatio(
    "cold-branch-ratio", cl::init(0.1), cl::Hidden,
    cl::desc("Minimum BranchProbability to consider a region cold."));

// Upper bound on the number of blocks kept in the inlined (non-outlined)
// portion; setting it to 0 or 1 effectively disables partial inlining.
static cl::opt<unsigned> MaxNumInlineBlocks(
    "max-num-inline-blocks", cl::init(5), cl::Hidden,
    cl::desc("Max number of blocks to be partially inlined"));

// Command line option to set the maximum number of partial inlining allowed
// for the module. The default value of -1 means no limit.
static cl::opt<int> MaxNumPartialInlining(
    "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
    cl::desc("Max number of partial inlining. The default is unlimited"));

// Used only when PGO or user annotated branch data is absent. It is
// the least value that is used to weigh the outline region. If BFI
// produces larger value, the BFI value will be used.
static cl::opt<int>
    OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
                             cl::Hidden, cl::ZeroOrMore,
                             cl::desc("Relative frequency of outline region to "
                                      "the entry block"));

static cl::opt<unsigned> ExtraOutliningPenalty(
    "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
    cl::desc("A debug option to add additional penalty to the computed one."));
namespace {

// Describes a single-region outlining candidate: a chain of guarding entry
// blocks, one return block kept inline, and the dominated region to outline.
struct FunctionOutliningInfo {
  FunctionOutliningInfo() = default;

  // Returns the number of blocks to be inlined including all blocks
  // in Entries and one return block.
  unsigned getNumInlinedBlocks() const { return Entries.size() + 1; }

  // A set of blocks including the function entry that guard
  // the region to be outlined.
  SmallVector<BasicBlock *, 4> Entries;

  // The return block that is not included in the outlined region.
  BasicBlock *ReturnBlock = nullptr;

  // The dominating block of the region to be outlined.
  BasicBlock *NonReturnBlock = nullptr;

  // The set of blocks in Entries that are predecessors to ReturnBlock.
  SmallVector<BasicBlock *, 4> ReturnBlockPreds;
};
  151. struct FunctionOutliningMultiRegionInfo {
  152. FunctionOutliningMultiRegionInfo() {}
  153. // Container for outline regions
  154. struct OutlineRegionInfo {
  155. OutlineRegionInfo(ArrayRef<BasicBlock *> Region,
  156. BasicBlock *EntryBlock, BasicBlock *ExitBlock,
  157. BasicBlock *ReturnBlock)
  158. : Region(Region.begin(), Region.end()), EntryBlock(EntryBlock),
  159. ExitBlock(ExitBlock), ReturnBlock(ReturnBlock) {}
  160. SmallVector<BasicBlock *, 8> Region;
  161. BasicBlock *EntryBlock;
  162. BasicBlock *ExitBlock;
  163. BasicBlock *ReturnBlock;
  164. };
  165. SmallVector<OutlineRegionInfo, 4> ORI;
  166. };
// Implementation of the partial-inlining transformation, shared by the legacy
// and new pass managers. Holds analysis accessors and drives outlining plus
// the subsequent inlining of the trimmed clone into its callers.
struct PartialInlinerImpl {
  PartialInlinerImpl(
      function_ref<AssumptionCache &(Function &)> GetAC,
      function_ref<AssumptionCache *(Function &)> LookupAC,
      function_ref<TargetTransformInfo &(Function &)> GTTI,
      function_ref<const TargetLibraryInfo &(Function &)> GTLI,
      ProfileSummaryInfo &ProfSI,
      function_ref<BlockFrequencyInfo &(Function &)> GBFI = nullptr)
      : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
        GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {}

  // Entry point: attempts partial inlining on every function in M.
  bool run(Module &M);

  // Main part of the transformation that calls helper functions to find
  // outlining candidates, clone & outline the function, and attempt to
  // partially inline the resulting function. Returns true if
  // inlining was successful, false otherwise. Also returns the outline
  // function (only if we partially inlined early returns) as there is a
  // possibility to further "peel" early return statements that were left in the
  // outline function due to code size.
  std::pair<bool, Function *> unswitchFunction(Function &F);

  // This class speculatively clones the function to be partial inlined.
  // At the end of partial inlining, the remaining callsites to the cloned
  // function that are not partially inlined will be fixed up to reference
  // the original function, and the cloned function will be erased.
  struct FunctionCloner {
    // Two constructors, one for single region outlining, the other for
    // multi-region outlining.
    FunctionCloner(Function *F, FunctionOutliningInfo *OI,
                   OptimizationRemarkEmitter &ORE,
                   function_ref<AssumptionCache *(Function &)> LookupAC,
                   function_ref<TargetTransformInfo &(Function &)> GetTTI);
    FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
                   OptimizationRemarkEmitter &ORE,
                   function_ref<AssumptionCache *(Function &)> LookupAC,
                   function_ref<TargetTransformInfo &(Function &)> GetTTI);

    ~FunctionCloner();

    // Prepare for function outlining: making sure there is only
    // one incoming edge from the extracted/outlined region to
    // the return block.
    void normalizeReturnBlock() const;

    // Do function outlining for cold regions.
    bool doMultiRegionFunctionOutlining();

    // Do function outlining for region after early return block(s).
    // NOTE: For vararg functions that do the vararg handling in the outlined
    // function, we temporarily generate IR that does not properly
    // forward varargs to the outlined function. Calling InlineFunction
    // will update calls to the outlined functions to properly forward
    // the varargs.
    Function *doSingleRegionFunctionOutlining();

    Function *OrigFunc = nullptr;
    Function *ClonedFunc = nullptr;

    typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
    // Keep track of Outlined Functions and the basic block they're called from.
    SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;

    // ClonedFunc is inlined in one of its callers after function
    // outlining.
    bool IsFunctionInlined = false;
    // The cost of the region to be outlined.
    InstructionCost OutlinedRegionCost = 0;
    // ClonedOI is specific to outlining non-early return blocks.
    std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
    // ClonedOMRI is specific to outlining cold regions.
    std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
    std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
    OptimizationRemarkEmitter &ORE;
    function_ref<AssumptionCache *(Function &)> LookupAC;
    function_ref<TargetTransformInfo &(Function &)> GetTTI;
  };

private:
  // Number of successful partial inlines so far; compared against the
  // MaxNumPartialInlining limit in isLimitReached().
  int NumPartialInlining = 0;
  function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
  function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
  function_ref<TargetTransformInfo &(Function &)> GetTTI;
  function_ref<BlockFrequencyInfo &(Function &)> GetBFI;
  function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
  ProfileSummaryInfo &PSI;

  // Return the frequency of the OutlininingBB relative to F's entry point.
  // The result is no larger than 1 and is represented using BP.
  // (Note that the outlined region's 'head' block can only have incoming
  // edges from the guarding entry blocks).
  BranchProbability
  getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) const;

  // Return true if the callee of CB should be partially inlined with
  // profit.
  bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner,
                           BlockFrequency WeightedOutliningRcost,
                           OptimizationRemarkEmitter &ORE) const;

  // Try to inline DuplicateFunction (cloned from F with call to
  // the OutlinedFunction into its callers. Return true
  // if there is any successful inlining.
  bool tryPartialInline(FunctionCloner &Cloner);

  // Compute the mapping from use site of DuplicationFunction to the enclosing
  // BB's profile count.
  void
  computeCallsiteToProfCountMap(Function *DuplicateFunction,
                                DenseMap<User *, uint64_t> &SiteCountMap) const;

  // True once the per-module cap on partial inlines has been hit
  // (MaxNumPartialInlining == -1 means no cap).
  bool isLimitReached() const {
    return (MaxNumPartialInlining != -1 &&
            NumPartialInlining >= MaxNumPartialInlining);
  }

  // Narrow a user to the call/invoke instruction it must be; asserts
  // (llvm_unreachable) on any other kind of use.
  static CallBase *getSupportedCallBase(User *U) {
    if (isa<CallInst>(U) || isa<InvokeInst>(U))
      return cast<CallBase>(U);
    llvm_unreachable("All uses must be calls");
    return nullptr;
  }

  // Return an arbitrary (the first) call site of F. F must have at least
  // one user, and all users must be calls (see getSupportedCallBase).
  static CallBase *getOneCallSiteTo(Function &F) {
    User *User = *F.user_begin();
    return getSupportedCallBase(User);
  }

  // Return the debug location and enclosing block of one call site of F,
  // used for emitting optimization remarks.
  std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function &F) const {
    CallBase *CB = getOneCallSiteTo(F);
    DebugLoc DLoc = CB->getDebugLoc();
    BasicBlock *Block = CB->getParent();
    return std::make_tuple(DLoc, Block);
  }

  // Returns the costs associated with function outlining:
  // - The first value is the non-weighted runtime cost for making the call
  //   to the outlined function, including the addtional setup cost in the
  //   outlined function itself;
  // - The second value is the estimated size of the new call sequence in
  //   basic block Cloner.OutliningCallBB;
  std::tuple<InstructionCost, InstructionCost>
  computeOutliningCosts(FunctionCloner &Cloner) const;

  // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
  // approximate both the size and runtime cost (Note that in the current
  // inline cost analysis, there is no clear distinction there either).
  static InstructionCost computeBBInlineCost(BasicBlock *BB,
                                             TargetTransformInfo *TTI);

  std::unique_ptr<FunctionOutliningInfo>
  computeOutliningInfo(Function &F) const;

  std::unique_ptr<FunctionOutliningMultiRegionInfo>
  computeOutliningColdRegionsInfo(Function &F,
                                  OptimizationRemarkEmitter &ORE) const;
};
  301. struct PartialInlinerLegacyPass : public ModulePass {
  302. static char ID; // Pass identification, replacement for typeid
  303. PartialInlinerLegacyPass() : ModulePass(ID) {
  304. initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
  305. }
  306. void getAnalysisUsage(AnalysisUsage &AU) const override {
  307. AU.addRequired<AssumptionCacheTracker>();
  308. AU.addRequired<ProfileSummaryInfoWrapperPass>();
  309. AU.addRequired<TargetTransformInfoWrapperPass>();
  310. AU.addRequired<TargetLibraryInfoWrapperPass>();
  311. }
  312. bool runOnModule(Module &M) override {
  313. if (skipModule(M))
  314. return false;
  315. AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
  316. TargetTransformInfoWrapperPass *TTIWP =
  317. &getAnalysis<TargetTransformInfoWrapperPass>();
  318. ProfileSummaryInfo &PSI =
  319. getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  320. auto GetAssumptionCache = [&ACT](Function &F) -> AssumptionCache & {
  321. return ACT->getAssumptionCache(F);
  322. };
  323. auto LookupAssumptionCache = [ACT](Function &F) -> AssumptionCache * {
  324. return ACT->lookupAssumptionCache(F);
  325. };
  326. auto GetTTI = [&TTIWP](Function &F) -> TargetTransformInfo & {
  327. return TTIWP->getTTI(F);
  328. };
  329. auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
  330. return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  331. };
  332. return PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
  333. GetTLI, PSI)
  334. .run(M);
  335. }
  336. };
  337. } // end anonymous namespace
// Find profile-cold single-entry/single-exit regions of F worth outlining.
// Requires instrumentation profile data; walks the CFG depth-first from the
// entry looking for cold edges whose dominated subgraph forms a SESE region
// whose estimated cost is a large enough fraction of the whole function.
// Returns null if no candidate region is found.
std::unique_ptr<FunctionOutliningMultiRegionInfo>
PartialInlinerImpl::computeOutliningColdRegionsInfo(
    Function &F, OptimizationRemarkEmitter &ORE) const {
  BasicBlock *EntryBlock = &F.front();

  // Local analyses; BFI is either computed here or fetched via GetBFI.
  DominatorTree DT(F);
  LoopInfo LI(DT);
  BranchProbabilityInfo BPI(F, LI);
  std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
  BlockFrequencyInfo *BFI;
  if (!GetBFI) {
    ScopedBFI.reset(new BlockFrequencyInfo(F, BPI, LI));
    BFI = ScopedBFI.get();
  } else
    BFI = &(GetBFI(F));

  // Return if we don't have profiling information.
  if (!PSI.hasInstrumentationProfile())
    return std::unique_ptr<FunctionOutliningMultiRegionInfo>();

  std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
      std::make_unique<FunctionOutliningMultiRegionInfo>();

  // Returns the block carrying the single edge that leaves BlockList, or
  // null (with a missed-optimization remark) if there is more than one
  // exiting edge.
  auto IsSingleExit =
      [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
    BasicBlock *ExitBlock = nullptr;
    for (auto *Block : BlockList) {
      for (BasicBlock *Succ : successors(Block)) {
        if (!is_contained(BlockList, Succ)) {
          if (ExitBlock) {
            ORE.emit([&]() {
              return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
                                              &Succ->front())
                     << "Region dominated by "
                     << ore::NV("Block", BlockList.front()->getName())
                     << " has more than one region exit edge.";
            });
            return nullptr;
          }

          ExitBlock = Block;
        }
      }
    }
    return ExitBlock;
  };

  auto BBProfileCount = [BFI](BasicBlock *BB) {
    return BFI->getBlockProfileCount(BB).getValueOr(0);
  };

  // Use the same computeBBInlineCost function to compute the cost savings of
  // the outlining the candidate region.
  TargetTransformInfo *FTTI = &GetTTI(F);
  InstructionCost OverallFunctionCost = 0;
  for (auto &BB : F)
    OverallFunctionCost += computeBBInlineCost(&BB, FTTI);

  LLVM_DEBUG(dbgs() << "OverallFunctionCost = " << OverallFunctionCost
                    << "\n";);

  // A region is only worth outlining if it saves at least this fraction
  // (MinRegionSizeRatio, default 10%) of the function's total cost.
  InstructionCost MinOutlineRegionCost = OverallFunctionCost.map(
      [&](auto Cost) { return Cost * MinRegionSizeRatio; });

  BranchProbability MinBranchProbability(
      static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
      MinBlockCounterExecution);
  bool ColdCandidateFound = false;
  BasicBlock *CurrEntry = EntryBlock;
  std::vector<BasicBlock *> DFS;
  DenseMap<BasicBlock *, bool> VisitedMap;
  DFS.push_back(CurrEntry);
  VisitedMap[CurrEntry] = true;

  // Use Depth First Search on the basic blocks to find CFG edges that are
  // considered cold.
  // Cold regions considered must also have its inline cost compared to the
  // overall inline cost of the original function. The region is outlined only
  // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or
  // more.
  while (!DFS.empty()) {
    auto *ThisBB = DFS.back();
    DFS.pop_back();
    // Only consider regions with predecessor blocks that are considered
    // not-cold (default: part of the top 99.99% of all block counters)
    // AND greater than our minimum block execution count (default: 100).
    if (PSI.isColdBlock(ThisBB, BFI) ||
        BBProfileCount(ThisBB) < MinBlockCounterExecution)
      continue;
    for (auto SI = succ_begin(ThisBB); SI != succ_end(ThisBB); ++SI) {
      if (VisitedMap[*SI])
        continue;
      VisitedMap[*SI] = true;
      DFS.push_back(*SI);
      // If branch isn't cold, we skip to the next one.
      BranchProbability SuccProb = BPI.getEdgeProbability(ThisBB, *SI);
      if (SuccProb > MinBranchProbability)
        continue;

      LLVM_DEBUG(dbgs() << "Found cold edge: " << ThisBB->getName() << "->"
                        << SI->getName()
                        << "\nBranch Probability = " << SuccProb << "\n";);

      SmallVector<BasicBlock *, 8> DominateVector;
      DT.getDescendants(*SI, DominateVector);
      assert(!DominateVector.empty() &&
             "SI should be reachable and have at least itself as descendant");

      // We can only outline single entry regions (for now).
      if (!DominateVector.front()->hasNPredecessors(1)) {
        LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
                          << " doesn't have a single predecessor in the "
                             "dominator tree\n";);
        continue;
      }

      BasicBlock *ExitBlock = nullptr;
      // We can only outline single exit regions (for now).
      if (!(ExitBlock = IsSingleExit(DominateVector))) {
        LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
                          << " doesn't have a unique successor\n";);
        continue;
      }

      InstructionCost OutlineRegionCost = 0;
      for (auto *BB : DominateVector)
        OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));

      LLVM_DEBUG(dbgs() << "OutlineRegionCost = " << OutlineRegionCost
                        << "\n";);

      if (!SkipCostAnalysis && OutlineRegionCost < MinOutlineRegionCost) {
        ORE.emit([&]() {
          return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
                                            &SI->front())
                 << ore::NV("Callee", &F)
                 << " inline cost-savings smaller than "
                 << ore::NV("Cost", MinOutlineRegionCost);
        });

        LLVM_DEBUG(dbgs() << "ABORT: Outline region cost is smaller than "
                          << MinOutlineRegionCost << "\n";);
        continue;
      }

      // For now, ignore blocks that belong to a SISE region that is a
      // candidate for outlining. In the future, we may want to look
      // at inner regions because the outer region may have live-exit
      // variables.
      for (auto *BB : DominateVector)
        VisitedMap[BB] = true;

      // ReturnBlock here means the block after the outline call
      BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
      FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
          DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
      OutliningInfo->ORI.push_back(RegInfo);
      LLVM_DEBUG(dbgs() << "Found Cold Candidate starting at block: "
                        << DominateVector.front()->getName() << "\n";);
      ColdCandidateFound = true;
      NumColdRegionsFound++;
    }
  }

  if (ColdCandidateFound)
    return OutliningInfo;

  return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
}
// Identify the classic single-region outlining shape: a chain of guarding
// entry blocks ending in a conditional branch to a return block (kept
// inline) and a non-return block (to be outlined). Returns null if the
// function's CFG does not match the expected pattern.
std::unique_ptr<FunctionOutliningInfo>
PartialInlinerImpl::computeOutliningInfo(Function &F) const {
  BasicBlock *EntryBlock = &F.front();
  // The entry must end in a conditional branch for the "guard" pattern.
  BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
  if (!BR || BR->isUnconditional())
    return std::unique_ptr<FunctionOutliningInfo>();

  // Returns true if Succ is BB's successor
  auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
    return is_contained(successors(BB), Succ);
  };

  auto IsReturnBlock = [](BasicBlock *BB) {
    Instruction *TI = BB->getTerminator();
    return isa<ReturnInst>(TI);
  };

  // Orders a successor pair as (return block, other); (null, null) if
  // neither successor returns.
  auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
    if (IsReturnBlock(Succ1))
      return std::make_tuple(Succ1, Succ2);
    if (IsReturnBlock(Succ2))
      return std::make_tuple(Succ2, Succ1);

    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
  };

  // Detect a triangular shape:
  auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
    if (IsSuccessor(Succ1, Succ2))
      return std::make_tuple(Succ1, Succ2);
    if (IsSuccessor(Succ2, Succ1))
      return std::make_tuple(Succ2, Succ1);

    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
  };

  std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
      std::make_unique<FunctionOutliningInfo>();

  BasicBlock *CurrEntry = EntryBlock;
  bool CandidateFound = false;
  // Walk the chain of guarding two-way branches from the entry, collecting
  // entries until a (return, non-return) successor pair is found.
  do {
    // The number of blocks to be inlined has already reached
    // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
    // disables partial inlining for the function.
    if (OutliningInfo->getNumInlinedBlocks() >= MaxNumInlineBlocks)
      break;

    if (succ_size(CurrEntry) != 2)
      break;

    BasicBlock *Succ1 = *succ_begin(CurrEntry);
    BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);

    BasicBlock *ReturnBlock, *NonReturnBlock;
    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);

    if (ReturnBlock) {
      OutliningInfo->Entries.push_back(CurrEntry);
      OutliningInfo->ReturnBlock = ReturnBlock;
      OutliningInfo->NonReturnBlock = NonReturnBlock;
      CandidateFound = true;
      break;
    }

    BasicBlock *CommSucc, *OtherSucc;
    std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);

    if (!CommSucc)
      break;

    OutliningInfo->Entries.push_back(CurrEntry);
    CurrEntry = OtherSucc;
  } while (true);

  if (!CandidateFound)
    return std::unique_ptr<FunctionOutliningInfo>();

  // There should not be any successors (not in the entry set) other than
  // {ReturnBlock, NonReturnBlock}
  assert(OutliningInfo->Entries[0] == &F.front() &&
         "Function Entry must be the first in Entries vector");
  DenseSet<BasicBlock *> Entries;
  for (BasicBlock *E : OutliningInfo->Entries)
    Entries.insert(E);

  // Returns true if BB has a predecessor which is not
  // in the Entries set.
  auto HasNonEntryPred = [Entries](BasicBlock *BB) {
    for (auto *Pred : predecessors(BB)) {
      if (!Entries.count(Pred))
        return true;
    }
    return false;
  };
  // Verify every entry only branches within {Entries, ReturnBlock,
  // NonReturnBlock} and has no incoming edges from outside Entries;
  // record which entries feed ReturnBlock along the way.
  auto CheckAndNormalizeCandidate =
      [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
        for (BasicBlock *E : OutliningInfo->Entries) {
          for (auto *Succ : successors(E)) {
            if (Entries.count(Succ))
              continue;
            if (Succ == OutliningInfo->ReturnBlock)
              OutliningInfo->ReturnBlockPreds.push_back(E);
            else if (Succ != OutliningInfo->NonReturnBlock)
              return false;
          }
          // There should not be any outside incoming edges either:
          if (HasNonEntryPred(E))
            return false;
        }
        return true;
      };

  if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
    return std::unique_ptr<FunctionOutliningInfo>();

  // Now further growing the candidate's inlining region by
  // peeling off dominating blocks from the outlining region:
  while (OutliningInfo->getNumInlinedBlocks() < MaxNumInlineBlocks) {
    BasicBlock *Cand = OutliningInfo->NonReturnBlock;
    if (succ_size(Cand) != 2)
      break;

    if (HasNonEntryPred(Cand))
      break;

    BasicBlock *Succ1 = *succ_begin(Cand);
    BasicBlock *Succ2 = *(succ_begin(Cand) + 1);

    BasicBlock *ReturnBlock, *NonReturnBlock;
    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
    if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
      break;

    if (NonReturnBlock->getSinglePredecessor() != Cand)
      break;

    // Now grow and update OutlininigInfo:
    OutliningInfo->Entries.push_back(Cand);
    OutliningInfo->NonReturnBlock = NonReturnBlock;
    OutliningInfo->ReturnBlockPreds.push_back(Cand);
    Entries.insert(Cand);
  }

  return OutliningInfo;
}
  604. // Check if there is PGO data or user annotated branch data:
  605. static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI) {
  606. if (F.hasProfileData())
  607. return true;
  608. // Now check if any of the entry block has MD_prof data:
  609. for (auto *E : OI.Entries) {
  610. BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
  611. if (!BR || BR->isUnconditional())
  612. continue;
  613. uint64_t T, F;
  614. if (BR->extractProfMetadata(T, F))
  615. return true;
  616. }
  617. return false;
  618. }
  619. BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
  620. FunctionCloner &Cloner) const {
  621. BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
  622. auto EntryFreq =
  623. Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
  624. auto OutliningCallFreq =
  625. Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
  626. // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
  627. // we outlined any regions, so we may encounter situations where the
  628. // OutliningCallFreq is *slightly* bigger than the EntryFreq.
  629. if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
  630. OutliningCallFreq = EntryFreq;
  631. auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
  632. OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
  633. if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI.get()))
  634. return OutlineRegionRelFreq;
  635. // When profile data is not available, we need to be conservative in
  636. // estimating the overall savings. Static branch prediction can usually
  637. // guess the branch direction right (taken/non-taken), but the guessed
  638. // branch probability is usually not biased enough. In case when the
  639. // outlined region is predicted to be likely, its probability needs
  640. // to be made higher (more biased) to not under-estimate the cost of
  641. // function outlining. On the other hand, if the outlined region
  642. // is predicted to be less likely, the predicted probablity is usually
  643. // higher than the actual. For instance, the actual probability of the
  644. // less likely target is only 5%, but the guessed probablity can be
  645. // 40%. In the latter case, there is no need for further adjustement.
  646. // FIXME: add an option for this.
  647. if (OutlineRegionRelFreq < BranchProbability(45, 100))
  648. return OutlineRegionRelFreq;
  649. OutlineRegionRelFreq = std::max(
  650. OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
  651. return OutlineRegionRelFreq;
  652. }
// Decide whether the partially-inlined clone should be inlined into the
// given call site CB: legality first, then the regular inline-cost model,
// then a savings-vs-outlining-overhead comparison.
bool PartialInlinerImpl::shouldPartialInline(
    CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
    OptimizationRemarkEmitter &ORE) const {
  using namespace ore;

  Function *Callee = CB.getCalledFunction();
  assert(Callee == Cloner.ClonedFunc);

  // With cost analysis disabled, only legality of inlining matters.
  if (SkipCostAnalysis)
    return isInlineViable(*Callee).isSuccess();

  Function *Caller = CB.getCaller();
  auto &CalleeTTI = GetTTI(*Callee);
  // Only hand the remark emitter to the cost analysis when missed-opt
  // remarks are actually enabled.
  bool RemarksEnabled =
      Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
          DEBUG_TYPE);
  InlineCost IC =
      getInlineCost(CB, getInlineParams(), CalleeTTI, GetAssumptionCache,
                    GetTLI, GetBFI, &PSI, RemarksEnabled ? &ORE : nullptr);

  // "Always" means the whole function should be fully inlined; partial
  // inlining would only get in the way.
  if (IC.isAlways()) {
    ORE.emit([&]() {
      return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", &CB)
             << NV("Callee", Cloner.OrigFunc)
             << " should always be fully inlined, not partially";
    });
    return false;
  }

  if (IC.isNever()) {
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CB)
             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
             << NV("Caller", Caller)
             << " because it should never be inlined (cost=never)";
    });
    return false;
  }

  // Cost exceeds the threshold.
  if (!IC) {
    ORE.emit([&]() {
      return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", &CB)
             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
             << NV("Caller", Caller) << " because too costly to inline (cost="
             << NV("Cost", IC.getCost()) << ", threshold="
             << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
    });
    return false;
  }
  const DataLayout &DL = Caller->getParent()->getDataLayout();

  // The savings of eliminating the call:
  int NonWeightedSavings = getCallsiteCost(CB, DL);
  BlockFrequency NormWeightedSavings(NonWeightedSavings);

  // Weighted saving is smaller than weighted cost, return false
  if (NormWeightedSavings < WeightedOutliningRcost) {
    ORE.emit([&]() {
      return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
                                        &CB)
             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
             << NV("Caller", Caller) << " runtime overhead (overhead="
             << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
             << ", savings="
             << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
             << ")"
             << " of making the outlined call is too high";
    });

    return false;
  }

  ORE.emit([&]() {
    return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", &CB)
           << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
           << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
           << " (threshold="
           << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
  });
  return true;
}
  724. // TODO: Ideally we should share Inliner's InlineCost Analysis code.
  725. // For now use a simplified version. The returned 'InlineCost' will be used
  726. // to esimate the size cost as well as runtime cost of the BB.
  727. InstructionCost
  728. PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
  729. TargetTransformInfo *TTI) {
  730. InstructionCost InlineCost = 0;
  731. const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
  732. for (Instruction &I : BB->instructionsWithoutDebug()) {
  733. // Skip free instructions.
  734. switch (I.getOpcode()) {
  735. case Instruction::BitCast:
  736. case Instruction::PtrToInt:
  737. case Instruction::IntToPtr:
  738. case Instruction::Alloca:
  739. case Instruction::PHI:
  740. continue;
  741. case Instruction::GetElementPtr:
  742. if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices())
  743. continue;
  744. break;
  745. default:
  746. break;
  747. }
  748. if (I.isLifetimeStartOrEnd())
  749. continue;
  750. if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
  751. Intrinsic::ID IID = II->getIntrinsicID();
  752. SmallVector<Type *, 4> Tys;
  753. FastMathFlags FMF;
  754. for (Value *Val : II->args())
  755. Tys.push_back(Val->getType());
  756. if (auto *FPMO = dyn_cast<FPMathOperator>(II))
  757. FMF = FPMO->getFastMathFlags();
  758. IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF);
  759. InlineCost += TTI->getIntrinsicInstrCost(ICA, TTI::TCK_SizeAndLatency);
  760. continue;
  761. }
  762. if (CallInst *CI = dyn_cast<CallInst>(&I)) {
  763. InlineCost += getCallsiteCost(*CI, DL);
  764. continue;
  765. }
  766. if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
  767. InlineCost += getCallsiteCost(*II, DL);
  768. continue;
  769. }
  770. if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
  771. InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
  772. continue;
  773. }
  774. InlineCost += InlineConstants::InstrCost;
  775. }
  776. return InlineCost;
  777. }
  778. std::tuple<InstructionCost, InstructionCost>
  779. PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) const {
  780. InstructionCost OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
  781. for (auto FuncBBPair : Cloner.OutlinedFunctions) {
  782. Function *OutlinedFunc = FuncBBPair.first;
  783. BasicBlock* OutliningCallBB = FuncBBPair.second;
  784. // Now compute the cost of the call sequence to the outlined function
  785. // 'OutlinedFunction' in BB 'OutliningCallBB':
  786. auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
  787. OutliningFuncCallCost +=
  788. computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
  789. // Now compute the cost of the extracted/outlined function itself:
  790. for (BasicBlock &BB : *OutlinedFunc)
  791. OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
  792. }
  793. assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
  794. "Outlined function cost should be no less than the outlined region");
  795. // The code extractor introduces a new root and exit stub blocks with
  796. // additional unconditional branches. Those branches will be eliminated
  797. // later with bb layout. The cost should be adjusted accordingly:
  798. OutlinedFunctionCost -=
  799. 2 * InlineConstants::InstrCost * Cloner.OutlinedFunctions.size();
  800. InstructionCost OutliningRuntimeOverhead =
  801. OutliningFuncCallCost +
  802. (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
  803. ExtraOutliningPenalty.getValue();
  804. return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
  805. }
  806. // Create the callsite to profile count map which is
  807. // used to update the original function's entry count,
  808. // after the function is partially inlined into the callsite.
  809. void PartialInlinerImpl::computeCallsiteToProfCountMap(
  810. Function *DuplicateFunction,
  811. DenseMap<User *, uint64_t> &CallSiteToProfCountMap) const {
  812. std::vector<User *> Users(DuplicateFunction->user_begin(),
  813. DuplicateFunction->user_end());
  814. Function *CurrentCaller = nullptr;
  815. std::unique_ptr<BlockFrequencyInfo> TempBFI;
  816. BlockFrequencyInfo *CurrentCallerBFI = nullptr;
  817. auto ComputeCurrBFI = [&,this](Function *Caller) {
  818. // For the old pass manager:
  819. if (!GetBFI) {
  820. DominatorTree DT(*Caller);
  821. LoopInfo LI(DT);
  822. BranchProbabilityInfo BPI(*Caller, LI);
  823. TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
  824. CurrentCallerBFI = TempBFI.get();
  825. } else {
  826. // New pass manager:
  827. CurrentCallerBFI = &(GetBFI(*Caller));
  828. }
  829. };
  830. for (User *User : Users) {
  831. // Don't bother with BlockAddress used by CallBr for asm goto.
  832. if (isa<BlockAddress>(User))
  833. continue;
  834. CallBase *CB = getSupportedCallBase(User);
  835. Function *Caller = CB->getCaller();
  836. if (CurrentCaller != Caller) {
  837. CurrentCaller = Caller;
  838. ComputeCurrBFI(Caller);
  839. } else {
  840. assert(CurrentCallerBFI && "CallerBFI is not set");
  841. }
  842. BasicBlock *CallBB = CB->getParent();
  843. auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
  844. if (Count)
  845. CallSiteToProfCountMap[User] = *Count;
  846. else
  847. CallSiteToProfCountMap[User] = 0;
  848. }
  849. }
  850. PartialInlinerImpl::FunctionCloner::FunctionCloner(
  851. Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
  852. function_ref<AssumptionCache *(Function &)> LookupAC,
  853. function_ref<TargetTransformInfo &(Function &)> GetTTI)
  854. : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
  855. ClonedOI = std::make_unique<FunctionOutliningInfo>();
  856. // Clone the function, so that we can hack away on it.
  857. ValueToValueMapTy VMap;
  858. ClonedFunc = CloneFunction(F, VMap);
  859. ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
  860. ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
  861. for (BasicBlock *BB : OI->Entries)
  862. ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
  863. for (BasicBlock *E : OI->ReturnBlockPreds) {
  864. BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
  865. ClonedOI->ReturnBlockPreds.push_back(NewE);
  866. }
  867. // Go ahead and update all uses to the duplicate, so that we can just
  868. // use the inliner functionality when we're done hacking.
  869. F->replaceAllUsesWith(ClonedFunc);
  870. }
  871. PartialInlinerImpl::FunctionCloner::FunctionCloner(
  872. Function *F, FunctionOutliningMultiRegionInfo *OI,
  873. OptimizationRemarkEmitter &ORE,
  874. function_ref<AssumptionCache *(Function &)> LookupAC,
  875. function_ref<TargetTransformInfo &(Function &)> GetTTI)
  876. : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
  877. ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
  878. // Clone the function, so that we can hack away on it.
  879. ValueToValueMapTy VMap;
  880. ClonedFunc = CloneFunction(F, VMap);
  881. // Go through all Outline Candidate Regions and update all BasicBlock
  882. // information.
  883. for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
  884. OI->ORI) {
  885. SmallVector<BasicBlock *, 8> Region;
  886. for (BasicBlock *BB : RegionInfo.Region)
  887. Region.push_back(cast<BasicBlock>(VMap[BB]));
  888. BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
  889. BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
  890. BasicBlock *NewReturnBlock = nullptr;
  891. if (RegionInfo.ReturnBlock)
  892. NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
  893. FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
  894. Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
  895. ClonedOMRI->ORI.push_back(MappedRegionInfo);
  896. }
  897. // Go ahead and update all uses to the duplicate, so that we can just
  898. // use the inliner functionality when we're done hacking.
  899. F->replaceAllUsesWith(ClonedFunc);
  900. }
// Split the return block's PHIs into a two-level sequence so the block can
// be divided between the inlined entry region and the extracted region.
void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const {
  // Returns the leading PHI of BB, or nullptr if BB does not begin with one.
  // Note: the loop always exits on its first iteration (either branch
  // breaks), so it behaves as a single check on the first instruction.
  auto GetFirstPHI = [](BasicBlock *BB) {
    BasicBlock::iterator I = BB->begin();
    PHINode *FirstPhi = nullptr;
    while (I != BB->end()) {
      PHINode *Phi = dyn_cast<PHINode>(I);
      if (!Phi)
        break;
      if (!FirstPhi) {
        FirstPhi = Phi;
        break;
      }
    }
    return FirstPhi;
  };

  // Shouldn't need to normalize PHIs if we're not outlining non-early return
  // blocks.
  if (!ClonedOI)
    return;

  // Special hackery is needed with PHI nodes that have inputs from more than
  // one extracted block. For simplicity, just split the PHIs into a two-level
  // sequence of PHIs, some of which will go in the extracted region, and some
  // of which will go outside.
  BasicBlock *PreReturn = ClonedOI->ReturnBlock;
  // only split block when necessary:
  PHINode *FirstPhi = GetFirstPHI(PreReturn);
  unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
  if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
    return;

  // Returns the common value if every incoming value of PN is identical,
  // else nullptr.
  auto IsTrivialPhi = [](PHINode *PN) -> Value * {
    Value *CommonValue = PN->getIncomingValue(0);
    if (all_of(PN->incoming_values(),
               [&](Value *V) { return V == CommonValue; }))
      return CommonValue;
    return nullptr;
  };

  // Split right after the PHIs: the new block becomes the ReturnBlock (kept
  // out of the extracted region per ToBeInlined), while PreReturn keeps the
  // original (first-level) PHIs.
  ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
      ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
  BasicBlock::iterator I = PreReturn->begin();
  Instruction *Ins = &ClonedOI->ReturnBlock->front();
  SmallVector<Instruction *, 4> DeadPhis;
  while (I != PreReturn->end()) {
    PHINode *OldPhi = dyn_cast<PHINode>(I);
    if (!OldPhi)
      break;

    // Second-level PHI: merges the value flowing out of PreReturn with the
    // values coming directly from the entry-side predecessors.
    PHINode *RetPhi =
        PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
    OldPhi->replaceAllUsesWith(RetPhi);
    Ins = ClonedOI->ReturnBlock->getFirstNonPHI();

    RetPhi->addIncoming(&*I, PreReturn);
    // Move the entry-predecessor incoming values from the old PHI to the
    // new second-level PHI.
    for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
      RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
      OldPhi->removeIncomingValue(E);
    }

    // After incoming values splitting, the old phi may become trivial.
    // Keeping the trivial phi can introduce definition inside the outline
    // region which is live-out, causing necessary overhead (load, store
    // arg passing etc).
    if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
      OldPhi->replaceAllUsesWith(OldPhiVal);
      DeadPhis.push_back(OldPhi);
    }
    ++I;
  }
  // Erase outside the iteration to avoid invalidating 'I'.
  for (auto *DP : DeadPhis)
    DP->eraseFromParent();

  // Re-point the entry-side predecessors at the new (split-off) ReturnBlock.
  for (auto *E : ClonedOI->ReturnBlockPreds)
    E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
}
  970. bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
  971. auto ComputeRegionCost =
  972. [&](SmallVectorImpl<BasicBlock *> &Region) -> InstructionCost {
  973. InstructionCost Cost = 0;
  974. for (BasicBlock* BB : Region)
  975. Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
  976. return Cost;
  977. };
  978. assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
  979. if (ClonedOMRI->ORI.empty())
  980. return false;
  981. // The CodeExtractor needs a dominator tree.
  982. DominatorTree DT;
  983. DT.recalculate(*ClonedFunc);
  984. // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
  985. LoopInfo LI(DT);
  986. BranchProbabilityInfo BPI(*ClonedFunc, LI);
  987. ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
  988. // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
  989. CodeExtractorAnalysisCache CEAC(*ClonedFunc);
  990. SetVector<Value *> Inputs, Outputs, Sinks;
  991. for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
  992. ClonedOMRI->ORI) {
  993. InstructionCost CurrentOutlinedRegionCost =
  994. ComputeRegionCost(RegionInfo.Region);
  995. CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
  996. ClonedFuncBFI.get(), &BPI,
  997. LookupAC(*RegionInfo.EntryBlock->getParent()),
  998. /* AllowVarargs */ false);
  999. CE.findInputsOutputs(Inputs, Outputs, Sinks);
  1000. LLVM_DEBUG({
  1001. dbgs() << "inputs: " << Inputs.size() << "\n";
  1002. dbgs() << "outputs: " << Outputs.size() << "\n";
  1003. for (Value *value : Inputs)
  1004. dbgs() << "value used in func: " << *value << "\n";
  1005. for (Value *output : Outputs)
  1006. dbgs() << "instr used in func: " << *output << "\n";
  1007. });
  1008. // Do not extract regions that have live exit variables.
  1009. if (Outputs.size() > 0 && !ForceLiveExit)
  1010. continue;
  1011. if (Function *OutlinedFunc = CE.extractCodeRegion(CEAC)) {
  1012. CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
  1013. BasicBlock *OutliningCallBB = OCS->getParent();
  1014. assert(OutliningCallBB->getParent() == ClonedFunc);
  1015. OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
  1016. NumColdRegionsOutlined++;
  1017. OutlinedRegionCost += CurrentOutlinedRegionCost;
  1018. if (MarkOutlinedColdCC) {
  1019. OutlinedFunc->setCallingConv(CallingConv::Cold);
  1020. OCS->setCallingConv(CallingConv::Cold);
  1021. }
  1022. } else
  1023. ORE.emit([&]() {
  1024. return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
  1025. &RegionInfo.Region.front()->front())
  1026. << "Failed to extract region at block "
  1027. << ore::NV("Block", RegionInfo.Region.front());
  1028. });
  1029. }
  1030. return !OutlinedFunctions.empty();
  1031. }
  1032. Function *
  1033. PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
  1034. // Returns true if the block is to be partial inlined into the caller
  1035. // (i.e. not to be extracted to the out of line function)
  1036. auto ToBeInlined = [&, this](BasicBlock *BB) {
  1037. return BB == ClonedOI->ReturnBlock ||
  1038. llvm::is_contained(ClonedOI->Entries, BB);
  1039. };
  1040. assert(ClonedOI && "Expecting OutlineInfo for single region outline");
  1041. // The CodeExtractor needs a dominator tree.
  1042. DominatorTree DT;
  1043. DT.recalculate(*ClonedFunc);
  1044. // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
  1045. LoopInfo LI(DT);
  1046. BranchProbabilityInfo BPI(*ClonedFunc, LI);
  1047. ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
  1048. // Gather up the blocks that we're going to extract.
  1049. std::vector<BasicBlock *> ToExtract;
  1050. auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
  1051. ToExtract.push_back(ClonedOI->NonReturnBlock);
  1052. OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
  1053. ClonedOI->NonReturnBlock, ClonedFuncTTI);
  1054. for (BasicBlock &BB : *ClonedFunc)
  1055. if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
  1056. ToExtract.push_back(&BB);
  1057. // FIXME: the code extractor may hoist/sink more code
  1058. // into the outlined function which may make the outlining
  1059. // overhead (the difference of the outlined function cost
  1060. // and OutliningRegionCost) look larger.
  1061. OutlinedRegionCost += computeBBInlineCost(&BB, ClonedFuncTTI);
  1062. }
  1063. // Extract the body of the if.
  1064. CodeExtractorAnalysisCache CEAC(*ClonedFunc);
  1065. Function *OutlinedFunc =
  1066. CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
  1067. ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
  1068. /* AllowVarargs */ true)
  1069. .extractCodeRegion(CEAC);
  1070. if (OutlinedFunc) {
  1071. BasicBlock *OutliningCallBB =
  1072. PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->getParent();
  1073. assert(OutliningCallBB->getParent() == ClonedFunc);
  1074. OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
  1075. } else
  1076. ORE.emit([&]() {
  1077. return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
  1078. &ToExtract.front()->front())
  1079. << "Failed to extract region at block "
  1080. << ore::NV("Block", ToExtract.front());
  1081. });
  1082. return OutlinedFunc;
  1083. }
  1084. PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
  1085. // Ditch the duplicate, since we're done with it, and rewrite all remaining
  1086. // users (function pointers, etc.) back to the original function.
  1087. ClonedFunc->replaceAllUsesWith(OrigFunc);
  1088. ClonedFunc->eraseFromParent();
  1089. if (!IsFunctionInlined) {
  1090. // Remove each function that was speculatively created if there is no
  1091. // reference.
  1092. for (auto FuncBBPair : OutlinedFunctions) {
  1093. Function *Func = FuncBBPair.first;
  1094. Func->eraseFromParent();
  1095. }
  1096. }
  1097. }
  1098. std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function &F) {
  1099. if (F.hasAddressTaken())
  1100. return {false, nullptr};
  1101. // Let inliner handle it
  1102. if (F.hasFnAttribute(Attribute::AlwaysInline))
  1103. return {false, nullptr};
  1104. if (F.hasFnAttribute(Attribute::NoInline))
  1105. return {false, nullptr};
  1106. if (PSI.isFunctionEntryCold(&F))
  1107. return {false, nullptr};
  1108. if (F.users().empty())
  1109. return {false, nullptr};
  1110. OptimizationRemarkEmitter ORE(&F);
  1111. // Only try to outline cold regions if we have a profile summary, which
  1112. // implies we have profiling information.
  1113. if (PSI.hasProfileSummary() && F.hasProfileData() &&
  1114. !DisableMultiRegionPartialInline) {
  1115. std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
  1116. computeOutliningColdRegionsInfo(F, ORE);
  1117. if (OMRI) {
  1118. FunctionCloner Cloner(&F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
  1119. LLVM_DEBUG({
  1120. dbgs() << "HotCountThreshold = " << PSI.getHotCountThreshold() << "\n";
  1121. dbgs() << "ColdCountThreshold = " << PSI.getColdCountThreshold()
  1122. << "\n";
  1123. });
  1124. bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
  1125. if (DidOutline) {
  1126. LLVM_DEBUG({
  1127. dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
  1128. Cloner.ClonedFunc->print(dbgs());
  1129. dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
  1130. });
  1131. if (tryPartialInline(Cloner))
  1132. return {true, nullptr};
  1133. }
  1134. }
  1135. }
  1136. // Fall-thru to regular partial inlining if we:
  1137. // i) can't find any cold regions to outline, or
  1138. // ii) can't inline the outlined function anywhere.
  1139. std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
  1140. if (!OI)
  1141. return {false, nullptr};
  1142. FunctionCloner Cloner(&F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
  1143. Cloner.normalizeReturnBlock();
  1144. Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
  1145. if (!OutlinedFunction)
  1146. return {false, nullptr};
  1147. if (tryPartialInline(Cloner))
  1148. return {true, OutlinedFunction};
  1149. return {false, nullptr};
  1150. }
// Inline the (already outlined) clone into each supported call site of the
// original function, when cost analysis allows it. Returns true if at least
// one call site was partially inlined.
bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
  if (Cloner.OutlinedFunctions.empty())
    return false;

  int SizeCost = 0;
  BlockFrequency WeightedRcost;
  int NonWeightedRcost;

  auto OutliningCosts = computeOutliningCosts(Cloner);
  assert(std::get<0>(OutliningCosts).isValid() &&
         std::get<1>(OutliningCosts).isValid() && "Expected valid costs");

  // <0> = size of the call sequence(s); <1> = runtime overhead of outlining.
  SizeCost = *std::get<0>(OutliningCosts).getValue();
  NonWeightedRcost = *std::get<1>(OutliningCosts).getValue();

  // Only calculate RelativeToEntryFreq when we are doing single region
  // outlining.
  BranchProbability RelativeToEntryFreq;
  if (Cloner.ClonedOI)
    RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
  else
    // RelativeToEntryFreq doesn't make sense when we have more than one
    // outlined call because each call will have a different relative frequency
    // to the entry block. We can consider using the average, but the
    // usefulness of that information is questionable. For now, assume we never
    // execute the calls to outlined functions.
    RelativeToEntryFreq = BranchProbability(0, 1);

  WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;

  // The call sequence(s) to the outlined function(s) are larger than the sum of
  // the original outlined region size(s), it does not increase the chances of
  // inlining the function with outlining (The inliner uses the size increase to
  // model the cost of inlining a callee).
  if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
    OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
    DebugLoc DLoc;
    BasicBlock *Block;
    std::tie(DLoc, Block) = getOneDebugLoc(*Cloner.ClonedFunc);
    OrigFuncORE.emit([&]() {
      return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
                                        DLoc, Block)
             << ore::NV("Function", Cloner.OrigFunc)
             << " not partially inlined into callers (Original Size = "
             << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
             << ", Size of call sequence to outlined function = "
             << ore::NV("NewSize", SizeCost) << ")";
    });
    return false;
  }

  assert(Cloner.OrigFunc->users().empty() &&
         "F's users should all be replaced!");

  // Snapshot the users list up front: successful inlining removes call sites
  // from the clone's use list while we iterate.
  std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
                            Cloner.ClonedFunc->user_end());

  DenseMap<User *, uint64_t> CallSiteToProfCountMap;
  auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
  if (CalleeEntryCount)
    computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);

  // Running entry count; each successfully inlined call site's count is
  // subtracted below.
  uint64_t CalleeEntryCountV =
      (CalleeEntryCount ? CalleeEntryCount->getCount() : 0);

  bool AnyInline = false;
  for (User *User : Users) {
    // Don't bother with BlockAddress used by CallBr for asm goto.
    if (isa<BlockAddress>(User))
      continue;

    CallBase *CB = getSupportedCallBase(User);

    if (isLimitReached())
      continue;

    OptimizationRemarkEmitter CallerORE(CB->getCaller());
    if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE))
      continue;

    // Construct remark before doing the inlining, as after successful inlining
    // the callsite is removed.
    OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CB);
    OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
       << ore::NV("Caller", CB->getCaller());

    InlineFunctionInfo IFI(nullptr, GetAssumptionCache, &PSI);
    // We can only forward varargs when we outlined a single region, else we
    // bail on vararg functions.
    if (!InlineFunction(*CB, IFI, nullptr, true,
                        (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
                                         : nullptr))
             .isSuccess())
      continue;

    CallerORE.emit(OR);

    // Now update the entry count:
    if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
      uint64_t CallSiteCount = CallSiteToProfCountMap[User];
      CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
    }

    AnyInline = true;
    NumPartialInlining++;
    // Update the stats
    if (Cloner.ClonedOI)
      NumPartialInlined++;
    else
      NumColdOutlinePartialInlined++;
  }

  if (AnyInline) {
    Cloner.IsFunctionInlined = true;
    // Transfer the remaining (un-inlined) entry count back to the original.
    if (CalleeEntryCount)
      Cloner.OrigFunc->setEntryCount(Function::ProfileCount(
          CalleeEntryCountV, CalleeEntryCount->getType()));
    OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
    OrigFuncORE.emit([&]() {
      return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
             << "Partially inlined into at least one caller";
    });
  }

  return AnyInline;
}
  1256. bool PartialInlinerImpl::run(Module &M) {
  1257. if (DisablePartialInlining)
  1258. return false;
  1259. std::vector<Function *> Worklist;
  1260. Worklist.reserve(M.size());
  1261. for (Function &F : M)
  1262. if (!F.use_empty() && !F.isDeclaration())
  1263. Worklist.push_back(&F);
  1264. bool Changed = false;
  1265. while (!Worklist.empty()) {
  1266. Function *CurrFunc = Worklist.back();
  1267. Worklist.pop_back();
  1268. if (CurrFunc->use_empty())
  1269. continue;
  1270. bool Recursive = false;
  1271. for (User *U : CurrFunc->users())
  1272. if (Instruction *I = dyn_cast<Instruction>(U))
  1273. if (I->getParent()->getParent() == CurrFunc) {
  1274. Recursive = true;
  1275. break;
  1276. }
  1277. if (Recursive)
  1278. continue;
  1279. std::pair<bool, Function *> Result = unswitchFunction(*CurrFunc);
  1280. if (Result.second)
  1281. Worklist.push_back(Result.second);
  1282. Changed |= Result.first;
  1283. }
  1284. return Changed;
  1285. }
// Pass identifier used by the legacy pass manager; its address (not its
// value) uniquely identifies the pass.
char PartialInlinerLegacyPass::ID = 0;

// Register the legacy pass under "partial-inliner" together with the
// analyses it depends on (assumption cache, profile summary, TTI, TLI).
INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
                      "Partial Inliner", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
                    "Partial Inliner", false, false)
  1295. ModulePass *llvm::createPartialInliningPass() {
  1296. return new PartialInlinerLegacyPass();
  1297. }
  1298. PreservedAnalyses PartialInlinerPass::run(Module &M,
  1299. ModuleAnalysisManager &AM) {
  1300. auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  1301. auto GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & {
  1302. return FAM.getResult<AssumptionAnalysis>(F);
  1303. };
  1304. auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
  1305. return FAM.getCachedResult<AssumptionAnalysis>(F);
  1306. };
  1307. auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
  1308. return FAM.getResult<BlockFrequencyAnalysis>(F);
  1309. };
  1310. auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
  1311. return FAM.getResult<TargetIRAnalysis>(F);
  1312. };
  1313. auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
  1314. return FAM.getResult<TargetLibraryAnalysis>(F);
  1315. };
  1316. ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
  1317. if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
  1318. GetTLI, PSI, GetBFI)
  1319. .run(M))
  1320. return PreservedAnalyses::none();
  1321. return PreservedAnalyses::all();
  1322. }