  1. //===- PartialInlining.cpp - Inline parts of functions --------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass performs partial inlining, typically by inlining an if statement
  10. // that surrounds the body of the function.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/Transforms/IPO/PartialInlining.h"
  14. #include "llvm/ADT/DenseMap.h"
  15. #include "llvm/ADT/DenseSet.h"
  16. #include "llvm/ADT/STLExtras.h"
  17. #include "llvm/ADT/SmallVector.h"
  18. #include "llvm/ADT/Statistic.h"
  19. #include "llvm/Analysis/BlockFrequencyInfo.h"
  20. #include "llvm/Analysis/BranchProbabilityInfo.h"
  21. #include "llvm/Analysis/InlineCost.h"
  22. #include "llvm/Analysis/LoopInfo.h"
  23. #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  24. #include "llvm/Analysis/ProfileSummaryInfo.h"
  25. #include "llvm/Analysis/TargetLibraryInfo.h"
  26. #include "llvm/Analysis/TargetTransformInfo.h"
  27. #include "llvm/IR/Attributes.h"
  28. #include "llvm/IR/BasicBlock.h"
  29. #include "llvm/IR/CFG.h"
  30. #include "llvm/IR/DebugLoc.h"
  31. #include "llvm/IR/DiagnosticInfo.h"
  32. #include "llvm/IR/Dominators.h"
  33. #include "llvm/IR/Function.h"
  34. #include "llvm/IR/InstrTypes.h"
  35. #include "llvm/IR/Instruction.h"
  36. #include "llvm/IR/Instructions.h"
  37. #include "llvm/IR/IntrinsicInst.h"
  38. #include "llvm/IR/Intrinsics.h"
  39. #include "llvm/IR/Module.h"
  40. #include "llvm/IR/Operator.h"
  41. #include "llvm/IR/ProfDataUtils.h"
  42. #include "llvm/IR/User.h"
  43. #include "llvm/InitializePasses.h"
  44. #include "llvm/Pass.h"
  45. #include "llvm/Support/BlockFrequency.h"
  46. #include "llvm/Support/BranchProbability.h"
  47. #include "llvm/Support/Casting.h"
  48. #include "llvm/Support/CommandLine.h"
  49. #include "llvm/Support/ErrorHandling.h"
  50. #include "llvm/Transforms/IPO.h"
  51. #include "llvm/Transforms/Utils/Cloning.h"
  52. #include "llvm/Transforms/Utils/CodeExtractor.h"
  53. #include "llvm/Transforms/Utils/ValueMapper.h"
  54. #include <algorithm>
  55. #include <cassert>
  56. #include <cstdint>
  57. #include <memory>
  58. #include <tuple>
  59. #include <vector>
using namespace llvm;

#define DEBUG_TYPE "partial-inlining"

// Pass-wide statistics, reported with -stats.
STATISTIC(NumPartialInlined,
          "Number of callsites functions partially inlined into.");
STATISTIC(NumColdOutlinePartialInlined,
          "Number of times functions with "
          "cold outlined regions were partially "
          "inlined into its caller(s).");
STATISTIC(NumColdRegionsFound,
          "Number of cold single entry/exit regions found.");
STATISTIC(NumColdRegionsOutlined,
          "Number of cold single entry/exit regions outlined.");

// Command line option to disable partial-inlining. The default is false:
static cl::opt<bool>
    DisablePartialInlining("disable-partial-inlining", cl::init(false),
                           cl::Hidden, cl::desc("Disable partial inlining"));

// Command line option to disable multi-region partial-inlining. The default is
// false:
static cl::opt<bool> DisableMultiRegionPartialInline(
    "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
    cl::desc("Disable multi-region partial inlining"));

// Command line option to force outlining in regions with live exit variables.
// The default is false:
static cl::opt<bool>
    ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
                  cl::desc("Force outline regions with live exits"));

// Command line option to enable marking outline functions with Cold Calling
// Convention. The default is false:
static cl::opt<bool>
    MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
                       cl::desc("Mark outline function calls with ColdCC"));

// This is an option used by testing:
static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
                                      cl::ReallyHidden,
                                      cl::desc("Skip Cost Analysis"));

// Used to determine if a cold region is worth outlining based on
// its inlining cost compared to the original function. Default is set at 10%.
// ie. if the cold region reduces the inlining cost of the original function by
// at least 10%.
static cl::opt<float> MinRegionSizeRatio(
    "min-region-size-ratio", cl::init(0.1), cl::Hidden,
    cl::desc("Minimum ratio comparing relative sizes of each "
             "outline candidate and original function"));

// Used to tune the minimum number of execution counts needed in the predecessor
// block to the cold edge. ie. confidence interval.
static cl::opt<unsigned>
    MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
                             cl::desc("Minimum block executions to consider "
                                      "its BranchProbabilityInfo valid"));

// Used to determine when an edge is considered cold. Default is set to 10%. ie.
// if the branch probability is 10% or less, then it is deemed as 'cold'.
static cl::opt<float> ColdBranchRatio(
    "cold-branch-ratio", cl::init(0.1), cl::Hidden,
    cl::desc("Minimum BranchProbability to consider a region cold."));

// Cap on how many blocks may remain inlined (the guarding "entry" chain plus
// one return block); see FunctionOutliningInfo::getNumInlinedBlocks().
static cl::opt<unsigned> MaxNumInlineBlocks(
    "max-num-inline-blocks", cl::init(5), cl::Hidden,
    cl::desc("Max number of blocks to be partially inlined"));

// Command line option to set the maximum number of partial inlining allowed
// for the module. The default value of -1 means no limit.
static cl::opt<int> MaxNumPartialInlining(
    "max-partial-inlining", cl::init(-1), cl::Hidden,
    cl::desc("Max number of partial inlining. The default is unlimited"));

// Used only when PGO or user annotated branch data is absent. It is
// the least value that is used to weigh the outline region. If BFI
// produces larger value, the BFI value will be used.
static cl::opt<int>
    OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
                             cl::Hidden,
                             cl::desc("Relative frequency of outline region to "
                                      "the entry block"));

// A debug knob added on top of the computed outlining cost.
static cl::opt<unsigned> ExtraOutliningPenalty(
    "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
    cl::desc("A debug option to add additional penalty to the computed one."));
namespace {

// Describes a single-region (early-return) outlining candidate: the chain of
// guarding "entry" blocks that remain inlined, the return block, and the
// block that dominates the region to be outlined.
struct FunctionOutliningInfo {
  FunctionOutliningInfo() = default;

  // Returns the number of blocks to be inlined including all blocks
  // in Entries and one return block.
  unsigned getNumInlinedBlocks() const { return Entries.size() + 1; }

  // A set of blocks including the function entry that guard
  // the region to be outlined.
  SmallVector<BasicBlock *, 4> Entries;

  // The return block that is not included in the outlined region.
  BasicBlock *ReturnBlock = nullptr;

  // The dominating block of the region to be outlined.
  BasicBlock *NonReturnBlock = nullptr;

  // The set of blocks in Entries that are predecessors to ReturnBlock
  SmallVector<BasicBlock *, 4> ReturnBlockPreds;
};
// Describes the set of cold single-entry/single-exit regions of a function
// that are candidates for multi-region outlining (populated by
// computeOutliningColdRegionsInfo).
struct FunctionOutliningMultiRegionInfo {
  FunctionOutliningMultiRegionInfo() = default;

  // Container for outline regions
  struct OutlineRegionInfo {
    OutlineRegionInfo(ArrayRef<BasicBlock *> Region,
                      BasicBlock *EntryBlock, BasicBlock *ExitBlock,
                      BasicBlock *ReturnBlock)
        : Region(Region.begin(), Region.end()), EntryBlock(EntryBlock),
          ExitBlock(ExitBlock), ReturnBlock(ReturnBlock) {}
    // All blocks making up the region; EntryBlock is Region.front().
    SmallVector<BasicBlock *, 8> Region;
    // The region's single entry block.
    BasicBlock *EntryBlock;
    // The single block in Region that has a successor outside of it.
    BasicBlock *ExitBlock;
    // The block outside the region executed after it (ExitBlock's single
    // successor); may be null if ExitBlock has no single successor.
    BasicBlock *ReturnBlock;
  };

  // One entry per cold region found in the function.
  SmallVector<OutlineRegionInfo, 4> ORI;
};
// Shared implementation of the partial-inlining transformation, usable from
// both the legacy and the new pass manager. Per-function analyses are
// supplied as callbacks so either pass manager can drive it.
struct PartialInlinerImpl {

  PartialInlinerImpl(
      function_ref<AssumptionCache &(Function &)> GetAC,
      function_ref<AssumptionCache *(Function &)> LookupAC,
      function_ref<TargetTransformInfo &(Function &)> GTTI,
      function_ref<const TargetLibraryInfo &(Function &)> GTLI,
      ProfileSummaryInfo &ProfSI,
      function_ref<BlockFrequencyInfo &(Function &)> GBFI = nullptr)
      : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
        GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {}

  // Attempts partial inlining on every eligible function in \p M; returns
  // true if the module was changed.
  bool run(Module &M);

  // Main part of the transformation that calls helper functions to find
  // outlining candidates, clone & outline the function, and attempt to
  // partially inline the resulting function. Returns true if
  // inlining was successful, false otherwise. Also returns the outline
  // function (only if we partially inlined early returns) as there is a
  // possibility to further "peel" early return statements that were left in the
  // outline function due to code size.
  std::pair<bool, Function *> unswitchFunction(Function &F);

  // This class speculatively clones the function to be partial inlined.
  // At the end of partial inlining, the remaining callsites to the cloned
  // function that are not partially inlined will be fixed up to reference
  // the original function, and the cloned function will be erased.
  struct FunctionCloner {
    // Two constructors, one for single region outlining, the other for
    // multi-region outlining.
    FunctionCloner(Function *F, FunctionOutliningInfo *OI,
                   OptimizationRemarkEmitter &ORE,
                   function_ref<AssumptionCache *(Function &)> LookupAC,
                   function_ref<TargetTransformInfo &(Function &)> GetTTI);
    FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
                   OptimizationRemarkEmitter &ORE,
                   function_ref<AssumptionCache *(Function &)> LookupAC,
                   function_ref<TargetTransformInfo &(Function &)> GetTTI);

    ~FunctionCloner();

    // Prepare for function outlining: making sure there is only
    // one incoming edge from the extracted/outlined region to
    // the return block.
    void normalizeReturnBlock() const;

    // Do function outlining for cold regions.
    bool doMultiRegionFunctionOutlining();

    // Do function outlining for region after early return block(s).
    // NOTE: For vararg functions that do the vararg handling in the outlined
    // function, we temporarily generate IR that does not properly
    // forward varargs to the outlined function. Calling InlineFunction
    // will update calls to the outlined functions to properly forward
    // the varargs.
    Function *doSingleRegionFunctionOutlining();

    // The original function; never mutated until commit time.
    Function *OrigFunc = nullptr;
    // The speculative clone that outlining is performed on.
    Function *ClonedFunc = nullptr;

    typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
    // Keep track of Outlined Functions and the basic block they're called from.
    SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;

    // ClonedFunc is inlined in one of its callers after function
    // outlining.
    bool IsFunctionInlined = false;
    // The cost of the region to be outlined.
    InstructionCost OutlinedRegionCost = 0;
    // ClonedOI is specific to outlining non-early return blocks.
    std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
    // ClonedOMRI is specific to outlining cold regions.
    std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
    // Lazily computed BFI for ClonedFunc.
    std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
    OptimizationRemarkEmitter &ORE;
    function_ref<AssumptionCache *(Function &)> LookupAC;
    function_ref<TargetTransformInfo &(Function &)> GetTTI;
  };

private:
  // Number of partial inlines performed so far; checked against
  // MaxNumPartialInlining in isLimitReached().
  int NumPartialInlining = 0;
  function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
  function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
  function_ref<TargetTransformInfo &(Function &)> GetTTI;
  function_ref<BlockFrequencyInfo &(Function &)> GetBFI;
  function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
  ProfileSummaryInfo &PSI;

  // Return the frequency of the OutliningBB relative to F's entry point.
  // The result is no larger than 1 and is represented using BP.
  // (Note that the outlined region's 'head' block can only have incoming
  // edges from the guarding entry blocks).
  BranchProbability
  getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) const;

  // Return true if the callee of CB should be partially inlined with
  // profit.
  bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner,
                           BlockFrequency WeightedOutliningRcost,
                           OptimizationRemarkEmitter &ORE) const;

  // Try to inline DuplicateFunction (cloned from F with call to
  // the OutlinedFunction) into its callers. Return true
  // if there is any successful inlining.
  bool tryPartialInline(FunctionCloner &Cloner);

  // Compute the mapping from use site of DuplicationFunction to the enclosing
  // BB's profile count.
  void
  computeCallsiteToProfCountMap(Function *DuplicateFunction,
                                DenseMap<User *, uint64_t> &SiteCountMap) const;

  // True when the module-wide cap on partial inlines has been hit
  // (-max-partial-inlining; -1 means unlimited).
  bool isLimitReached() const {
    return (MaxNumPartialInlining != -1 &&
            NumPartialInlining >= MaxNumPartialInlining);
  }

  // Casts \p U to a call or invoke; any other kind of user is a bug.
  static CallBase *getSupportedCallBase(User *U) {
    if (isa<CallInst>(U) || isa<InvokeInst>(U))
      return cast<CallBase>(U);
    llvm_unreachable("All uses must be calls");
    return nullptr;
  }

  // Returns the first recorded call site to \p F. Assumes F has at least one
  // user (caller is responsible for ensuring this).
  static CallBase *getOneCallSiteTo(Function &F) {
    User *User = *F.user_begin();
    return getSupportedCallBase(User);
  }

  // Returns the debug location and enclosing block of one call site to \p F;
  // used when emitting optimization remarks.
  std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function &F) const {
    CallBase *CB = getOneCallSiteTo(F);
    DebugLoc DLoc = CB->getDebugLoc();
    BasicBlock *Block = CB->getParent();
    return std::make_tuple(DLoc, Block);
  }

  // Returns the costs associated with function outlining:
  // - The first value is the non-weighted runtime cost for making the call
  //   to the outlined function, including the additional setup cost in the
  //   outlined function itself;
  // - The second value is the estimated size of the new call sequence in
  //   basic block Cloner.OutliningCallBB;
  std::tuple<InstructionCost, InstructionCost>
  computeOutliningCosts(FunctionCloner &Cloner) const;

  // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
  // approximate both the size and runtime cost (Note that in the current
  // inline cost analysis, there is no clear distinction there either).
  static InstructionCost computeBBInlineCost(BasicBlock *BB,
                                             TargetTransformInfo *TTI);

  // Detects the single-region (early-return) candidate shape in \p F.
  std::unique_ptr<FunctionOutliningInfo>
  computeOutliningInfo(Function &F) const;

  // Detects cold single-entry/exit candidate regions using profile data.
  std::unique_ptr<FunctionOutliningMultiRegionInfo>
  computeOutliningColdRegionsInfo(Function &F,
                                  OptimizationRemarkEmitter &ORE) const;
};
// Legacy pass-manager wrapper: gathers the required analyses and delegates
// all work to PartialInlinerImpl.
struct PartialInlinerLegacyPass : public ModulePass {
  static char ID; // Pass identification, replacement for typeid

  PartialInlinerLegacyPass() : ModulePass(ID) {
    initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
  }

  bool runOnModule(Module &M) override {
    if (skipModule(M))
      return false;

    AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
    TargetTransformInfoWrapperPass *TTIWP =
        &getAnalysis<TargetTransformInfoWrapperPass>();
    ProfileSummaryInfo &PSI =
        getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();

    // Adapt the legacy wrapper passes to the function_ref callbacks that
    // PartialInlinerImpl expects.
    auto GetAssumptionCache = [&ACT](Function &F) -> AssumptionCache & {
      return ACT->getAssumptionCache(F);
    };

    auto LookupAssumptionCache = [ACT](Function &F) -> AssumptionCache * {
      return ACT->lookupAssumptionCache(F);
    };

    auto GetTTI = [&TTIWP](Function &F) -> TargetTransformInfo & {
      return TTIWP->getTTI(F);
    };

    auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
      return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    };

    return PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
                              GetTLI, PSI)
        .run(M);
  }
};

} // end anonymous namespace
  335. std::unique_ptr<FunctionOutliningMultiRegionInfo>
  336. PartialInlinerImpl::computeOutliningColdRegionsInfo(
  337. Function &F, OptimizationRemarkEmitter &ORE) const {
  338. BasicBlock *EntryBlock = &F.front();
  339. DominatorTree DT(F);
  340. LoopInfo LI(DT);
  341. BranchProbabilityInfo BPI(F, LI);
  342. std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
  343. BlockFrequencyInfo *BFI;
  344. if (!GetBFI) {
  345. ScopedBFI.reset(new BlockFrequencyInfo(F, BPI, LI));
  346. BFI = ScopedBFI.get();
  347. } else
  348. BFI = &(GetBFI(F));
  349. // Return if we don't have profiling information.
  350. if (!PSI.hasInstrumentationProfile())
  351. return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
  352. std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
  353. std::make_unique<FunctionOutliningMultiRegionInfo>();
  354. auto IsSingleExit =
  355. [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
  356. BasicBlock *ExitBlock = nullptr;
  357. for (auto *Block : BlockList) {
  358. for (BasicBlock *Succ : successors(Block)) {
  359. if (!is_contained(BlockList, Succ)) {
  360. if (ExitBlock) {
  361. ORE.emit([&]() {
  362. return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
  363. &Succ->front())
  364. << "Region dominated by "
  365. << ore::NV("Block", BlockList.front()->getName())
  366. << " has more than one region exit edge.";
  367. });
  368. return nullptr;
  369. }
  370. ExitBlock = Block;
  371. }
  372. }
  373. }
  374. return ExitBlock;
  375. };
  376. auto BBProfileCount = [BFI](BasicBlock *BB) {
  377. return BFI->getBlockProfileCount(BB).value_or(0);
  378. };
  379. // Use the same computeBBInlineCost function to compute the cost savings of
  380. // the outlining the candidate region.
  381. TargetTransformInfo *FTTI = &GetTTI(F);
  382. InstructionCost OverallFunctionCost = 0;
  383. for (auto &BB : F)
  384. OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
  385. LLVM_DEBUG(dbgs() << "OverallFunctionCost = " << OverallFunctionCost
  386. << "\n";);
  387. InstructionCost MinOutlineRegionCost = OverallFunctionCost.map(
  388. [&](auto Cost) { return Cost * MinRegionSizeRatio; });
  389. BranchProbability MinBranchProbability(
  390. static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
  391. MinBlockCounterExecution);
  392. bool ColdCandidateFound = false;
  393. BasicBlock *CurrEntry = EntryBlock;
  394. std::vector<BasicBlock *> DFS;
  395. DenseMap<BasicBlock *, bool> VisitedMap;
  396. DFS.push_back(CurrEntry);
  397. VisitedMap[CurrEntry] = true;
  398. // Use Depth First Search on the basic blocks to find CFG edges that are
  399. // considered cold.
  400. // Cold regions considered must also have its inline cost compared to the
  401. // overall inline cost of the original function. The region is outlined only
  402. // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or
  403. // more.
  404. while (!DFS.empty()) {
  405. auto *ThisBB = DFS.back();
  406. DFS.pop_back();
  407. // Only consider regions with predecessor blocks that are considered
  408. // not-cold (default: part of the top 99.99% of all block counters)
  409. // AND greater than our minimum block execution count (default: 100).
  410. if (PSI.isColdBlock(ThisBB, BFI) ||
  411. BBProfileCount(ThisBB) < MinBlockCounterExecution)
  412. continue;
  413. for (auto SI = succ_begin(ThisBB); SI != succ_end(ThisBB); ++SI) {
  414. if (VisitedMap[*SI])
  415. continue;
  416. VisitedMap[*SI] = true;
  417. DFS.push_back(*SI);
  418. // If branch isn't cold, we skip to the next one.
  419. BranchProbability SuccProb = BPI.getEdgeProbability(ThisBB, *SI);
  420. if (SuccProb > MinBranchProbability)
  421. continue;
  422. LLVM_DEBUG(dbgs() << "Found cold edge: " << ThisBB->getName() << "->"
  423. << SI->getName()
  424. << "\nBranch Probability = " << SuccProb << "\n";);
  425. SmallVector<BasicBlock *, 8> DominateVector;
  426. DT.getDescendants(*SI, DominateVector);
  427. assert(!DominateVector.empty() &&
  428. "SI should be reachable and have at least itself as descendant");
  429. // We can only outline single entry regions (for now).
  430. if (!DominateVector.front()->hasNPredecessors(1)) {
  431. LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
  432. << " doesn't have a single predecessor in the "
  433. "dominator tree\n";);
  434. continue;
  435. }
  436. BasicBlock *ExitBlock = nullptr;
  437. // We can only outline single exit regions (for now).
  438. if (!(ExitBlock = IsSingleExit(DominateVector))) {
  439. LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
  440. << " doesn't have a unique successor\n";);
  441. continue;
  442. }
  443. InstructionCost OutlineRegionCost = 0;
  444. for (auto *BB : DominateVector)
  445. OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
  446. LLVM_DEBUG(dbgs() << "OutlineRegionCost = " << OutlineRegionCost
  447. << "\n";);
  448. if (!SkipCostAnalysis && OutlineRegionCost < MinOutlineRegionCost) {
  449. ORE.emit([&]() {
  450. return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
  451. &SI->front())
  452. << ore::NV("Callee", &F)
  453. << " inline cost-savings smaller than "
  454. << ore::NV("Cost", MinOutlineRegionCost);
  455. });
  456. LLVM_DEBUG(dbgs() << "ABORT: Outline region cost is smaller than "
  457. << MinOutlineRegionCost << "\n";);
  458. continue;
  459. }
  460. // For now, ignore blocks that belong to a SISE region that is a
  461. // candidate for outlining. In the future, we may want to look
  462. // at inner regions because the outer region may have live-exit
  463. // variables.
  464. for (auto *BB : DominateVector)
  465. VisitedMap[BB] = true;
  466. // ReturnBlock here means the block after the outline call
  467. BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
  468. FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
  469. DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
  470. OutliningInfo->ORI.push_back(RegInfo);
  471. LLVM_DEBUG(dbgs() << "Found Cold Candidate starting at block: "
  472. << DominateVector.front()->getName() << "\n";);
  473. ColdCandidateFound = true;
  474. NumColdRegionsFound++;
  475. }
  476. }
  477. if (ColdCandidateFound)
  478. return OutliningInfo;
  479. return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
  480. }
// Detects the single-region (early-return) outlining candidate shape in
// \p F: starting at the entry block, walk a chain of blocks that each end in
// a two-way conditional branch where one successor is a return block or the
// two successors form a triangle. The walked blocks become the inlined
// "entry" set; the region dominated by NonReturnBlock becomes the outlining
// candidate. Returns nullptr when the CFG does not match this shape.
std::unique_ptr<FunctionOutliningInfo>
PartialInlinerImpl::computeOutliningInfo(Function &F) const {
  BasicBlock *EntryBlock = &F.front();
  BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
  // The pattern requires a conditional branch in the entry block.
  if (!BR || BR->isUnconditional())
    return std::unique_ptr<FunctionOutliningInfo>();

  // Returns true if Succ is BB's successor
  auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
    return is_contained(successors(BB), Succ);
  };

  auto IsReturnBlock = [](BasicBlock *BB) {
    Instruction *TI = BB->getTerminator();
    return isa<ReturnInst>(TI);
  };

  // Orders the pair as (return block, non-return block); yields
  // (nullptr, nullptr) when neither successor is a return block.
  auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
    if (IsReturnBlock(Succ1))
      return std::make_tuple(Succ1, Succ2);
    if (IsReturnBlock(Succ2))
      return std::make_tuple(Succ2, Succ1);

    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
  };

  // Detect a triangular shape: one successor is also a successor of the
  // other. Yields (common successor, other successor) or (nullptr, nullptr).
  auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
    if (IsSuccessor(Succ1, Succ2))
      return std::make_tuple(Succ1, Succ2);
    if (IsSuccessor(Succ2, Succ1))
      return std::make_tuple(Succ2, Succ1);

    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
  };

  std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
      std::make_unique<FunctionOutliningInfo>();

  BasicBlock *CurrEntry = EntryBlock;
  bool CandidateFound = false;
  do {
    // The number of blocks to be inlined has already reached
    // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
    // disables partial inlining for the function.
    if (OutliningInfo->getNumInlinedBlocks() >= MaxNumInlineBlocks)
      break;

    if (succ_size(CurrEntry) != 2)
      break;

    BasicBlock *Succ1 = *succ_begin(CurrEntry);
    BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);

    BasicBlock *ReturnBlock, *NonReturnBlock;
    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);

    // One successor returns directly: candidate found, chain is complete.
    if (ReturnBlock) {
      OutliningInfo->Entries.push_back(CurrEntry);
      OutliningInfo->ReturnBlock = ReturnBlock;
      OutliningInfo->NonReturnBlock = NonReturnBlock;
      CandidateFound = true;
      break;
    }

    // Otherwise try to extend the entry chain through a triangle.
    BasicBlock *CommSucc, *OtherSucc;
    std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);

    if (!CommSucc)
      break;

    OutliningInfo->Entries.push_back(CurrEntry);
    CurrEntry = OtherSucc;
  } while (true);

  if (!CandidateFound)
    return std::unique_ptr<FunctionOutliningInfo>();

  // There should not be any successors (not in the entry set) other than
  // {ReturnBlock, NonReturnBlock}
  assert(OutliningInfo->Entries[0] == &F.front() &&
         "Function Entry must be the first in Entries vector");
  DenseSet<BasicBlock *> Entries;
  for (BasicBlock *E : OutliningInfo->Entries)
    Entries.insert(E);

  // Returns true if BB has a predecessor which is not
  // in the Entries set.
  auto HasNonEntryPred = [Entries](BasicBlock *BB) {
    for (auto *Pred : predecessors(BB)) {
      if (!Entries.count(Pred))
        return true;
    }
    return false;
  };
  // Verifies the entry chain is well formed — every entry's successors are
  // other entries, ReturnBlock, or NonReturnBlock, and no outside edge
  // enters the chain — while recording ReturnBlock's predecessors.
  auto CheckAndNormalizeCandidate =
      [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
        for (BasicBlock *E : OutliningInfo->Entries) {
          for (auto *Succ : successors(E)) {
            if (Entries.count(Succ))
              continue;
            if (Succ == OutliningInfo->ReturnBlock)
              OutliningInfo->ReturnBlockPreds.push_back(E);
            else if (Succ != OutliningInfo->NonReturnBlock)
              return false;
          }
          // There should not be any outside incoming edges either:
          if (HasNonEntryPred(E))
            return false;
        }
        return true;
      };

  if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
    return std::unique_ptr<FunctionOutliningInfo>();

  // Now further growing the candidate's inlining region by
  // peeling off dominating blocks from the outlining region:
  while (OutliningInfo->getNumInlinedBlocks() < MaxNumInlineBlocks) {
    BasicBlock *Cand = OutliningInfo->NonReturnBlock;
    if (succ_size(Cand) != 2)
      break;

    if (HasNonEntryPred(Cand))
      break;

    BasicBlock *Succ1 = *succ_begin(Cand);
    BasicBlock *Succ2 = *(succ_begin(Cand) + 1);

    BasicBlock *ReturnBlock, *NonReturnBlock;
    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
    // The peeled block must branch to the same return block, ...
    if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
      break;

    // ... and must be the sole predecessor of the shrunken region.
    if (NonReturnBlock->getSinglePredecessor() != Cand)
      break;

    // Now grow and update OutliningInfo:
    OutliningInfo->Entries.push_back(Cand);
    OutliningInfo->NonReturnBlock = NonReturnBlock;
    OutliningInfo->ReturnBlockPreds.push_back(Cand);
    Entries.insert(Cand);
  }
  return OutliningInfo;
}
  601. // Check if there is PGO data or user annotated branch data:
  602. static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI) {
  603. if (F.hasProfileData())
  604. return true;
  605. // Now check if any of the entry block has MD_prof data:
  606. for (auto *E : OI.Entries) {
  607. BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
  608. if (!BR || BR->isUnconditional())
  609. continue;
  610. if (hasBranchWeightMD(*BR))
  611. return true;
  612. }
  613. return false;
  614. }
  615. BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
  616. FunctionCloner &Cloner) const {
  617. BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
  618. auto EntryFreq =
  619. Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
  620. auto OutliningCallFreq =
  621. Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
  622. // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
  623. // we outlined any regions, so we may encounter situations where the
  624. // OutliningCallFreq is *slightly* bigger than the EntryFreq.
  625. if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
  626. OutliningCallFreq = EntryFreq;
  627. auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
  628. OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
  629. if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI))
  630. return OutlineRegionRelFreq;
  631. // When profile data is not available, we need to be conservative in
  632. // estimating the overall savings. Static branch prediction can usually
  633. // guess the branch direction right (taken/non-taken), but the guessed
  634. // branch probability is usually not biased enough. In case when the
  635. // outlined region is predicted to be likely, its probability needs
  636. // to be made higher (more biased) to not under-estimate the cost of
  637. // function outlining. On the other hand, if the outlined region
  638. // is predicted to be less likely, the predicted probablity is usually
  639. // higher than the actual. For instance, the actual probability of the
  640. // less likely target is only 5%, but the guessed probablity can be
  641. // 40%. In the latter case, there is no need for further adjustment.
  642. // FIXME: add an option for this.
  643. if (OutlineRegionRelFreq < BranchProbability(45, 100))
  644. return OutlineRegionRelFreq;
  645. OutlineRegionRelFreq = std::max(
  646. OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
  647. return OutlineRegionRelFreq;
  648. }
// Decide whether the call site CB (which calls the partially-inlinable clone)
// is worth partially inlining. Emits analysis/missed remarks explaining the
// decision. Returns true when the inline-cost model accepts the call and the
// weighted savings of removing the call exceed the weighted outlining
// runtime cost.
bool PartialInlinerImpl::shouldPartialInline(
    CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
    OptimizationRemarkEmitter &ORE) const {
  using namespace ore;

  Function *Callee = CB.getCalledFunction();
  assert(Callee == Cloner.ClonedFunc);

  // With cost analysis disabled, only legality of inlining matters.
  if (SkipCostAnalysis)
    return isInlineViable(*Callee).isSuccess();

  Function *Caller = CB.getCaller();
  auto &CalleeTTI = GetTTI(*Callee);
  // Only pass the remark emitter into the cost computation when missed-opt
  // remarks are actually enabled for this pass.
  bool RemarksEnabled =
      Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
          DEBUG_TYPE);
  InlineCost IC =
      getInlineCost(CB, getInlineParams(), CalleeTTI, GetAssumptionCache,
                    GetTLI, GetBFI, &PSI, RemarksEnabled ? &ORE : nullptr);

  // An always-inline callee should be handled by the regular inliner, not
  // partially inlined.
  if (IC.isAlways()) {
    ORE.emit([&]() {
      return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", &CB)
             << NV("Callee", Cloner.OrigFunc)
             << " should always be fully inlined, not partially";
    });
    return false;
  }

  if (IC.isNever()) {
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CB)
             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
             << NV("Caller", Caller)
             << " because it should never be inlined (cost=never)";
    });
    return false;
  }

  // Cost exceeds threshold.
  if (!IC) {
    ORE.emit([&]() {
      return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", &CB)
             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
             << NV("Caller", Caller) << " because too costly to inline (cost="
             << NV("Cost", IC.getCost()) << ", threshold="
             << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
    });
    return false;
  }
  const DataLayout &DL = Caller->getParent()->getDataLayout();

  // The savings of eliminating the call:
  int NonWeightedSavings = getCallsiteCost(CB, DL);
  BlockFrequency NormWeightedSavings(NonWeightedSavings);

  // Weighted saving is smaller than weighted cost, return false
  if (NormWeightedSavings < WeightedOutliningRcost) {
    ORE.emit([&]() {
      return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
                                        &CB)
             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
             << NV("Caller", Caller) << " runtime overhead (overhead="
             << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
             << ", savings="
             << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
             << ")"
             << " of making the outlined call is too high";
    });

    return false;
  }

  ORE.emit([&]() {
    return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", &CB)
           << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
           << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
           << " (threshold="
           << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
  });
  return true;
}
  720. // TODO: Ideally we should share Inliner's InlineCost Analysis code.
  721. // For now use a simplified version. The returned 'InlineCost' will be used
  722. // to esimate the size cost as well as runtime cost of the BB.
  723. InstructionCost
  724. PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
  725. TargetTransformInfo *TTI) {
  726. InstructionCost InlineCost = 0;
  727. const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
  728. int InstrCost = InlineConstants::getInstrCost();
  729. for (Instruction &I : BB->instructionsWithoutDebug()) {
  730. // Skip free instructions.
  731. switch (I.getOpcode()) {
  732. case Instruction::BitCast:
  733. case Instruction::PtrToInt:
  734. case Instruction::IntToPtr:
  735. case Instruction::Alloca:
  736. case Instruction::PHI:
  737. continue;
  738. case Instruction::GetElementPtr:
  739. if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices())
  740. continue;
  741. break;
  742. default:
  743. break;
  744. }
  745. if (I.isLifetimeStartOrEnd())
  746. continue;
  747. if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
  748. Intrinsic::ID IID = II->getIntrinsicID();
  749. SmallVector<Type *, 4> Tys;
  750. FastMathFlags FMF;
  751. for (Value *Val : II->args())
  752. Tys.push_back(Val->getType());
  753. if (auto *FPMO = dyn_cast<FPMathOperator>(II))
  754. FMF = FPMO->getFastMathFlags();
  755. IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF);
  756. InlineCost += TTI->getIntrinsicInstrCost(ICA, TTI::TCK_SizeAndLatency);
  757. continue;
  758. }
  759. if (CallInst *CI = dyn_cast<CallInst>(&I)) {
  760. InlineCost += getCallsiteCost(*CI, DL);
  761. continue;
  762. }
  763. if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
  764. InlineCost += getCallsiteCost(*II, DL);
  765. continue;
  766. }
  767. if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
  768. InlineCost += (SI->getNumCases() + 1) * InstrCost;
  769. continue;
  770. }
  771. InlineCost += InstrCost;
  772. }
  773. return InlineCost;
  774. }
  775. std::tuple<InstructionCost, InstructionCost>
  776. PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) const {
  777. InstructionCost OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
  778. for (auto FuncBBPair : Cloner.OutlinedFunctions) {
  779. Function *OutlinedFunc = FuncBBPair.first;
  780. BasicBlock* OutliningCallBB = FuncBBPair.second;
  781. // Now compute the cost of the call sequence to the outlined function
  782. // 'OutlinedFunction' in BB 'OutliningCallBB':
  783. auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
  784. OutliningFuncCallCost +=
  785. computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
  786. // Now compute the cost of the extracted/outlined function itself:
  787. for (BasicBlock &BB : *OutlinedFunc)
  788. OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
  789. }
  790. assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
  791. "Outlined function cost should be no less than the outlined region");
  792. // The code extractor introduces a new root and exit stub blocks with
  793. // additional unconditional branches. Those branches will be eliminated
  794. // later with bb layout. The cost should be adjusted accordingly:
  795. OutlinedFunctionCost -=
  796. 2 * InlineConstants::getInstrCost() * Cloner.OutlinedFunctions.size();
  797. InstructionCost OutliningRuntimeOverhead =
  798. OutliningFuncCallCost +
  799. (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
  800. ExtraOutliningPenalty.getValue();
  801. return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
  802. }
  803. // Create the callsite to profile count map which is
  804. // used to update the original function's entry count,
  805. // after the function is partially inlined into the callsite.
  806. void PartialInlinerImpl::computeCallsiteToProfCountMap(
  807. Function *DuplicateFunction,
  808. DenseMap<User *, uint64_t> &CallSiteToProfCountMap) const {
  809. std::vector<User *> Users(DuplicateFunction->user_begin(),
  810. DuplicateFunction->user_end());
  811. Function *CurrentCaller = nullptr;
  812. std::unique_ptr<BlockFrequencyInfo> TempBFI;
  813. BlockFrequencyInfo *CurrentCallerBFI = nullptr;
  814. auto ComputeCurrBFI = [&,this](Function *Caller) {
  815. // For the old pass manager:
  816. if (!GetBFI) {
  817. DominatorTree DT(*Caller);
  818. LoopInfo LI(DT);
  819. BranchProbabilityInfo BPI(*Caller, LI);
  820. TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
  821. CurrentCallerBFI = TempBFI.get();
  822. } else {
  823. // New pass manager:
  824. CurrentCallerBFI = &(GetBFI(*Caller));
  825. }
  826. };
  827. for (User *User : Users) {
  828. // Don't bother with BlockAddress used by CallBr for asm goto.
  829. if (isa<BlockAddress>(User))
  830. continue;
  831. CallBase *CB = getSupportedCallBase(User);
  832. Function *Caller = CB->getCaller();
  833. if (CurrentCaller != Caller) {
  834. CurrentCaller = Caller;
  835. ComputeCurrBFI(Caller);
  836. } else {
  837. assert(CurrentCallerBFI && "CallerBFI is not set");
  838. }
  839. BasicBlock *CallBB = CB->getParent();
  840. auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
  841. if (Count)
  842. CallSiteToProfCountMap[User] = *Count;
  843. else
  844. CallSiteToProfCountMap[User] = 0;
  845. }
  846. }
  847. PartialInlinerImpl::FunctionCloner::FunctionCloner(
  848. Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
  849. function_ref<AssumptionCache *(Function &)> LookupAC,
  850. function_ref<TargetTransformInfo &(Function &)> GetTTI)
  851. : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
  852. ClonedOI = std::make_unique<FunctionOutliningInfo>();
  853. // Clone the function, so that we can hack away on it.
  854. ValueToValueMapTy VMap;
  855. ClonedFunc = CloneFunction(F, VMap);
  856. ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
  857. ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
  858. for (BasicBlock *BB : OI->Entries)
  859. ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
  860. for (BasicBlock *E : OI->ReturnBlockPreds) {
  861. BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
  862. ClonedOI->ReturnBlockPreds.push_back(NewE);
  863. }
  864. // Go ahead and update all uses to the duplicate, so that we can just
  865. // use the inliner functionality when we're done hacking.
  866. F->replaceAllUsesWith(ClonedFunc);
  867. }
  868. PartialInlinerImpl::FunctionCloner::FunctionCloner(
  869. Function *F, FunctionOutliningMultiRegionInfo *OI,
  870. OptimizationRemarkEmitter &ORE,
  871. function_ref<AssumptionCache *(Function &)> LookupAC,
  872. function_ref<TargetTransformInfo &(Function &)> GetTTI)
  873. : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
  874. ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
  875. // Clone the function, so that we can hack away on it.
  876. ValueToValueMapTy VMap;
  877. ClonedFunc = CloneFunction(F, VMap);
  878. // Go through all Outline Candidate Regions and update all BasicBlock
  879. // information.
  880. for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
  881. OI->ORI) {
  882. SmallVector<BasicBlock *, 8> Region;
  883. for (BasicBlock *BB : RegionInfo.Region)
  884. Region.push_back(cast<BasicBlock>(VMap[BB]));
  885. BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
  886. BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
  887. BasicBlock *NewReturnBlock = nullptr;
  888. if (RegionInfo.ReturnBlock)
  889. NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
  890. FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
  891. Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
  892. ClonedOMRI->ORI.push_back(MappedRegionInfo);
  893. }
  894. // Go ahead and update all uses to the duplicate, so that we can just
  895. // use the inliner functionality when we're done hacking.
  896. F->replaceAllUsesWith(ClonedFunc);
  897. }
// Split ClonedOI->ReturnBlock so that PHI nodes merging values from more
// than one soon-to-be-extracted block are rewritten into a two-level PHI
// sequence: the first level stays in the extracted region, the second level
// (in the new return block) merges the extracted value with the values
// coming directly from the inlined entry blocks.
void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const {
  // Return BB's leading PHI, or null if BB does not start with one. Note the
  // loop body runs at most once: it either records the first PHI or stops at
  // the first non-PHI instruction.
  auto GetFirstPHI = [](BasicBlock *BB) {
    BasicBlock::iterator I = BB->begin();
    PHINode *FirstPhi = nullptr;
    while (I != BB->end()) {
      PHINode *Phi = dyn_cast<PHINode>(I);
      if (!Phi)
        break;
      if (!FirstPhi) {
        FirstPhi = Phi;
        break;
      }
    }
    return FirstPhi;
  };

  // Shouldn't need to normalize PHIs if we're not outlining non-early return
  // blocks (multi-region outlining has no ClonedOI).
  if (!ClonedOI)
    return;

  // Special hackery is needed with PHI nodes that have inputs from more than
  // one extracted block. For simplicity, just split the PHIs into a two-level
  // sequence of PHIs, some of which will go in the extracted region, and some
  // of which will go outside.
  BasicBlock *PreReturn = ClonedOI->ReturnBlock;
  // only split block when necessary:
  PHINode *FirstPhi = GetFirstPHI(PreReturn);
  unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();

  // If there is no PHI, or each PHI has at most one incoming edge beyond
  // those from the inlined entry predecessors, nothing needs splitting.
  if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
    return;

  // A PHI is trivial when all incoming values are identical; returns that
  // common value, or null.
  auto IsTrivialPhi = [](PHINode *PN) -> Value * {
    if (llvm::all_equal(PN->incoming_values()))
      return PN->getIncomingValue(0);
    return nullptr;
  };

  // Split off everything after the PHIs into a new ReturnBlock; PreReturn
  // keeps the original PHIs and ends in an unconditional branch.
  ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
      ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
  BasicBlock::iterator I = PreReturn->begin();
  Instruction *Ins = &ClonedOI->ReturnBlock->front();
  SmallVector<Instruction *, 4> DeadPhis;
  while (I != PreReturn->end()) {
    PHINode *OldPhi = dyn_cast<PHINode>(I);
    if (!OldPhi)
      break;

    // Second-level PHI in the new return block: one edge from PreReturn (the
    // extracted side) plus one per inlined entry predecessor.
    PHINode *RetPhi =
        PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
    OldPhi->replaceAllUsesWith(RetPhi);
    Ins = ClonedOI->ReturnBlock->getFirstNonPHI();

    RetPhi->addIncoming(&*I, PreReturn);
    // Move the entry-predecessor incoming values from the old PHI to the new
    // one, removing them from the old PHI.
    for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
      RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
      OldPhi->removeIncomingValue(E);
    }

    // After incoming values splitting, the old phi may become trivial.
    // Keeping the trivial phi can introduce definition inside the outline
    // region which is live-out, causing necessary overhead (load, store
    // arg passing etc).
    if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
      OldPhi->replaceAllUsesWith(OldPhiVal);
      DeadPhis.push_back(OldPhi);
    }
    ++I;
  }
  for (auto *DP : DeadPhis)
    DP->eraseFromParent();

  // Redirect the inlined entry predecessors to branch directly to the new
  // return block instead of PreReturn.
  for (auto *E : ClonedOI->ReturnBlockPreds)
    E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
}
  965. bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
  966. auto ComputeRegionCost =
  967. [&](SmallVectorImpl<BasicBlock *> &Region) -> InstructionCost {
  968. InstructionCost Cost = 0;
  969. for (BasicBlock* BB : Region)
  970. Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
  971. return Cost;
  972. };
  973. assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
  974. if (ClonedOMRI->ORI.empty())
  975. return false;
  976. // The CodeExtractor needs a dominator tree.
  977. DominatorTree DT;
  978. DT.recalculate(*ClonedFunc);
  979. // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
  980. LoopInfo LI(DT);
  981. BranchProbabilityInfo BPI(*ClonedFunc, LI);
  982. ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
  983. // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
  984. CodeExtractorAnalysisCache CEAC(*ClonedFunc);
  985. SetVector<Value *> Inputs, Outputs, Sinks;
  986. for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
  987. ClonedOMRI->ORI) {
  988. InstructionCost CurrentOutlinedRegionCost =
  989. ComputeRegionCost(RegionInfo.Region);
  990. CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
  991. ClonedFuncBFI.get(), &BPI,
  992. LookupAC(*RegionInfo.EntryBlock->getParent()),
  993. /* AllowVarargs */ false);
  994. CE.findInputsOutputs(Inputs, Outputs, Sinks);
  995. LLVM_DEBUG({
  996. dbgs() << "inputs: " << Inputs.size() << "\n";
  997. dbgs() << "outputs: " << Outputs.size() << "\n";
  998. for (Value *value : Inputs)
  999. dbgs() << "value used in func: " << *value << "\n";
  1000. for (Value *output : Outputs)
  1001. dbgs() << "instr used in func: " << *output << "\n";
  1002. });
  1003. // Do not extract regions that have live exit variables.
  1004. if (Outputs.size() > 0 && !ForceLiveExit)
  1005. continue;
  1006. if (Function *OutlinedFunc = CE.extractCodeRegion(CEAC)) {
  1007. CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
  1008. BasicBlock *OutliningCallBB = OCS->getParent();
  1009. assert(OutliningCallBB->getParent() == ClonedFunc);
  1010. OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
  1011. NumColdRegionsOutlined++;
  1012. OutlinedRegionCost += CurrentOutlinedRegionCost;
  1013. if (MarkOutlinedColdCC) {
  1014. OutlinedFunc->setCallingConv(CallingConv::Cold);
  1015. OCS->setCallingConv(CallingConv::Cold);
  1016. }
  1017. } else
  1018. ORE.emit([&]() {
  1019. return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
  1020. &RegionInfo.Region.front()->front())
  1021. << "Failed to extract region at block "
  1022. << ore::NV("Block", RegionInfo.Region.front());
  1023. });
  1024. }
  1025. return !OutlinedFunctions.empty();
  1026. }
  1027. Function *
  1028. PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
  1029. // Returns true if the block is to be partial inlined into the caller
  1030. // (i.e. not to be extracted to the out of line function)
  1031. auto ToBeInlined = [&, this](BasicBlock *BB) {
  1032. return BB == ClonedOI->ReturnBlock ||
  1033. llvm::is_contained(ClonedOI->Entries, BB);
  1034. };
  1035. assert(ClonedOI && "Expecting OutlineInfo for single region outline");
  1036. // The CodeExtractor needs a dominator tree.
  1037. DominatorTree DT;
  1038. DT.recalculate(*ClonedFunc);
  1039. // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
  1040. LoopInfo LI(DT);
  1041. BranchProbabilityInfo BPI(*ClonedFunc, LI);
  1042. ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
  1043. // Gather up the blocks that we're going to extract.
  1044. std::vector<BasicBlock *> ToExtract;
  1045. auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
  1046. ToExtract.push_back(ClonedOI->NonReturnBlock);
  1047. OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
  1048. ClonedOI->NonReturnBlock, ClonedFuncTTI);
  1049. for (BasicBlock &BB : *ClonedFunc)
  1050. if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
  1051. ToExtract.push_back(&BB);
  1052. // FIXME: the code extractor may hoist/sink more code
  1053. // into the outlined function which may make the outlining
  1054. // overhead (the difference of the outlined function cost
  1055. // and OutliningRegionCost) look larger.
  1056. OutlinedRegionCost += computeBBInlineCost(&BB, ClonedFuncTTI);
  1057. }
  1058. // Extract the body of the if.
  1059. CodeExtractorAnalysisCache CEAC(*ClonedFunc);
  1060. Function *OutlinedFunc =
  1061. CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
  1062. ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
  1063. /* AllowVarargs */ true)
  1064. .extractCodeRegion(CEAC);
  1065. if (OutlinedFunc) {
  1066. BasicBlock *OutliningCallBB =
  1067. PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->getParent();
  1068. assert(OutliningCallBB->getParent() == ClonedFunc);
  1069. OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
  1070. } else
  1071. ORE.emit([&]() {
  1072. return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
  1073. &ToExtract.front()->front())
  1074. << "Failed to extract region at block "
  1075. << ore::NV("Block", ToExtract.front());
  1076. });
  1077. return OutlinedFunc;
  1078. }
  1079. PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
  1080. // Ditch the duplicate, since we're done with it, and rewrite all remaining
  1081. // users (function pointers, etc.) back to the original function.
  1082. ClonedFunc->replaceAllUsesWith(OrigFunc);
  1083. ClonedFunc->eraseFromParent();
  1084. if (!IsFunctionInlined) {
  1085. // Remove each function that was speculatively created if there is no
  1086. // reference.
  1087. for (auto FuncBBPair : OutlinedFunctions) {
  1088. Function *Func = FuncBBPair.first;
  1089. Func->eraseFromParent();
  1090. }
  1091. }
  1092. }
  1093. std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function &F) {
  1094. if (F.hasAddressTaken())
  1095. return {false, nullptr};
  1096. // Let inliner handle it
  1097. if (F.hasFnAttribute(Attribute::AlwaysInline))
  1098. return {false, nullptr};
  1099. if (F.hasFnAttribute(Attribute::NoInline))
  1100. return {false, nullptr};
  1101. if (PSI.isFunctionEntryCold(&F))
  1102. return {false, nullptr};
  1103. if (F.users().empty())
  1104. return {false, nullptr};
  1105. OptimizationRemarkEmitter ORE(&F);
  1106. // Only try to outline cold regions if we have a profile summary, which
  1107. // implies we have profiling information.
  1108. if (PSI.hasProfileSummary() && F.hasProfileData() &&
  1109. !DisableMultiRegionPartialInline) {
  1110. std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
  1111. computeOutliningColdRegionsInfo(F, ORE);
  1112. if (OMRI) {
  1113. FunctionCloner Cloner(&F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
  1114. LLVM_DEBUG({
  1115. dbgs() << "HotCountThreshold = " << PSI.getHotCountThreshold() << "\n";
  1116. dbgs() << "ColdCountThreshold = " << PSI.getColdCountThreshold()
  1117. << "\n";
  1118. });
  1119. bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
  1120. if (DidOutline) {
  1121. LLVM_DEBUG({
  1122. dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
  1123. Cloner.ClonedFunc->print(dbgs());
  1124. dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
  1125. });
  1126. if (tryPartialInline(Cloner))
  1127. return {true, nullptr};
  1128. }
  1129. }
  1130. }
  1131. // Fall-thru to regular partial inlining if we:
  1132. // i) can't find any cold regions to outline, or
  1133. // ii) can't inline the outlined function anywhere.
  1134. std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
  1135. if (!OI)
  1136. return {false, nullptr};
  1137. FunctionCloner Cloner(&F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
  1138. Cloner.normalizeReturnBlock();
  1139. Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
  1140. if (!OutlinedFunction)
  1141. return {false, nullptr};
  1142. if (tryPartialInline(Cloner))
  1143. return {true, OutlinedFunction};
  1144. return {false, nullptr};
  1145. }
// Try to inline the cloned (partially-inlinable) function into every
// supported call site of the original function. Returns true if at least one
// call site was inlined; on success also updates the original function's
// entry count and the pass statistics.
bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
  if (Cloner.OutlinedFunctions.empty())
    return false;

  // {size cost of the call sequence(s), non-weighted runtime overhead}.
  auto OutliningCosts = computeOutliningCosts(Cloner);

  InstructionCost SizeCost = std::get<0>(OutliningCosts);
  InstructionCost NonWeightedRcost = std::get<1>(OutliningCosts);

  assert(SizeCost.isValid() && NonWeightedRcost.isValid() &&
         "Expected valid costs");

  // Only calculate RelativeToEntryFreq when we are doing single region
  // outlining.
  BranchProbability RelativeToEntryFreq;
  if (Cloner.ClonedOI)
    RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
  else
    // RelativeToEntryFreq doesn't make sense when we have more than one
    // outlined call because each call will have a different relative frequency
    // to the entry block. We can consider using the average, but the
    // usefulness of that information is questionable. For now, assume we never
    // execute the calls to outlined functions.
    RelativeToEntryFreq = BranchProbability(0, 1);

  // Weight the runtime overhead by how often the outlined call is expected
  // to execute relative to entry.
  BlockFrequency WeightedRcost =
      BlockFrequency(*NonWeightedRcost.getValue()) * RelativeToEntryFreq;

  // The call sequence(s) to the outlined function(s) are larger than the sum of
  // the original outlined region size(s), it does not increase the chances of
  // inlining the function with outlining (The inliner uses the size increase to
  // model the cost of inlining a callee).
  if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
    OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
    DebugLoc DLoc;
    BasicBlock *Block;
    std::tie(DLoc, Block) = getOneDebugLoc(*Cloner.ClonedFunc);
    OrigFuncORE.emit([&]() {
      return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
                                        DLoc, Block)
             << ore::NV("Function", Cloner.OrigFunc)
             << " not partially inlined into callers (Original Size = "
             << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
             << ", Size of call sequence to outlined function = "
             << ore::NV("NewSize", SizeCost) << ")";
    });
    return false;
  }

  // The FunctionCloner constructor redirected all users to the clone.
  assert(Cloner.OrigFunc->users().empty() &&
         "F's users should all be replaced!");

  // Snapshot the users (call sites of the clone) up front: successful
  // inlining below removes call sites from the use list.
  std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
                            Cloner.ClonedFunc->user_end());

  DenseMap<User *, uint64_t> CallSiteToProfCountMap;
  auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
  if (CalleeEntryCount)
    computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);

  // Running remainder of the entry count; each inlined call site's count is
  // subtracted below.
  uint64_t CalleeEntryCountV =
      (CalleeEntryCount ? CalleeEntryCount->getCount() : 0);

  bool AnyInline = false;
  for (User *User : Users) {
    // Don't bother with BlockAddress used by CallBr for asm goto.
    if (isa<BlockAddress>(User))
      continue;

    CallBase *CB = getSupportedCallBase(User);

    // Respect the partial-inlining count limit (testing/tuning knob).
    if (isLimitReached())
      continue;

    OptimizationRemarkEmitter CallerORE(CB->getCaller());
    if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE))
      continue;

    // Construct remark before doing the inlining, as after successful inlining
    // the callsite is removed.
    OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CB);
    OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
       << ore::NV("Caller", CB->getCaller());

    InlineFunctionInfo IFI(nullptr, GetAssumptionCache, &PSI);
    // We can only forward varargs when we outlined a single region, else we
    // bail on vararg functions.
    if (!InlineFunction(*CB, IFI, /*MergeAttributes=*/false, nullptr, true,
                        (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
                                         : nullptr))
             .isSuccess())
      continue;

    CallerORE.emit(OR);

    // Now update the entry count:
    if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
      uint64_t CallSiteCount = CallSiteToProfCountMap[User];
      CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
    }

    AnyInline = true;
    NumPartialInlining++;
    // Update the stats
    if (Cloner.ClonedOI)
      NumPartialInlined++;
    else
      NumColdOutlinePartialInlined++;
  }

  if (AnyInline) {
    Cloner.IsFunctionInlined = true;
    if (CalleeEntryCount)
      Cloner.OrigFunc->setEntryCount(Function::ProfileCount(
          CalleeEntryCountV, CalleeEntryCount->getType()));
    OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
    OrigFuncORE.emit([&]() {
      return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
             << "Partially inlined into at least one caller";
    });
  }

  return AnyInline;
}
  1249. bool PartialInlinerImpl::run(Module &M) {
  1250. if (DisablePartialInlining)
  1251. return false;
  1252. std::vector<Function *> Worklist;
  1253. Worklist.reserve(M.size());
  1254. for (Function &F : M)
  1255. if (!F.use_empty() && !F.isDeclaration())
  1256. Worklist.push_back(&F);
  1257. bool Changed = false;
  1258. while (!Worklist.empty()) {
  1259. Function *CurrFunc = Worklist.back();
  1260. Worklist.pop_back();
  1261. if (CurrFunc->use_empty())
  1262. continue;
  1263. std::pair<bool, Function *> Result = unswitchFunction(*CurrFunc);
  1264. if (Result.second)
  1265. Worklist.push_back(Result.second);
  1266. Changed |= Result.first;
  1267. }
  1268. return Changed;
  1269. }
// Unique address used by the legacy pass manager to identify this pass.
char PartialInlinerLegacyPass::ID = 0;

// Register the legacy pass and declare the analyses it depends on so the
// legacy pass manager can schedule them before this pass runs.
INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
                      "Partial Inliner", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
                    "Partial Inliner", false, false)
// Factory used by the legacy pass manager to create this pass; the caller
// takes ownership of the returned pass.
ModulePass *llvm::createPartialInliningPass() {
  return new PartialInlinerLegacyPass();
}
  1282. PreservedAnalyses PartialInlinerPass::run(Module &M,
  1283. ModuleAnalysisManager &AM) {
  1284. auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  1285. auto GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & {
  1286. return FAM.getResult<AssumptionAnalysis>(F);
  1287. };
  1288. auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
  1289. return FAM.getCachedResult<AssumptionAnalysis>(F);
  1290. };
  1291. auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
  1292. return FAM.getResult<BlockFrequencyAnalysis>(F);
  1293. };
  1294. auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
  1295. return FAM.getResult<TargetIRAnalysis>(F);
  1296. };
  1297. auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
  1298. return FAM.getResult<TargetLibraryAnalysis>(F);
  1299. };
  1300. ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
  1301. if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
  1302. GetTLI, PSI, GetBFI)
  1303. .run(M))
  1304. return PreservedAnalyses::none();
  1305. return PreservedAnalyses::all();
  1306. }