//===- Inliner.cpp - Code common to all inliners --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the mechanics required to implement inlining without
// missing any calls, while keeping the call graph up to date. The decisions
// about which calls are profitable to inline are implemented elsewhere.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InlineOrder.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ReplayInlineAdvisor.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <cassert>
#include <functional>
#include <sstream>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "inline"
STATISTIC(NumInlined, "Number of functions inlined");
STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
STATISTIC(NumMergedAllocas, "Number of allocas merged together");

/// Flag to disable manual alloca merging.
///
/// Merging of allocas was originally done as a stack-size saving technique
/// prior to LLVM's code generator having support for stack coloring based on
/// lifetime markers. It is now in the process of being removed. To experiment
/// with disabling it and relying fully on lifetime marker based stack
/// coloring, you can pass this flag to LLVM.
static cl::opt<bool>
    DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
                                cl::init(false), cl::Hidden);

static cl::opt<int> IntraSCCCostMultiplier(
    "intra-scc-cost-multiplier", cl::init(2), cl::Hidden,
    cl::desc(
        "Cost multiplier to multiply onto inlined call sites where the "
        "new call was previously an intra-SCC call (not relevant when the "
        "original call was already intra-SCC). This can accumulate over "
        "multiple inlinings (e.g. if a call site already had a cost "
        "multiplier and one of its inlined calls was also subject to "
        "this, the inlined call would have the original multiplier "
        "multiplied by intra-scc-cost-multiplier). This is to prevent tons of "
        "inlining through a child SCC which can cause terrible compile times"));
/// A flag for testing, so that we can print the content of the advisor when
/// running it as part of the default (e.g. -O3) pipeline.
static cl::opt<bool> KeepAdvisorForPrinting("keep-inline-advisor-for-printing",
                                            cl::init(false), cl::Hidden);
extern cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats;

static cl::opt<std::string> CGSCCInlineReplayFile(
    "cgscc-inline-replay", cl::init(""), cl::value_desc("filename"),
    cl::desc(
        "Optimization remarks file containing inline remarks to be replayed "
        "by cgscc inlining."),
    cl::Hidden);

static cl::opt<ReplayInlinerSettings::Scope> CGSCCInlineReplayScope(
    "cgscc-inline-replay-scope",
    cl::init(ReplayInlinerSettings::Scope::Function),
    cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function",
                          "Replay on functions that have remarks associated "
                          "with them (default)"),
               clEnumValN(ReplayInlinerSettings::Scope::Module, "Module",
                          "Replay on the entire module")),
    cl::desc("Whether inline replay should be applied to the entire "
             "Module or just the Functions (default) that are present as "
             "callers in remarks during cgscc inlining."),
    cl::Hidden);
static cl::opt<ReplayInlinerSettings::Fallback> CGSCCInlineReplayFallback(
    "cgscc-inline-replay-fallback",
    cl::init(ReplayInlinerSettings::Fallback::Original),
    cl::values(
        clEnumValN(
            ReplayInlinerSettings::Fallback::Original, "Original",
            "All decisions not in replay are sent to the original advisor "
            "(default)"),
        clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline,
                   "AlwaysInline", "All decisions not in replay are inlined"),
        clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline",
                   "All decisions not in replay are not inlined")),
    cl::desc(
        "How cgscc inline replay treats sites that don't come from the replay. "
        "Original: defers to the original advisor, AlwaysInline: inline all "
        "sites not in replay, NeverInline: inline no sites not in replay"),
    cl::Hidden);
static cl::opt<CallSiteFormat::Format> CGSCCInlineReplayFormat(
    "cgscc-inline-replay-format",
    cl::init(CallSiteFormat::Format::LineColumnDiscriminator),
    cl::values(
        clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"),
        clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn",
                   "<Line Number>:<Column Number>"),
        clEnumValN(CallSiteFormat::Format::LineDiscriminator,
                   "LineDiscriminator", "<Line Number>.<Discriminator>"),
        clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator,
                   "LineColumnDiscriminator",
                   "<Line Number>:<Column Number>.<Discriminator> (default)")),
    cl::desc("How cgscc inline replay file is formatted"), cl::Hidden);
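
// Illustrative use of the replay options above (the flag values are the enum
// names registered with clEnumValN; the remarks file is one previously
// produced by the optimization remark infrastructure):
//   opt -passes='cgscc(inline)' -cgscc-inline-replay=<remarks file> \
//       -cgscc-inline-replay-scope=Module \
//       -cgscc-inline-replay-fallback=NeverInline ...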
static cl::opt<bool> InlineEnablePriorityOrder(
    "inline-enable-priority-order", cl::Hidden, cl::init(false),
    cl::desc("Enable the priority inline order for the inliner"));

LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {}

LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
    : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}

/// For this class, we declare that we require and preserve the call graph.
/// If the derived class implements this method, it should
/// always explicitly call the implementation here.
void LegacyInlinerBase::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<ProfileSummaryInfoWrapperPass>();
  AU.addRequired<TargetLibraryInfoWrapperPass>();
  getAAResultsAnalysisUsage(AU);
  CallGraphSCCPass::getAnalysisUsage(AU);
}
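
// Maps an array type to the inlined allocas of that type that are available
// for reuse in the caller; see mergeInlinedArrayAllocas below.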
using InlinedArrayAllocasTy = DenseMap<ArrayType *, std::vector<AllocaInst *>>;

/// Look at all of the allocas that we inlined through this call site. If we
/// have already inlined other allocas through other calls into this function,
/// then we know that they have disjoint lifetimes and that we can merge them.
///
/// There are many heuristics possible for merging these allocas, and the
/// different options have different tradeoffs. One thing that we *really*
/// don't want to hurt is SROA: once inlining happens, often allocas are no
/// longer address taken and so they can be promoted.
///
/// Our "solution" for that is to only merge allocas whose outermost type is an
/// array type. These are usually not promoted because someone is using a
/// variable index into them. These are also often the most important ones to
/// merge.
///
/// A better solution would be to have real memory lifetime markers in the IR
/// and not have the inliner do any merging of allocas at all. This would
/// allow the backend to do proper stack slot coloring of all allocas that
/// *actually make it to the backend*, which is really what we want.
///
/// Because we don't have this information, we do this simple and useful hack.
static void mergeInlinedArrayAllocas(Function *Caller, InlineFunctionInfo &IFI,
                                     InlinedArrayAllocasTy &InlinedArrayAllocas,
                                     int InlineHistory) {
  SmallPtrSet<AllocaInst *, 16> UsedAllocas;

  // When processing our SCC, check to see if the call site was inlined from
  // some other call site. For example, if we're processing "A" in this code:
  //   A() { B() }
  //   B() { x = alloca ... C() }
  //   C() { y = alloca ... }
  // Assume that C was not inlined into B initially, and so we're processing A
  // and decide to inline B into A. Doing this makes an alloca available for
  // reuse and makes a callsite (C) available for inlining. When we process
  // the C call site we don't want to do any alloca merging between x and y
  // because their scopes are not disjoint. We could make this smarter by
  // keeping track of the inline history for each alloca in the
  // InlinedArrayAllocas but this isn't likely to be a significant win.
  if (InlineHistory != -1) // Only do merging for top-level call sites in SCC.
    return;

  // Loop over all the allocas we have so far and see if they can be merged
  // with a previously inlined alloca. If not, remember that we had it.
  for (unsigned AllocaNo = 0, E = IFI.StaticAllocas.size(); AllocaNo != E;
       ++AllocaNo) {
    AllocaInst *AI = IFI.StaticAllocas[AllocaNo];

    // Don't bother trying to merge array allocations (they will usually be
    // canonicalized to be an allocation *of* an array), or allocations whose
    // type is not itself an array (because we're afraid of pessimizing SROA).
    ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
    if (!ATy || AI->isArrayAllocation())
      continue;

    // Get the list of all available allocas for this array type.
    std::vector<AllocaInst *> &AllocasForType = InlinedArrayAllocas[ATy];

    // Loop over the allocas in AllocasForType to see if we can reuse one. Note
    // that we have to be careful not to reuse the same "available" alloca for
    // multiple different allocas that we just inlined; we use the 'UsedAllocas'
    // set to keep track of which "available" allocas are being used by this
    // function. Also, AllocasForType can be empty of course!
    bool MergedAwayAlloca = false;
    for (AllocaInst *AvailableAlloca : AllocasForType) {
      Align Align1 = AI->getAlign();
      Align Align2 = AvailableAlloca->getAlign();

      // The available alloca has to be in the right function, not in some
      // other function in this SCC.
      if (AvailableAlloca->getParent() != AI->getParent())
        continue;

      // If the inlined function already uses this alloca then we can't reuse
      // it.
      if (!UsedAllocas.insert(AvailableAlloca).second)
        continue;

      // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
      // success!
      LLVM_DEBUG(dbgs() << "    ***MERGED ALLOCA: " << *AI
                        << "\n\t\tINTO: " << *AvailableAlloca << '\n');

      // Move affected dbg.declare calls immediately after the new alloca to
      // avoid the situation when a dbg.declare precedes its alloca.
      if (auto *L = LocalAsMetadata::getIfExists(AI))
        if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
          for (User *U : MDV->users())
            if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
              DDI->moveBefore(AvailableAlloca->getNextNode());

      AI->replaceAllUsesWith(AvailableAlloca);

      if (Align1 > Align2)
        AvailableAlloca->setAlignment(AI->getAlign());

      AI->eraseFromParent();
      MergedAwayAlloca = true;
      ++NumMergedAllocas;
      IFI.StaticAllocas[AllocaNo] = nullptr;
      break;
    }

    // If we already nuked the alloca, we're done with it.
    if (MergedAwayAlloca)
      continue;

    // If we were unable to merge away the alloca either because there are no
    // allocas of the right type available or because we reused them all
    // already, remember that this alloca came from an inlined function and
    // mark it used so we don't reuse it for other allocas from this inline
    // operation.
    AllocasForType.push_back(AI);
    UsedAllocas.insert(AI);
  }
}
/// If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
/// This function also does some basic book-keeping to update the IR. The
/// InlinedArrayAllocas map keeps track of any allocas that are already
/// available from other functions inlined into the caller. If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
static InlineResult inlineCallIfPossible(
    CallBase &CB, InlineFunctionInfo &IFI,
    InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory,
    bool InsertLifetime, function_ref<AAResults &(Function &)> &AARGetter,
    ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
  Function *Callee = CB.getCalledFunction();
  Function *Caller = CB.getCaller();

  AAResults &AAR = AARGetter(*Callee);

  // Try to inline the function. Get the list of static allocas that were
  // inlined.
  InlineResult IR = InlineFunction(CB, IFI, &AAR, InsertLifetime);
  if (!IR.isSuccess())
    return IR;

  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
    ImportedFunctionsStats.recordInline(*Caller, *Callee);

  AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee);

  if (!DisableInlinedAllocaMerging)
    mergeInlinedArrayAllocas(Caller, IFI, InlinedArrayAllocas, InlineHistory);

  return IR; // success
}
/// Return true if the specified inline history ID
/// indicates an inline history that includes the specified function.
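///
/// The history is a parent-pointer chain encoded in a vector: each entry is
/// (inlined function, index of the entry for the call site that exposed it),
/// with -1 terminating the chain. For example, if inlining B into A created
/// entry 0 = {B, -1}, and inlining a call to C exposed by that created entry
/// 1 = {C, 0}, then the chain for ID 1 is C -> B, so
/// inlineHistoryIncludes(B, 1, ...) returns true.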
static bool inlineHistoryIncludes(
    Function *F, int InlineHistoryID,
    const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
  while (InlineHistoryID != -1) {
    assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
           "Invalid inline history ID");
    if (InlineHistory[InlineHistoryID].first == F)
      return true;
    InlineHistoryID = InlineHistory[InlineHistoryID].second;
  }
  return false;
}
bool LegacyInlinerBase::doInitialization(CallGraph &CG) {
  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
    ImportedFunctionsStats.setModuleInfo(CG.getModule());
  return false; // No changes to CallGraph.
}

bool LegacyInlinerBase::runOnSCC(CallGraphSCC &SCC) {
  if (skipSCC(SCC))
    return false;
  return inlineCalls(SCC);
}
static bool
inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
                std::function<AssumptionCache &(Function &)> GetAssumptionCache,
                ProfileSummaryInfo *PSI,
                std::function<const TargetLibraryInfo &(Function &)> GetTLI,
                bool InsertLifetime,
                function_ref<InlineCost(CallBase &CB)> GetInlineCost,
                function_ref<AAResults &(Function &)> AARGetter,
                ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
  SmallPtrSet<Function *, 8> SCCFunctions;
  LLVM_DEBUG(dbgs() << "Inliner visiting SCC:");
  for (CallGraphNode *Node : SCC) {
    Function *F = Node->getFunction();
    if (F)
      SCCFunctions.insert(F);
    LLVM_DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
  }

  // Scan through and identify all call sites ahead of time so that we only
  // inline call sites in the original functions, not call sites that result
  // from inlining other functions.
  SmallVector<std::pair<CallBase *, int>, 16> CallSites;

  // When inlining a callee produces new call sites, we want to keep track of
  // the fact that they were inlined from the callee. This allows us to avoid
  // infinite inlining in some obscure cases. To represent this, we use an
  // index into the InlineHistory vector.
  SmallVector<std::pair<Function *, int>, 8> InlineHistory;

  for (CallGraphNode *Node : SCC) {
    Function *F = Node->getFunction();
    if (!F || F->isDeclaration())
      continue;

    OptimizationRemarkEmitter ORE(F);
    for (BasicBlock &BB : *F)
      for (Instruction &I : BB) {
        auto *CB = dyn_cast<CallBase>(&I);
        // If this isn't a call, or it is a call to an intrinsic, it can
        // never be inlined.
        if (!CB || isa<IntrinsicInst>(I))
          continue;

        // If this is a direct call to an external function, we can never
        // inline it. If it is an indirect call, inlining may resolve it to be
        // a direct call, so we keep it.
        if (Function *Callee = CB->getCalledFunction())
          if (Callee->isDeclaration()) {
            using namespace ore;

            setInlineRemark(*CB, "unavailable definition");
            ORE.emit([&]() {
              return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
                     << NV("Callee", Callee) << " will not be inlined into "
                     << NV("Caller", CB->getCaller())
                     << " because its definition is unavailable"
                     << setIsVerbose();
            });
            continue;
          }

        CallSites.push_back(std::make_pair(CB, -1));
      }
  }

  LLVM_DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");
  // If there are no calls in this SCC, exit early.
  if (CallSites.empty())
    return false;
  // Now that we have all of the call sites, move the ones to functions in the
  // current SCC to the end of the list.
  unsigned FirstCallInSCC = CallSites.size();
  for (unsigned I = 0; I < FirstCallInSCC; ++I)
    if (Function *F = CallSites[I].first->getCalledFunction())
      if (SCCFunctions.count(F))
        std::swap(CallSites[I--], CallSites[--FirstCallInSCC]);
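  // (This is an in-place partition: decrementing I re-examines the element
  // just swapped in, and afterwards CallSites[FirstCallInSCC..] are exactly
  // the calls whose callees are in this SCC.)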

  InlinedArrayAllocasTy InlinedArrayAllocas;
  InlineFunctionInfo InlineInfo(&CG, GetAssumptionCache, PSI);

  // Now that we have all of the call sites, loop over them and inline them if
  // it looks profitable to do so.
  bool Changed = false;
  bool LocalChange;
  do {
    LocalChange = false;
    // Iterate over the outer loop because inlining functions can cause
    // indirect calls to become direct calls.
    // CallSites may be modified inside the loop, so a ranged for loop cannot
    // be used.
    for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
      auto &P = CallSites[CSi];
      CallBase &CB = *P.first;
      const int InlineHistoryID = P.second;

      Function *Caller = CB.getCaller();
      Function *Callee = CB.getCalledFunction();

      // We can only inline direct calls to non-declarations.
      if (!Callee || Callee->isDeclaration())
        continue;

      bool IsTriviallyDead = isInstructionTriviallyDead(&CB, &GetTLI(*Caller));

      if (!IsTriviallyDead) {
        // If this call site was obtained by inlining another function, verify
        // that the inline path for the function did not include the callee
        // itself. If so, we'd be recursively inlining the same function,
        // which would provide the same callsites, which would cause us to
        // infinitely inline.
        if (InlineHistoryID != -1 &&
            inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
          setInlineRemark(CB, "recursive");
          continue;
        }
      }
      // FIXME for new PM: because of the old PM we currently generate ORE and
      // in turn BFI on demand. With the new PM, the ORE dependency should
      // just become a regular analysis dependency.
      OptimizationRemarkEmitter ORE(Caller);

      auto OIC = shouldInline(CB, GetInlineCost, ORE);
      // If the policy determines that we should not inline this call site,
      // move on to the next one.
      if (!OIC)
        continue;
      // If this call site is dead and it is to a readonly function, we should
      // just delete the call instead of trying to inline it, regardless of
      // size. This happens because IPSCCP propagates the result out of the
      // call and then we're left with the dead call.
      if (IsTriviallyDead) {
        LLVM_DEBUG(dbgs() << "    -> Deleting dead call: " << CB << "\n");
        // Update the call graph by deleting the edge from Callee to Caller.
        setInlineRemark(CB, "trivially dead");
        CG[Caller]->removeCallEdgeFor(CB);
        CB.eraseFromParent();
        ++NumCallsDeleted;
      } else {
        // Get DebugLoc to report. CB will be invalid after inlining.
        DebugLoc DLoc = CB.getDebugLoc();
        BasicBlock *Block = CB.getParent();

        // Attempt to inline the function.
        using namespace ore;

        InlineResult IR = inlineCallIfPossible(
            CB, InlineInfo, InlinedArrayAllocas, InlineHistoryID,
            InsertLifetime, AARGetter, ImportedFunctionsStats);
        if (!IR.isSuccess()) {
          setInlineRemark(CB, std::string(IR.getFailureReason()) + "; " +
                                  inlineCostStr(*OIC));
          ORE.emit([&]() {
            return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
                                            Block)
                   << NV("Callee", Callee) << " will not be inlined into "
                   << NV("Caller", Caller) << ": "
                   << NV("Reason", IR.getFailureReason());
          });
          continue;
        }
        ++NumInlined;

        emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC);

        // If inlining this function gave us any new call sites, throw them
        // onto our worklist to process. They are useful inline candidates.
        if (!InlineInfo.InlinedCalls.empty()) {
          // Create a new inline history entry for this, so that we remember
          // that these new callsites came about due to inlining Callee.
          int NewHistoryID = InlineHistory.size();
          InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));
#ifndef NDEBUG
          // Make sure there are no duplicates in the inline candidates. This
          // could happen when a callsite is simplified to reuse the return
          // value of another callsite during function cloning; the other
          // callsite will then be reconsidered here.
          DenseSet<CallBase *> DbgCallSites;
          for (auto &II : CallSites)
            DbgCallSites.insert(II.first);
#endif

          for (Value *Ptr : InlineInfo.InlinedCalls) {
#ifndef NDEBUG
            assert(DbgCallSites.count(dyn_cast<CallBase>(Ptr)) == 0);
#endif
            CallSites.push_back(
                std::make_pair(dyn_cast<CallBase>(Ptr), NewHistoryID));
          }
        }
      }
      // If we inlined or deleted the last possible call site to the function,
      // delete the function body now.
      if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
          // TODO: Can remove if in SCC now.
          !SCCFunctions.count(Callee) &&
          // The function may be apparently dead, but if there are indirect
          // callgraph references to the node, we cannot delete it yet; this
          // could invalidate the CGSCC iterator.
          CG[Callee]->getNumReferences() == 0) {
        LLVM_DEBUG(dbgs() << "    -> Deleting dead function: "
                          << Callee->getName() << "\n");
        CallGraphNode *CalleeNode = CG[Callee];

        // Remove any call graph edges from the callee to its callees.
        CalleeNode->removeAllCalledFunctions();

        // Remove the node for the callee from the call graph and delete it.
        delete CG.removeFunctionFromModule(CalleeNode);
        ++NumDeleted;
      }

      // Remove this call site from the list. If possible, use
      // swap/pop_back for efficiency, but do not use it if doing so would
      // move a call site to a function in this SCC before the
      // 'FirstCallInSCC' barrier.
      if (SCC.isSingular()) {
        CallSites[CSi] = CallSites.back();
        CallSites.pop_back();
      } else {
        CallSites.erase(CallSites.begin() + CSi);
      }
      --CSi;

      Changed = true;
      LocalChange = true;
    }
  } while (LocalChange);

  return Changed;
}
bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) {
  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
  ACT = &getAnalysis<AssumptionCacheTracker>();
  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
    return getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  };
  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
    return ACT->getAssumptionCache(F);
  };
  return inlineCallsImpl(
      SCC, CG, GetAssumptionCache, PSI, GetTLI, InsertLifetime,
      [&](CallBase &CB) { return getInlineCost(CB); }, LegacyAARGetter(*this),
      ImportedFunctionsStats);
}

/// Remove now-dead linkonce functions at the end of
/// processing to avoid breaking the SCC traversal.
bool LegacyInlinerBase::doFinalization(CallGraph &CG) {
  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
    ImportedFunctionsStats.dump(InlinerFunctionImportStats ==
                                InlinerFunctionImportStatsOpts::Verbose);
  return removeDeadFunctions(CG);
}
/// Remove dead functions that are not included in DNR (Do Not Remove) list.
bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG,
                                            bool AlwaysInlineOnly) {
  SmallVector<CallGraphNode *, 16> FunctionsToRemove;
  SmallVector<Function *, 16> DeadFunctionsInComdats;

  auto RemoveCGN = [&](CallGraphNode *CGN) {
    // Remove any call graph edges from the function to its callees.
    CGN->removeAllCalledFunctions();

    // Remove any edges from the external node to the function's call graph
    // node. These edges might have been made irrelevant due to
    // optimization of the program.
    CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);

    // Queue the node for the function to be removed from the call graph and
    // deleted below.
    FunctionsToRemove.push_back(CGN);
  };

  // Scan for all of the functions, looking for ones that should now be removed
  // from the program. Insert the dead ones in the FunctionsToRemove set.
  for (const auto &I : CG) {
    CallGraphNode *CGN = I.second.get();
    Function *F = CGN->getFunction();
    if (!F || F->isDeclaration())
      continue;

    // Handle the case when this function is called and we only care about
    // always-inline functions. This is a bit of a hack to share code between
    // here and the InlineAlways pass.
    if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline))
      continue;

    // If the only remaining users of the function are dead constants, remove
    // them.
    F->removeDeadConstantUsers();

    if (!F->isDefTriviallyDead())
      continue;

    // It is unsafe to drop a function with discardable linkage from a COMDAT
    // without also dropping the other members of the COMDAT.
    // The inliner doesn't visit non-function entities which are in COMDAT
    // groups so it is unsafe to do so *unless* the linkage is local.
    if (!F->hasLocalLinkage()) {
      if (F->hasComdat()) {
        DeadFunctionsInComdats.push_back(F);
        continue;
      }
    }

    RemoveCGN(CGN);
  }
  if (!DeadFunctionsInComdats.empty()) {
    // Filter out the functions whose comdats remain alive.
    filterDeadComdatFunctions(DeadFunctionsInComdats);
    // Remove the rest.
    for (Function *F : DeadFunctionsInComdats)
      RemoveCGN(CG[F]);
  }

  if (FunctionsToRemove.empty())
    return false;

  // Now that we know which functions to delete, do so. We didn't want to do
  // this inline, because that would invalidate our CallGraph::iterator
  // objects. :(
  //
  // Note that it doesn't matter that we are iterating over a non-stable order
  // here to do this, it doesn't matter which order the functions are deleted
  // in.
  array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
  FunctionsToRemove.erase(
      std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()),
      FunctionsToRemove.end());
  for (CallGraphNode *CGN : FunctionsToRemove) {
    delete CG.removeFunctionFromModule(CGN);
    ++NumDeleted;
  }
  return true;
}
InlineAdvisor &
InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
                        FunctionAnalysisManager &FAM, Module &M) {
  if (OwnedAdvisor)
    return *OwnedAdvisor;

  auto *IAA = MAM.getCachedResult<InlineAdvisorAnalysis>(M);
  if (!IAA) {
    // It should still be possible to run the inliner as a stand-alone SCC
    // pass, for test scenarios. In that case, we default to the
    // DefaultInlineAdvisor, which doesn't need to keep state between SCC pass
    // runs. It also uses just the default InlineParams.
    // In this case, we need to use the provided FAM, which is valid for the
    // duration of the inliner pass, and thus the lifetime of the owned
    // advisor. The one we would get from the MAM can be invalidated as a
    // result of the inliner's activity.
    OwnedAdvisor =
        std::make_unique<DefaultInlineAdvisor>(M, FAM, getInlineParams());

    if (!CGSCCInlineReplayFile.empty())
      OwnedAdvisor = getReplayInlineAdvisor(
          M, FAM, M.getContext(), std::move(OwnedAdvisor),
          ReplayInlinerSettings{CGSCCInlineReplayFile,
                                CGSCCInlineReplayScope,
                                CGSCCInlineReplayFallback,
                                {CGSCCInlineReplayFormat}},
          /*EmitRemarks=*/true);

    return *OwnedAdvisor;
  }
  assert(IAA->getAdvisor() &&
         "Expected a present InlineAdvisorAnalysis to also have an "
         "InlineAdvisor initialized");
  return *IAA->getAdvisor();
}
PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
                                   CGSCCAnalysisManager &AM, LazyCallGraph &CG,
                                   CGSCCUpdateResult &UR) {
  const auto &MAMProxy =
      AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG);
  bool Changed = false;

  assert(InitialC.size() > 0 && "Cannot handle an empty SCC!");
  Module &M = *InitialC.begin()->getFunction().getParent();
  ProfileSummaryInfo *PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(M);

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG)
          .getManager();

  InlineAdvisor &Advisor = getAdvisor(MAMProxy, FAM, M);
  Advisor.onPassEntry();

  auto AdvisorOnExit = make_scope_exit([&] { Advisor.onPassExit(&InitialC); });

  // We use a single common worklist for calls across the entire SCC. We
  // process these in-order and append new calls introduced during inlining to
  // the end. The PriorityInlineOrder is optional here; when it is used, a
  // smaller callee has higher priority to be inlined.
  //
  // Note that this particular order of processing is actually critical to
  // avoid very bad behaviors. Consider *highly connected* call graphs where
  // each function contains a small amount of code and a couple of calls to
  // other functions. Because the LLVM inliner is fundamentally a bottom-up
  // inliner, it can handle gracefully the fact that these all appear to be
  // reasonable inlining candidates as it will flatten things until they become
  // too big to inline, and then move on and flatten another batch.
  //
  // However, when processing call edges *within* an SCC we cannot rely on this
  // bottom-up behavior. As a consequence, with heavily connected *SCCs* of
  // functions we can end up incrementally inlining N calls into each of
  // N functions because each incremental inlining decision looks good and we
  // don't have a topological ordering to prevent explosions.
  //
  // To compensate for this, we don't process transitive edges made immediate
  // by inlining until we've done one pass of inlining across the entire SCC.
  // Large, highly connected SCCs still lead to some amount of code bloat in
  // this model, but it is uniformly spread across all the functions in the SCC
  // and eventually they all become too large to inline, rather than
  // incrementally making a single function grow in a super linear fashion.
  std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> Calls;
  if (InlineEnablePriorityOrder)
    Calls = std::make_unique<PriorityInlineOrder<InlineSizePriority>>();
  else
    Calls = std::make_unique<DefaultInlineOrder<std::pair<CallBase *, int>>>();
  assert(Calls != nullptr && "Expected an initialized InlineOrder");
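  // DefaultInlineOrder visits calls in the order they were pushed (FIFO),
  // while PriorityInlineOrder<InlineSizePriority> pops the call whose callee
  // is smallest first, per the comment above.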

  // Populate the initial list of calls in this SCC.
  for (auto &N : InitialC) {
    auto &ORE =
        FAM.getResult<OptimizationRemarkEmitterAnalysis>(N.getFunction());
    // We want to generally process call sites top-down in order for
    // simplifications stemming from replacing the call with the returned value
    // after inlining to be visible to subsequent inlining decisions.
    // FIXME: Using instructions sequence is a really bad way to do this.
    // Instead we should do an actual RPO walk of the function body.
    for (Instruction &I : instructions(N.getFunction()))
      if (auto *CB = dyn_cast<CallBase>(&I))
        if (Function *Callee = CB->getCalledFunction()) {
          if (!Callee->isDeclaration())
            Calls->push({CB, -1});
          else if (!isa<IntrinsicInst>(I)) {
            using namespace ore;
            setInlineRemark(*CB, "unavailable definition");
            ORE.emit([&]() {
              return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
                     << NV("Callee", Callee) << " will not be inlined into "
                     << NV("Caller", CB->getCaller())
                     << " because its definition is unavailable"
                     << setIsVerbose();
            });
          }
        }
  }
  if (Calls->empty())
    return PreservedAnalyses::all();

  // Capture updatable variable for the current SCC.
  auto *C = &InitialC;

  // When inlining a callee produces new call sites, we want to keep track of
  // the fact that they were inlined from the callee. This allows us to avoid
  // infinite inlining in some obscure cases. To represent this, we use an
  // index into the InlineHistory vector.
  SmallVector<std::pair<Function *, int>, 16> InlineHistory;

  // Track a set vector of inlined callees so that we can augment the caller
  // with all of their edges in the call graph before pruning out the ones that
  // got simplified away.
  SmallSetVector<Function *, 4> InlinedCallees;

  // Track the dead functions to delete once finished with inlining calls. We
  // defer deleting these to make it easier to handle the call graph updates.
  SmallVector<Function *, 4> DeadFunctions;

  // Track potentially dead non-local functions with comdats to see if they can
  // be deleted as a batch after inlining.
  SmallVector<Function *, 4> DeadFunctionsInComdats;

  // Loop forward over all of the calls.
  while (!Calls->empty()) {
    // We expect the calls to typically be batched with sequences of calls that
    // have the same caller, so we first set up some shared infrastructure for
    // this caller. We also do any pruning we can at this layer on the caller
    // alone.
    Function &F = *Calls->front().first->getCaller();
    LazyCallGraph::Node &N = *CG.lookup(F);
    if (CG.lookupSCC(N) != C) {
      Calls->pop();
      continue;
    }

    LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n"
                      << "    Function size: " << F.getInstructionCount()
                      << "\n");

    auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
      return FAM.getResult<AssumptionAnalysis>(F);
    };

    // Now process as many calls as we have within this caller in the sequence.
    // We bail out as soon as the caller has to change so we can update the
    // call graph and prepare the context of that new caller.
    bool DidInline = false;
    while (!Calls->empty() && Calls->front().first->getCaller() == &F) {
      auto P = Calls->pop();
      CallBase *CB = P.first;
      const int InlineHistoryID = P.second;
      Function &Callee = *CB->getCalledFunction();

      if (InlineHistoryID != -1 &&
          inlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) {
        LLVM_DEBUG(dbgs() << "Skipping inlining due to history: " << F.getName()
                          << " -> " << Callee.getName() << "\n");
        setInlineRemark(*CB, "recursive");
        continue;
      }

      // Check if this inlining may repeat breaking an SCC apart that has
      // already been split once before. In that case, inlining here may
      // trigger infinite inlining, much like is prevented within the inliner
      // itself by the InlineHistory above, but spread across CGSCC iterations
      // and thus hidden from the full inline history.
      LazyCallGraph::SCC *CalleeSCC = CG.lookupSCC(*CG.lookup(Callee));
      if (CalleeSCC == C && UR.InlinedInternalEdges.count({&N, C})) {
        LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
                             "previously split out of this SCC by inlining: "
                          << F.getName() << " -> " << Callee.getName() << "\n");
        setInlineRemark(*CB, "recursive SCC split");
        continue;
      }

      std::unique_ptr<InlineAdvice> Advice =
          Advisor.getAdvice(*CB, OnlyMandatory);

      // Check whether we want to inline this callsite.
      if (!Advice)
        continue;

      if (!Advice->isInliningRecommended()) {
        Advice->recordUnattemptedInlining();
        continue;
      }
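
      // Read back any cost multiplier a previous iteration attached to this
      // call site (see the intra-SCC multiplier logic below), defaulting to 1
      // when the attribute is absent.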
      int CBCostMult =
          getStringFnAttrAsInt(
              *CB, InlineConstants::FunctionInlineCostMultiplierAttributeName)
              .getValueOr(1);

      // Setup the data structure used to plumb customization into the
      // `InlineFunction` routine.
      InlineFunctionInfo IFI(
          /*cg=*/nullptr, GetAssumptionCache, PSI,
          &FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
          &FAM.getResult<BlockFrequencyAnalysis>(Callee));

      InlineResult IR =
          InlineFunction(*CB, IFI, &FAM.getResult<AAManager>(*CB->getCaller()));
      if (!IR.isSuccess()) {
        Advice->recordUnsuccessfulInlining(IR);
        continue;
      }

      DidInline = true;
      InlinedCallees.insert(&Callee);
      ++NumInlined;

      LLVM_DEBUG(dbgs() << "    Size after inlining: "
                        << F.getInstructionCount() << "\n");

      // Add any new callsites to defined functions to the worklist.
      if (!IFI.InlinedCallSites.empty()) {
        int NewHistoryID = InlineHistory.size();
        InlineHistory.push_back({&Callee, InlineHistoryID});

        for (CallBase *ICB : reverse(IFI.InlinedCallSites)) {
          Function *NewCallee = ICB->getCalledFunction();
          assert(!(NewCallee && NewCallee->isIntrinsic()) &&
                 "Intrinsic calls should not be tracked.");
          if (!NewCallee) {
            // Try to promote an indirect (virtual) call without waiting for
            // the post-inline cleanup and the next DevirtSCCRepeatedPass
            // iteration because the next iteration may not happen and we may
            // miss inlining it.
            if (tryPromoteCall(*ICB))
              NewCallee = ICB->getCalledFunction();
          }
          if (NewCallee) {
            if (!NewCallee->isDeclaration()) {
              Calls->push({ICB, NewHistoryID});
              // Continually inlining through an SCC can result in huge compile
              // times and bloated code since we arbitrarily stop at some point
              // when the inliner decides it's not profitable to inline
              // anymore. We attempt to mitigate this by making these calls
              // exponentially more expensive.
              // This doesn't apply to calls in the same SCC since if we do
              // inline through the SCC the function will end up being
              // self-recursive which the inliner bails out on, and inlining
              // within an SCC is necessary for performance.
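              // With the default intra-scc-cost-multiplier of 2, a call
              // exposed through k successive child-SCC inlinings carries a
              // multiplier of 2^k.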
              if (CalleeSCC != C &&
                  CalleeSCC == CG.lookupSCC(CG.get(*NewCallee))) {
                Attribute NewCBCostMult = Attribute::get(
                    M.getContext(),
                    InlineConstants::FunctionInlineCostMultiplierAttributeName,
                    itostr(CBCostMult * IntraSCCCostMultiplier));
                ICB->addFnAttr(NewCBCostMult);
              }
            }
          }
        }
      }

      // Merge the attributes based on the inlining.
      AttributeFuncs::mergeAttributesForInlining(F, Callee);

      // For local functions or discardable functions without comdats, check
      // whether this makes the callee trivially dead. In that case, we can
      // drop the body of the function eagerly which may reduce the number of
      // callers of other functions to one, changing inline cost thresholds.
      // Non-local discardable functions with comdats are checked later on.
      bool CalleeWasDeleted = false;
      if (Callee.isDiscardableIfUnused() && Callee.hasZeroLiveUses() &&
          !CG.isLibFunction(Callee)) {
        if (Callee.hasLocalLinkage() || !Callee.hasComdat()) {
          Calls->erase_if([&](const std::pair<CallBase *, int> &Call) {
            return Call.first->getCaller() == &Callee;
          });
          // Clear the body and queue the function itself for deletion when we
          // finish inlining and call graph updates.
          // Note that after this point, it is an error to do anything other
          // than use the callee's address or delete it.
          Callee.dropAllReferences();
          assert(!is_contained(DeadFunctions, &Callee) &&
                 "Cannot cause a function to become dead twice!");
          DeadFunctions.push_back(&Callee);
          CalleeWasDeleted = true;
        } else {
          DeadFunctionsInComdats.push_back(&Callee);
        }
      }
      if (CalleeWasDeleted)
        Advice->recordInliningWithCalleeDeleted();
      else
        Advice->recordInlining();
    }
    if (!DidInline)
      continue;
    Changed = true;

    // At this point, since we have made changes we have at least removed
    // a call instruction. However, in the process we do some incremental
    // simplification of the surrounding code. This simplification can
    // essentially do all of the same things as a function pass and we can
    // re-use the exact same logic for updating the call graph to reflect the
    // change.
    //
    // Inside the update, we also update the FunctionAnalysisManager in the
    // proxy for this particular SCC. We do this as the SCC may have changed
    // and as we're going to mutate this particular function we want to make
    // sure the proxy is in place to forward any invalidation events.
    LazyCallGraph::SCC *OldC = C;
    C = &updateCGAndAnalysisManagerForCGSCCPass(CG, *C, N, AM, UR, FAM);
    LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");

    // If this causes an SCC to split apart into multiple smaller SCCs, there
    // is a subtle risk we need to prepare for. Other transformations may
    // expose an "infinite inlining" opportunity later, and because of the SCC
    // mutation, we will revisit this function and potentially re-inline. If
    // we do, and that re-inlining also has the potential to mutate the SCC
    // structure, the infinite inlining problem can manifest through infinite
    // SCC splits and merges. To avoid this, we capture the originating caller
    // node and the SCC containing the call edge. This is a slight
    // overapproximation of the possible inlining decisions that must be
    // avoided, but is relatively efficient to store. We use C != OldC to know
    // when a new SCC is generated and the original SCC may be generated via
    // merge in later iterations.
    //
    // It is also possible that even if no new SCC is generated
    // (i.e., C == OldC), the original SCC could be split and then merged
    // into the same one as itself, and then the original SCC will be added
    // into UR.CWorklist again; we want to catch such cases too.
    //
    // FIXME: This seems like a very heavyweight way of retaining the inline
    // history, we should look for a more efficient way of tracking it.
    if ((C != OldC || UR.CWorklist.count(OldC)) &&
        llvm::any_of(InlinedCallees, [&](Function *Callee) {
          return CG.lookupSCC(*CG.lookup(*Callee)) == OldC;
        })) {
      LLVM_DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, "
                           "retaining this to avoid infinite inlining.\n");
      UR.InlinedInternalEdges.insert({&N, OldC});
    }
    InlinedCallees.clear();

    // Invalidate analyses for this function now so that we don't have to
    // invalidate analyses for all functions in this SCC later.
    FAM.invalidate(F, PreservedAnalyses::none());
  }

  // We must ensure that we only delete functions with comdats if every
  // function in the comdat is going to be deleted.
  if (!DeadFunctionsInComdats.empty()) {
    filterDeadComdatFunctions(DeadFunctionsInComdats);
    for (auto *Callee : DeadFunctionsInComdats)
      Callee->dropAllReferences();
    DeadFunctions.append(DeadFunctionsInComdats);
  }

  // Now that we've finished inlining all of the calls across this SCC, delete
  // all of the trivially dead functions, updating the call graph and the CGSCC
  // pass manager in the process.
  //
  // Note that this walks a pointer set which has non-deterministic order but
  // that is OK as all we do is delete things and add pointers to unordered
  // sets.
  for (Function *DeadF : DeadFunctions) {
    // Get the necessary information out of the call graph and nuke the
    // function there. Also, clear out any cached analyses.
    auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
    FAM.clear(*DeadF, DeadF->getName());
    AM.clear(DeadC, DeadC.getName());
    auto &DeadRC = DeadC.getOuterRefSCC();
    CG.removeDeadFunction(*DeadF);

    // Mark the relevant parts of the call graph as invalid so we don't visit
    // them.
    UR.InvalidatedSCCs.insert(&DeadC);
    UR.InvalidatedRefSCCs.insert(&DeadRC);

    // If the updated SCC was the one containing the deleted function, clear it.
    if (&DeadC == UR.UpdatedC)
      UR.UpdatedC = nullptr;

    // And delete the actual function from the module.
    M.getFunctionList().erase(DeadF);
    ++NumDeleted;
  }

  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  // Even if we change the IR, we update the core CGSCC data structures and so
  // can preserve the proxy to the function analysis manager.
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  // We have already invalidated all analyses on modified functions.
  PA.preserveSet<AllAnalysesOn<Function>>();
  return PA;
}
ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params,
                                                   bool MandatoryFirst,
                                                   InliningAdvisorMode Mode,
                                                   unsigned MaxDevirtIterations)
    : Params(Params), Mode(Mode), MaxDevirtIterations(MaxDevirtIterations) {
  // Run the inliner first. The theory is that we are walking bottom-up and so
  // the callees have already been fully optimized, and we want to inline them
  // into the callers so that our optimizations can reflect that.
  // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO
  // because it makes profile annotation in the backend inaccurate.
  if (MandatoryFirst)
    PM.addPass(InlinerPass(/*OnlyMandatory*/ true));
  PM.addPass(InlinerPass());
}

PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M,
                                                ModuleAnalysisManager &MAM) {
  auto &IAA = MAM.getResult<InlineAdvisorAnalysis>(M);
  if (!IAA.tryCreate(Params, Mode,
                     {CGSCCInlineReplayFile,
                      CGSCCInlineReplayScope,
                      CGSCCInlineReplayFallback,
                      {CGSCCInlineReplayFormat}})) {
    M.getContext().emitError(
        "Could not setup Inlining Advisor for the requested "
        "mode and/or options");
    return PreservedAnalyses::all();
  }

  // We wrap the CGSCC pipeline in a devirtualization repeater. This will try
  // to detect when we devirtualize indirect calls and iterate the SCC passes
  // in that case to try and catch knock-on inlining or function attrs
  // opportunities. Then we add it to the module pipeline by walking the SCCs
  // in postorder (or bottom-up).
  // If MaxDevirtIterations is 0, we just don't use the devirtualization
  // wrapper.
  if (MaxDevirtIterations == 0)
    MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(PM)));
  else
    MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
        createDevirtSCCRepeatedPass(std::move(PM), MaxDevirtIterations)));

  MPM.addPass(std::move(AfterCGMPM));
  MPM.run(M, MAM);

  // Discard the InlineAdvisor, a subsequent inlining session should construct
  // its own.
  auto PA = PreservedAnalyses::all();
  if (!KeepAdvisorForPrinting)
    PA.abandon<InlineAdvisorAnalysis>();
  return PA;
}

void InlinerPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<InlinerPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);
  if (OnlyMandatory)
    OS << "<only-mandatory>";
}
void ModuleInlinerWrapperPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  // Print some info about passes added to the wrapper. This is however
  // incomplete as InlineAdvisorAnalysis part isn't included (which also depends
  // on Params and Mode).
  if (!MPM.isEmpty()) {
    MPM.printPipeline(OS, MapClassName2PassName);
    OS << ",";
  }
  OS << "cgscc(";
  if (MaxDevirtIterations != 0)
    OS << "devirt<" << MaxDevirtIterations << ">(";
  PM.printPipeline(OS, MapClassName2PassName);
  if (MaxDevirtIterations != 0)
    OS << ")";
  OS << ")";
}
  1056. }