InlineCost.cpp 120 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181
  1. //===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements inline cost analysis.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "llvm/Analysis/InlineCost.h"
  13. #include "llvm/ADT/STLExtras.h"
  14. #include "llvm/ADT/SetVector.h"
  15. #include "llvm/ADT/SmallPtrSet.h"
  16. #include "llvm/ADT/SmallVector.h"
  17. #include "llvm/ADT/Statistic.h"
  18. #include "llvm/Analysis/AssumptionCache.h"
  19. #include "llvm/Analysis/BlockFrequencyInfo.h"
  20. #include "llvm/Analysis/CodeMetrics.h"
  21. #include "llvm/Analysis/ConstantFolding.h"
  22. #include "llvm/Analysis/InstructionSimplify.h"
  23. #include "llvm/Analysis/LoopInfo.h"
  24. #include "llvm/Analysis/MemoryBuiltins.h"
  25. #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  26. #include "llvm/Analysis/ProfileSummaryInfo.h"
  27. #include "llvm/Analysis/TargetLibraryInfo.h"
  28. #include "llvm/Analysis/TargetTransformInfo.h"
  29. #include "llvm/Analysis/ValueTracking.h"
  30. #include "llvm/Config/llvm-config.h"
  31. #include "llvm/IR/AssemblyAnnotationWriter.h"
  32. #include "llvm/IR/CallingConv.h"
  33. #include "llvm/IR/DataLayout.h"
  34. #include "llvm/IR/Dominators.h"
  35. #include "llvm/IR/GetElementPtrTypeIterator.h"
  36. #include "llvm/IR/GlobalAlias.h"
  37. #include "llvm/IR/InstVisitor.h"
  38. #include "llvm/IR/IntrinsicInst.h"
  39. #include "llvm/IR/Operator.h"
  40. #include "llvm/IR/PatternMatch.h"
  41. #include "llvm/Support/CommandLine.h"
  42. #include "llvm/Support/Debug.h"
  43. #include "llvm/Support/FormattedStream.h"
  44. #include "llvm/Support/raw_ostream.h"
  45. #include <climits>
  46. #include <limits>
  47. #include <optional>
  48. using namespace llvm;
  49. #define DEBUG_TYPE "inline-cost"
  50. STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
  51. static cl::opt<int>
  52. DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225),
  53. cl::desc("Default amount of inlining to perform"));
  54. // We introduce this option since there is a minor compile-time win by avoiding
  55. // addition of TTI attributes (target-features in particular) to inline
  56. // candidates when they are guaranteed to be the same as top level methods in
  57. // some use cases. If we avoid adding the attribute, we need an option to avoid
  58. // checking these attributes.
  59. static cl::opt<bool> IgnoreTTIInlineCompatible(
  60. "ignore-tti-inline-compatible", cl::Hidden, cl::init(false),
  61. cl::desc("Ignore TTI attributes compatibility check between callee/caller "
  62. "during inline cost calculation"));
  63. static cl::opt<bool> PrintInstructionComments(
  64. "print-instruction-comments", cl::Hidden, cl::init(false),
  65. cl::desc("Prints comments for instruction based on inline cost analysis"));
  66. static cl::opt<int> InlineThreshold(
  67. "inline-threshold", cl::Hidden, cl::init(225),
  68. cl::desc("Control the amount of inlining to perform (default = 225)"));
  69. static cl::opt<int> HintThreshold(
  70. "inlinehint-threshold", cl::Hidden, cl::init(325),
  71. cl::desc("Threshold for inlining functions with inline hint"));
  72. static cl::opt<int>
  73. ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
  74. cl::init(45),
  75. cl::desc("Threshold for inlining cold callsites"));
  76. static cl::opt<bool> InlineEnableCostBenefitAnalysis(
  77. "inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false),
  78. cl::desc("Enable the cost-benefit analysis for the inliner"));
  79. static cl::opt<int> InlineSavingsMultiplier(
  80. "inline-savings-multiplier", cl::Hidden, cl::init(8),
  81. cl::desc("Multiplier to multiply cycle savings by during inlining"));
  82. static cl::opt<int>
  83. InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100),
  84. cl::desc("The maximum size of a callee that get's "
  85. "inlined without sufficient cycle savings"));
  86. // We introduce this threshold to help performance of instrumentation based
  87. // PGO before we actually hook up inliner with analysis passes such as BPI and
  88. // BFI.
  89. static cl::opt<int> ColdThreshold(
  90. "inlinecold-threshold", cl::Hidden, cl::init(45),
  91. cl::desc("Threshold for inlining functions with cold attribute"));
  92. static cl::opt<int>
  93. HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
  94. cl::desc("Threshold for hot callsites "));
  95. static cl::opt<int> LocallyHotCallSiteThreshold(
  96. "locally-hot-callsite-threshold", cl::Hidden, cl::init(525),
  97. cl::desc("Threshold for locally hot callsites "));
  98. static cl::opt<int> ColdCallSiteRelFreq(
  99. "cold-callsite-rel-freq", cl::Hidden, cl::init(2),
  100. cl::desc("Maximum block frequency, expressed as a percentage of caller's "
  101. "entry frequency, for a callsite to be cold in the absence of "
  102. "profile information."));
  103. static cl::opt<int> HotCallSiteRelFreq(
  104. "hot-callsite-rel-freq", cl::Hidden, cl::init(60),
  105. cl::desc("Minimum block frequency, expressed as a multiple of caller's "
  106. "entry frequency, for a callsite to be hot in the absence of "
  107. "profile information."));
  108. static cl::opt<int>
  109. InstrCost("inline-instr-cost", cl::Hidden, cl::init(5),
  110. cl::desc("Cost of a single instruction when inlining"));
  111. static cl::opt<int>
  112. MemAccessCost("inline-memaccess-cost", cl::Hidden, cl::init(0),
  113. cl::desc("Cost of load/store instruction when inlining"));
  114. static cl::opt<int> CallPenalty(
  115. "inline-call-penalty", cl::Hidden, cl::init(25),
  116. cl::desc("Call penalty that is applied per callsite when inlining"));
  117. static cl::opt<size_t>
  118. StackSizeThreshold("inline-max-stacksize", cl::Hidden,
  119. cl::init(std::numeric_limits<size_t>::max()),
  120. cl::desc("Do not inline functions with a stack size "
  121. "that exceeds the specified limit"));
  122. static cl::opt<size_t>
  123. RecurStackSizeThreshold("recursive-inline-max-stacksize", cl::Hidden,
  124. cl::init(InlineConstants::TotalAllocaSizeRecursiveCaller),
  125. cl::desc("Do not inline recursive functions with a stack "
  126. "size that exceeds the specified limit"));
  127. static cl::opt<bool> OptComputeFullInlineCost(
  128. "inline-cost-full", cl::Hidden,
  129. cl::desc("Compute the full inline cost of a call site even when the cost "
  130. "exceeds the threshold."));
  131. static cl::opt<bool> InlineCallerSupersetNoBuiltin(
  132. "inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true),
  133. cl::desc("Allow inlining when caller has a superset of callee's nobuiltin "
  134. "attributes."));
  135. static cl::opt<bool> DisableGEPConstOperand(
  136. "disable-gep-const-evaluation", cl::Hidden, cl::init(false),
  137. cl::desc("Disables evaluation of GetElementPtr with constant operands"));
  138. namespace llvm {
  139. std::optional<int> getStringFnAttrAsInt(const Attribute &Attr) {
  140. if (Attr.isValid()) {
  141. int AttrValue = 0;
  142. if (!Attr.getValueAsString().getAsInteger(10, AttrValue))
  143. return AttrValue;
  144. }
  145. return std::nullopt;
  146. }
  147. std::optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
  148. return getStringFnAttrAsInt(CB.getFnAttr(AttrKind));
  149. }
  150. std::optional<int> getStringFnAttrAsInt(Function *F, StringRef AttrKind) {
  151. return getStringFnAttrAsInt(F->getFnAttribute(AttrKind));
  152. }
  153. namespace InlineConstants {
  154. int getInstrCost() { return InstrCost; }
  155. } // namespace InlineConstants
  156. } // namespace llvm
  157. namespace {
  158. class InlineCostCallAnalyzer;
  159. // This struct is used to store information about inline cost of a
  160. // particular instruction
  161. struct InstructionCostDetail {
  162. int CostBefore = 0;
  163. int CostAfter = 0;
  164. int ThresholdBefore = 0;
  165. int ThresholdAfter = 0;
  166. int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; }
  167. int getCostDelta() const { return CostAfter - CostBefore; }
  168. bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; }
  169. };
  170. class InlineCostAnnotationWriter : public AssemblyAnnotationWriter {
  171. private:
  172. InlineCostCallAnalyzer *const ICCA;
  173. public:
  174. InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
  175. void emitInstructionAnnot(const Instruction *I,
  176. formatted_raw_ostream &OS) override;
  177. };
  178. /// Carry out call site analysis, in order to evaluate inlinability.
  179. /// NOTE: the type is currently used as implementation detail of functions such
  180. /// as llvm::getInlineCost. Note the function_ref constructor parameters - the
  181. /// expectation is that they come from the outer scope, from the wrapper
  182. /// functions. If we want to support constructing CallAnalyzer objects where
  183. /// lambdas are provided inline at construction, or where the object needs to
  184. /// otherwise survive past the scope of the provided functions, we need to
  185. /// revisit the argument types.
  186. class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  187. typedef InstVisitor<CallAnalyzer, bool> Base;
  188. friend class InstVisitor<CallAnalyzer, bool>;
  189. protected:
  190. virtual ~CallAnalyzer() = default;
  191. /// The TargetTransformInfo available for this compilation.
  192. const TargetTransformInfo &TTI;
  193. /// Getter for the cache of @llvm.assume intrinsics.
  194. function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
  195. /// Getter for BlockFrequencyInfo
  196. function_ref<BlockFrequencyInfo &(Function &)> GetBFI;
  197. /// Profile summary information.
  198. ProfileSummaryInfo *PSI;
  199. /// The called function.
  200. Function &F;
  201. // Cache the DataLayout since we use it a lot.
  202. const DataLayout &DL;
  203. /// The OptimizationRemarkEmitter available for this compilation.
  204. OptimizationRemarkEmitter *ORE;
  205. /// The candidate callsite being analyzed. Please do not use this to do
  206. /// analysis in the caller function; we want the inline cost query to be
  207. /// easily cacheable. Instead, use the cover function paramHasAttr.
  208. CallBase &CandidateCall;
  209. /// Extension points for handling callsite features.
  210. // Called before a basic block was analyzed.
  211. virtual void onBlockStart(const BasicBlock *BB) {}
  212. /// Called after a basic block was analyzed.
  213. virtual void onBlockAnalyzed(const BasicBlock *BB) {}
  214. /// Called before an instruction was analyzed
  215. virtual void onInstructionAnalysisStart(const Instruction *I) {}
  216. /// Called after an instruction was analyzed
  217. virtual void onInstructionAnalysisFinish(const Instruction *I) {}
  218. /// Called at the end of the analysis of the callsite. Return the outcome of
  219. /// the analysis, i.e. 'InlineResult(true)' if the inlining may happen, or
  220. /// the reason it can't.
  221. virtual InlineResult finalizeAnalysis() { return InlineResult::success(); }
  222. /// Called when we're about to start processing a basic block, and every time
  223. /// we are done processing an instruction. Return true if there is no point in
  224. /// continuing the analysis (e.g. we've determined already the call site is
  225. /// too expensive to inline)
  226. virtual bool shouldStop() { return false; }
  227. /// Called before the analysis of the callee body starts (with callsite
  228. /// contexts propagated). It checks callsite-specific information. Return a
  229. /// reason analysis can't continue if that's the case, or 'true' if it may
  230. /// continue.
  231. virtual InlineResult onAnalysisStart() { return InlineResult::success(); }
  232. /// Called if the analysis engine decides SROA cannot be done for the given
  233. /// alloca.
  234. virtual void onDisableSROA(AllocaInst *Arg) {}
  235. /// Called the analysis engine determines load elimination won't happen.
  236. virtual void onDisableLoadElimination() {}
  237. /// Called when we visit a CallBase, before the analysis starts. Return false
  238. /// to stop further processing of the instruction.
  239. virtual bool onCallBaseVisitStart(CallBase &Call) { return true; }
  240. /// Called to account for a call.
  241. virtual void onCallPenalty() {}
  242. /// Called to account for a load or store.
  243. virtual void onMemAccess(){};
  244. /// Called to account for the expectation the inlining would result in a load
  245. /// elimination.
  246. virtual void onLoadEliminationOpportunity() {}
  247. /// Called to account for the cost of argument setup for the Call in the
  248. /// callee's body (not the callsite currently under analysis).
  249. virtual void onCallArgumentSetup(const CallBase &Call) {}
  250. /// Called to account for a load relative intrinsic.
  251. virtual void onLoadRelativeIntrinsic() {}
  252. /// Called to account for a lowered call.
  253. virtual void onLoweredCall(Function *F, CallBase &Call, bool IsIndirectCall) {
  254. }
  255. /// Account for a jump table of given size. Return false to stop further
  256. /// processing the switch instruction
  257. virtual bool onJumpTable(unsigned JumpTableSize) { return true; }
  258. /// Account for a case cluster of given size. Return false to stop further
  259. /// processing of the instruction.
  260. virtual bool onCaseCluster(unsigned NumCaseCluster) { return true; }
  261. /// Called at the end of processing a switch instruction, with the given
  262. /// number of case clusters.
  263. virtual void onFinalizeSwitch(unsigned JumpTableSize,
  264. unsigned NumCaseCluster) {}
  265. /// Called to account for any other instruction not specifically accounted
  266. /// for.
  267. virtual void onMissedSimplification() {}
  268. /// Start accounting potential benefits due to SROA for the given alloca.
  269. virtual void onInitializeSROAArg(AllocaInst *Arg) {}
  270. /// Account SROA savings for the AllocaInst value.
  271. virtual void onAggregateSROAUse(AllocaInst *V) {}
  272. bool handleSROA(Value *V, bool DoNotDisable) {
  273. // Check for SROA candidates in comparisons.
  274. if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
  275. if (DoNotDisable) {
  276. onAggregateSROAUse(SROAArg);
  277. return true;
  278. }
  279. disableSROAForArg(SROAArg);
  280. }
  281. return false;
  282. }
  283. bool IsCallerRecursive = false;
  284. bool IsRecursiveCall = false;
  285. bool ExposesReturnsTwice = false;
  286. bool HasDynamicAlloca = false;
  287. bool ContainsNoDuplicateCall = false;
  288. bool HasReturn = false;
  289. bool HasIndirectBr = false;
  290. bool HasUninlineableIntrinsic = false;
  291. bool InitsVargArgs = false;
  292. /// Number of bytes allocated statically by the callee.
  293. uint64_t AllocatedSize = 0;
  294. unsigned NumInstructions = 0;
  295. unsigned NumVectorInstructions = 0;
  296. /// While we walk the potentially-inlined instructions, we build up and
  297. /// maintain a mapping of simplified values specific to this callsite. The
  298. /// idea is to propagate any special information we have about arguments to
  299. /// this call through the inlinable section of the function, and account for
  300. /// likely simplifications post-inlining. The most important aspect we track
  301. /// is CFG altering simplifications -- when we prove a basic block dead, that
  302. /// can cause dramatic shifts in the cost of inlining a function.
  303. DenseMap<Value *, Constant *> SimplifiedValues;
  304. /// Keep track of the values which map back (through function arguments) to
  305. /// allocas on the caller stack which could be simplified through SROA.
  306. DenseMap<Value *, AllocaInst *> SROAArgValues;
  307. /// Keep track of Allocas for which we believe we may get SROA optimization.
  308. DenseSet<AllocaInst *> EnabledSROAAllocas;
  309. /// Keep track of values which map to a pointer base and constant offset.
  310. DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
  311. /// Keep track of dead blocks due to the constant arguments.
  312. SmallPtrSet<BasicBlock *, 16> DeadBlocks;
  313. /// The mapping of the blocks to their known unique successors due to the
  314. /// constant arguments.
  315. DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;
  316. /// Model the elimination of repeated loads that is expected to happen
  317. /// whenever we simplify away the stores that would otherwise cause them to be
  318. /// loads.
  319. bool EnableLoadElimination = true;
  320. /// Whether we allow inlining for recursive call.
  321. bool AllowRecursiveCall = false;
  322. SmallPtrSet<Value *, 16> LoadAddrSet;
  323. AllocaInst *getSROAArgForValueOrNull(Value *V) const {
  324. auto It = SROAArgValues.find(V);
  325. if (It == SROAArgValues.end() || EnabledSROAAllocas.count(It->second) == 0)
  326. return nullptr;
  327. return It->second;
  328. }
  329. // Custom simplification helper routines.
  330. bool isAllocaDerivedArg(Value *V);
  331. void disableSROAForArg(AllocaInst *SROAArg);
  332. void disableSROA(Value *V);
  333. void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
  334. void disableLoadElimination();
  335. bool isGEPFree(GetElementPtrInst &GEP);
  336. bool canFoldInboundsGEP(GetElementPtrInst &I);
  337. bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  338. bool simplifyCallSite(Function *F, CallBase &Call);
  339. bool simplifyInstruction(Instruction &I);
  340. bool simplifyIntrinsicCallIsConstant(CallBase &CB);
  341. bool simplifyIntrinsicCallObjectSize(CallBase &CB);
  342. ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
  343. /// Return true if the given argument to the function being considered for
  344. /// inlining has the given attribute set either at the call site or the
  345. /// function declaration. Primarily used to inspect call site specific
  346. /// attributes since these can be more precise than the ones on the callee
  347. /// itself.
  348. bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);
  349. /// Return true if the given value is known non null within the callee if
  350. /// inlined through this particular callsite.
  351. bool isKnownNonNullInCallee(Value *V);
  352. /// Return true if size growth is allowed when inlining the callee at \p Call.
  353. bool allowSizeGrowth(CallBase &Call);
  354. // Custom analysis routines.
  355. InlineResult analyzeBlock(BasicBlock *BB,
  356. SmallPtrSetImpl<const Value *> &EphValues);
  357. // Disable several entry points to the visitor so we don't accidentally use
  358. // them by declaring but not defining them here.
  359. void visit(Module *);
  360. void visit(Module &);
  361. void visit(Function *);
  362. void visit(Function &);
  363. void visit(BasicBlock *);
  364. void visit(BasicBlock &);
  365. // Provide base case for our instruction visit.
  366. bool visitInstruction(Instruction &I);
  367. // Our visit overrides.
  368. bool visitAlloca(AllocaInst &I);
  369. bool visitPHI(PHINode &I);
  370. bool visitGetElementPtr(GetElementPtrInst &I);
  371. bool visitBitCast(BitCastInst &I);
  372. bool visitPtrToInt(PtrToIntInst &I);
  373. bool visitIntToPtr(IntToPtrInst &I);
  374. bool visitCastInst(CastInst &I);
  375. bool visitCmpInst(CmpInst &I);
  376. bool visitSub(BinaryOperator &I);
  377. bool visitBinaryOperator(BinaryOperator &I);
  378. bool visitFNeg(UnaryOperator &I);
  379. bool visitLoad(LoadInst &I);
  380. bool visitStore(StoreInst &I);
  381. bool visitExtractValue(ExtractValueInst &I);
  382. bool visitInsertValue(InsertValueInst &I);
  383. bool visitCallBase(CallBase &Call);
  384. bool visitReturnInst(ReturnInst &RI);
  385. bool visitBranchInst(BranchInst &BI);
  386. bool visitSelectInst(SelectInst &SI);
  387. bool visitSwitchInst(SwitchInst &SI);
  388. bool visitIndirectBrInst(IndirectBrInst &IBI);
  389. bool visitResumeInst(ResumeInst &RI);
  390. bool visitCleanupReturnInst(CleanupReturnInst &RI);
  391. bool visitCatchReturnInst(CatchReturnInst &RI);
  392. bool visitUnreachableInst(UnreachableInst &I);
  393. public:
  394. CallAnalyzer(Function &Callee, CallBase &Call, const TargetTransformInfo &TTI,
  395. function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
  396. function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
  397. ProfileSummaryInfo *PSI = nullptr,
  398. OptimizationRemarkEmitter *ORE = nullptr)
  399. : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
  400. PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
  401. CandidateCall(Call) {}
  402. InlineResult analyze();
  403. std::optional<Constant *> getSimplifiedValue(Instruction *I) {
  404. if (SimplifiedValues.find(I) != SimplifiedValues.end())
  405. return SimplifiedValues[I];
  406. return std::nullopt;
  407. }
  408. // Keep a bunch of stats about the cost savings found so we can print them
  409. // out when debugging.
  410. unsigned NumConstantArgs = 0;
  411. unsigned NumConstantOffsetPtrArgs = 0;
  412. unsigned NumAllocaArgs = 0;
  413. unsigned NumConstantPtrCmps = 0;
  414. unsigned NumConstantPtrDiffs = 0;
  415. unsigned NumInstructionsSimplified = 0;
  416. void dump();
  417. };
  418. // Considering forming a binary search, we should find the number of nodes
  419. // which is same as the number of comparisons when lowered. For a given
  420. // number of clusters, n, we can define a recursive function, f(n), to find
  421. // the number of nodes in the tree. The recursion is :
  422. // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3,
  423. // and f(n) = n, when n <= 3.
  424. // This will lead a binary tree where the leaf should be either f(2) or f(3)
  425. // when n > 3. So, the number of comparisons from leaves should be n, while
  426. // the number of non-leaf should be :
  427. // 2^(log2(n) - 1) - 1
  428. // = 2^log2(n) * 2^-1 - 1
  429. // = n / 2 - 1.
  430. // Considering comparisons from leaf and non-leaf nodes, we can estimate the
  431. // number of comparisons in a simple closed form :
  432. // n + n / 2 - 1 = n * 3 / 2 - 1
  433. int64_t getExpectedNumberOfCompare(int NumCaseCluster) {
  434. return 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1;
  435. }
  436. /// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note
  437. /// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer
  438. class InlineCostCallAnalyzer final : public CallAnalyzer {
  439. const bool ComputeFullInlineCost;
  440. int LoadEliminationCost = 0;
  441. /// Bonus to be applied when percentage of vector instructions in callee is
  442. /// high (see more details in updateThreshold).
  443. int VectorBonus = 0;
  444. /// Bonus to be applied when the callee has only one reachable basic block.
  445. int SingleBBBonus = 0;
  446. /// Tunable parameters that control the analysis.
  447. const InlineParams &Params;
  448. // This DenseMap stores the delta change in cost and threshold after
  449. // accounting for the given instruction. The map is filled only with the
  450. // flag PrintInstructionComments on.
  451. DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap;
  452. /// Upper bound for the inlining cost. Bonuses are being applied to account
  453. /// for speculative "expected profit" of the inlining decision.
  454. int Threshold = 0;
  455. /// The amount of StaticBonus applied.
  456. int StaticBonusApplied = 0;
  457. /// Attempt to evaluate indirect calls to boost its inline cost.
  458. const bool BoostIndirectCalls;
  459. /// Ignore the threshold when finalizing analysis.
  460. const bool IgnoreThreshold;
  461. // True if the cost-benefit-analysis-based inliner is enabled.
  462. const bool CostBenefitAnalysisEnabled;
  463. /// Inlining cost measured in abstract units, accounts for all the
  464. /// instructions expected to be executed for a given function invocation.
  465. /// Instructions that are statically proven to be dead based on call-site
  466. /// arguments are not counted here.
  467. int Cost = 0;
  468. // The cumulative cost at the beginning of the basic block being analyzed. At
  469. // the end of analyzing each basic block, "Cost - CostAtBBStart" represents
  470. // the size of that basic block.
  471. int CostAtBBStart = 0;
  472. // The static size of live but cold basic blocks. This is "static" in the
  473. // sense that it's not weighted by profile counts at all.
  474. int ColdSize = 0;
  475. // Whether inlining is decided by cost-threshold analysis.
  476. bool DecidedByCostThreshold = false;
  477. // Whether inlining is decided by cost-benefit analysis.
  478. bool DecidedByCostBenefit = false;
  479. // The cost-benefit pair computed by cost-benefit analysis.
  480. std::optional<CostBenefitPair> CostBenefit;
  481. bool SingleBB = true;
  482. unsigned SROACostSavings = 0;
  483. unsigned SROACostSavingsLost = 0;
  484. /// The mapping of caller Alloca values to their accumulated cost savings. If
  485. /// we have to disable SROA for one of the allocas, this tells us how much
  486. /// cost must be added.
  487. DenseMap<AllocaInst *, int> SROAArgCosts;
  488. /// Return true if \p Call is a cold callsite.
  489. bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI);
  490. /// Update Threshold based on callsite properties such as callee
  491. /// attributes and callee hotness for PGO builds. The Callee is explicitly
  492. /// passed to support analyzing indirect calls whose target is inferred by
  493. /// analysis.
  494. void updateThreshold(CallBase &Call, Function &Callee);
  495. /// Return a higher threshold if \p Call is a hot callsite.
  496. std::optional<int> getHotCallSiteThreshold(CallBase &Call,
  497. BlockFrequencyInfo *CallerBFI);
  498. /// Handle a capped 'int' increment for Cost.
  499. void addCost(int64_t Inc) {
  500. Inc = std::max<int64_t>(std::min<int64_t>(INT_MAX, Inc), INT_MIN);
  501. Cost = std::max<int64_t>(std::min<int64_t>(INT_MAX, Inc + Cost), INT_MIN);
  502. }
  503. void onDisableSROA(AllocaInst *Arg) override {
  504. auto CostIt = SROAArgCosts.find(Arg);
  505. if (CostIt == SROAArgCosts.end())
  506. return;
  507. addCost(CostIt->second);
  508. SROACostSavings -= CostIt->second;
  509. SROACostSavingsLost += CostIt->second;
  510. SROAArgCosts.erase(CostIt);
  511. }
  512. void onDisableLoadElimination() override {
  513. addCost(LoadEliminationCost);
  514. LoadEliminationCost = 0;
  515. }
  516. bool onCallBaseVisitStart(CallBase &Call) override {
  517. if (std::optional<int> AttrCallThresholdBonus =
  518. getStringFnAttrAsInt(Call, "call-threshold-bonus"))
  519. Threshold += *AttrCallThresholdBonus;
  520. if (std::optional<int> AttrCallCost =
  521. getStringFnAttrAsInt(Call, "call-inline-cost")) {
  522. addCost(*AttrCallCost);
  523. // Prevent further processing of the call since we want to override its
  524. // inline cost, not just add to it.
  525. return false;
  526. }
  527. return true;
  528. }
  529. void onCallPenalty() override { addCost(CallPenalty); }
  530. void onMemAccess() override { addCost(MemAccessCost); }
  531. void onCallArgumentSetup(const CallBase &Call) override {
  532. // Pay the price of the argument setup. We account for the average 1
  533. // instruction per call argument setup here.
  534. addCost(Call.arg_size() * InstrCost);
  535. }
  536. void onLoadRelativeIntrinsic() override {
  537. // This is normally lowered to 4 LLVM instructions.
  538. addCost(3 * InstrCost);
  539. }
  540. void onLoweredCall(Function *F, CallBase &Call,
  541. bool IsIndirectCall) override {
  542. // We account for the average 1 instruction per call argument setup here.
  543. addCost(Call.arg_size() * InstrCost);
  544. // If we have a constant that we are calling as a function, we can peer
  545. // through it and see the function target. This happens not infrequently
  546. // during devirtualization and so we want to give it a hefty bonus for
  547. // inlining, but cap that bonus in the event that inlining wouldn't pan out.
  548. // Pretend to inline the function, with a custom threshold.
  549. if (IsIndirectCall && BoostIndirectCalls) {
  550. auto IndirectCallParams = Params;
  551. IndirectCallParams.DefaultThreshold =
  552. InlineConstants::IndirectCallThreshold;
  553. /// FIXME: if InlineCostCallAnalyzer is derived from, this may need
  554. /// to instantiate the derived class.
  555. InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
  556. GetAssumptionCache, GetBFI, PSI, ORE, false);
  557. if (CA.analyze().isSuccess()) {
  558. // We were able to inline the indirect call! Subtract the cost from the
  559. // threshold to get the bonus we want to apply, but don't go below zero.
  560. Cost -= std::max(0, CA.getThreshold() - CA.getCost());
  561. }
  562. } else
  563. // Otherwise simply add the cost for merely making the call.
  564. addCost(CallPenalty);
  565. }
  566. void onFinalizeSwitch(unsigned JumpTableSize,
  567. unsigned NumCaseCluster) override {
  568. // If suitable for a jump table, consider the cost for the table size and
  569. // branch to destination.
  570. // Maximum valid cost increased in this function.
  571. if (JumpTableSize) {
  572. int64_t JTCost =
  573. static_cast<int64_t>(JumpTableSize) * InstrCost + 4 * InstrCost;
  574. addCost(JTCost);
  575. return;
  576. }
  577. if (NumCaseCluster <= 3) {
  578. // Suppose a comparison includes one compare and one conditional branch.
  579. addCost(NumCaseCluster * 2 * InstrCost);
  580. return;
  581. }
  582. int64_t ExpectedNumberOfCompare =
  583. getExpectedNumberOfCompare(NumCaseCluster);
  584. int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InstrCost;
  585. addCost(SwitchCost);
  586. }
  587. void onMissedSimplification() override { addCost(InstrCost); }
  588. void onInitializeSROAArg(AllocaInst *Arg) override {
  589. assert(Arg != nullptr &&
  590. "Should not initialize SROA costs for null value.");
  591. SROAArgCosts[Arg] = 0;
  592. }
  593. void onAggregateSROAUse(AllocaInst *SROAArg) override {
  594. auto CostIt = SROAArgCosts.find(SROAArg);
  595. assert(CostIt != SROAArgCosts.end() &&
  596. "expected this argument to have a cost");
  597. CostIt->second += InstrCost;
  598. SROACostSavings += InstrCost;
  599. }
  600. void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; }
  601. void onBlockAnalyzed(const BasicBlock *BB) override {
  602. if (CostBenefitAnalysisEnabled) {
  603. // Keep track of the static size of live but cold basic blocks. For now,
  604. // we define a cold basic block to be one that's never executed.
  605. assert(GetBFI && "GetBFI must be available");
  606. BlockFrequencyInfo *BFI = &(GetBFI(F));
  607. assert(BFI && "BFI must be available");
  608. auto ProfileCount = BFI->getBlockProfileCount(BB);
  609. if (*ProfileCount == 0)
  610. ColdSize += Cost - CostAtBBStart;
  611. }
  612. auto *TI = BB->getTerminator();
  613. // If we had any successors at this point, than post-inlining is likely to
  614. // have them as well. Note that we assume any basic blocks which existed
  615. // due to branches or switches which folded above will also fold after
  616. // inlining.
  617. if (SingleBB && TI->getNumSuccessors() > 1) {
  618. // Take off the bonus we applied to the threshold.
  619. Threshold -= SingleBBBonus;
  620. SingleBB = false;
  621. }
  622. }
  623. void onInstructionAnalysisStart(const Instruction *I) override {
  624. // This function is called to store the initial cost of inlining before
  625. // the given instruction was assessed.
  626. if (!PrintInstructionComments)
  627. return;
  628. InstructionCostDetailMap[I].CostBefore = Cost;
  629. InstructionCostDetailMap[I].ThresholdBefore = Threshold;
  630. }
  631. void onInstructionAnalysisFinish(const Instruction *I) override {
  632. // This function is called to find new values of cost and threshold after
  633. // the instruction has been assessed.
  634. if (!PrintInstructionComments)
  635. return;
  636. InstructionCostDetailMap[I].CostAfter = Cost;
  637. InstructionCostDetailMap[I].ThresholdAfter = Threshold;
  638. }
  639. bool isCostBenefitAnalysisEnabled() {
  640. if (!PSI || !PSI->hasProfileSummary())
  641. return false;
  642. if (!GetBFI)
  643. return false;
  644. if (InlineEnableCostBenefitAnalysis.getNumOccurrences()) {
  645. // Honor the explicit request from the user.
  646. if (!InlineEnableCostBenefitAnalysis)
  647. return false;
  648. } else {
  649. // Otherwise, require instrumentation profile.
  650. if (!PSI->hasInstrumentationProfile())
  651. return false;
  652. }
  653. auto *Caller = CandidateCall.getParent()->getParent();
  654. if (!Caller->getEntryCount())
  655. return false;
  656. BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller));
  657. if (!CallerBFI)
  658. return false;
  659. // For now, limit to hot call site.
  660. if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
  661. return false;
  662. // Make sure we have a nonzero entry count.
  663. auto EntryCount = F.getEntryCount();
  664. if (!EntryCount || !EntryCount->getCount())
  665. return false;
  666. BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
  667. if (!CalleeBFI)
  668. return false;
  669. return true;
  670. }
  671. // Determine whether we should inline the given call site, taking into account
  672. // both the size cost and the cycle savings. Return std::nullopt if we don't
  673. // have suficient profiling information to determine.
  674. std::optional<bool> costBenefitAnalysis() {
  675. if (!CostBenefitAnalysisEnabled)
  676. return std::nullopt;
  677. // buildInlinerPipeline in the pass builder sets HotCallSiteThreshold to 0
  678. // for the prelink phase of the AutoFDO + ThinLTO build. Honor the logic by
  679. // falling back to the cost-based metric.
  680. // TODO: Improve this hacky condition.
  681. if (Threshold == 0)
  682. return std::nullopt;
  683. assert(GetBFI);
  684. BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
  685. assert(CalleeBFI);
  686. // The cycle savings expressed as the sum of InstrCost
  687. // multiplied by the estimated dynamic count of each instruction we can
  688. // avoid. Savings come from the call site cost, such as argument setup and
  689. // the call instruction, as well as the instructions that are folded.
  690. //
  691. // We use 128-bit APInt here to avoid potential overflow. This variable
  692. // should stay well below 10^^24 (or 2^^80) in practice. This "worst" case
  693. // assumes that we can avoid or fold a billion instructions, each with a
  694. // profile count of 10^^15 -- roughly the number of cycles for a 24-hour
  695. // period on a 4GHz machine.
  696. APInt CycleSavings(128, 0);
  697. for (auto &BB : F) {
  698. APInt CurrentSavings(128, 0);
  699. for (auto &I : BB) {
  700. if (BranchInst *BI = dyn_cast<BranchInst>(&I)) {
  701. // Count a conditional branch as savings if it becomes unconditional.
  702. if (BI->isConditional() &&
  703. isa_and_nonnull<ConstantInt>(
  704. SimplifiedValues.lookup(BI->getCondition()))) {
  705. CurrentSavings += InstrCost;
  706. }
  707. } else if (Value *V = dyn_cast<Value>(&I)) {
  708. // Count an instruction as savings if we can fold it.
  709. if (SimplifiedValues.count(V)) {
  710. CurrentSavings += InstrCost;
  711. }
  712. }
  713. }
  714. auto ProfileCount = CalleeBFI->getBlockProfileCount(&BB);
  715. CurrentSavings *= *ProfileCount;
  716. CycleSavings += CurrentSavings;
  717. }
  718. // Compute the cycle savings per call.
  719. auto EntryProfileCount = F.getEntryCount();
  720. assert(EntryProfileCount && EntryProfileCount->getCount());
  721. auto EntryCount = EntryProfileCount->getCount();
  722. CycleSavings += EntryCount / 2;
  723. CycleSavings = CycleSavings.udiv(EntryCount);
  724. // Compute the total savings for the call site.
  725. auto *CallerBB = CandidateCall.getParent();
  726. BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent())));
  727. CycleSavings += getCallsiteCost(this->CandidateCall, DL);
  728. CycleSavings *= *CallerBFI->getBlockProfileCount(CallerBB);
  729. // Remove the cost of the cold basic blocks.
  730. int Size = Cost - ColdSize;
  731. // Allow tiny callees to be inlined regardless of whether they meet the
  732. // savings threshold.
  733. Size = Size > InlineSizeAllowance ? Size - InlineSizeAllowance : 1;
  734. CostBenefit.emplace(APInt(128, Size), CycleSavings);
  735. // Return true if the savings justify the cost of inlining. Specifically,
  736. // we evaluate the following inequality:
  737. //
  738. // CycleSavings PSI->getOrCompHotCountThreshold()
  739. // -------------- >= -----------------------------------
  740. // Size InlineSavingsMultiplier
  741. //
  742. // Note that the left hand side is specific to a call site. The right hand
  743. // side is a constant for the entire executable.
  744. APInt LHS = CycleSavings;
  745. LHS *= InlineSavingsMultiplier;
  746. APInt RHS(128, PSI->getOrCompHotCountThreshold());
  747. RHS *= Size;
  748. return LHS.uge(RHS);
  749. }
  750. InlineResult finalizeAnalysis() override {
  751. // Loops generally act a lot like calls in that they act like barriers to
  752. // movement, require a certain amount of setup, etc. So when optimising for
  753. // size, we penalise any call sites that perform loops. We do this after all
  754. // other costs here, so will likely only be dealing with relatively small
  755. // functions (and hence DT and LI will hopefully be cheap).
  756. auto *Caller = CandidateCall.getFunction();
  757. if (Caller->hasMinSize()) {
  758. DominatorTree DT(F);
  759. LoopInfo LI(DT);
  760. int NumLoops = 0;
  761. for (Loop *L : LI) {
  762. // Ignore loops that will not be executed
  763. if (DeadBlocks.count(L->getHeader()))
  764. continue;
  765. NumLoops++;
  766. }
  767. addCost(NumLoops * InlineConstants::LoopPenalty);
  768. }
  769. // We applied the maximum possible vector bonus at the beginning. Now,
  770. // subtract the excess bonus, if any, from the Threshold before
  771. // comparing against Cost.
  772. if (NumVectorInstructions <= NumInstructions / 10)
  773. Threshold -= VectorBonus;
  774. else if (NumVectorInstructions <= NumInstructions / 2)
  775. Threshold -= VectorBonus / 2;
  776. if (std::optional<int> AttrCost =
  777. getStringFnAttrAsInt(CandidateCall, "function-inline-cost"))
  778. Cost = *AttrCost;
  779. if (std::optional<int> AttrCostMult = getStringFnAttrAsInt(
  780. CandidateCall,
  781. InlineConstants::FunctionInlineCostMultiplierAttributeName))
  782. Cost *= *AttrCostMult;
  783. if (std::optional<int> AttrThreshold =
  784. getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
  785. Threshold = *AttrThreshold;
  786. if (auto Result = costBenefitAnalysis()) {
  787. DecidedByCostBenefit = true;
  788. if (*Result)
  789. return InlineResult::success();
  790. else
  791. return InlineResult::failure("Cost over threshold.");
  792. }
  793. if (IgnoreThreshold)
  794. return InlineResult::success();
  795. DecidedByCostThreshold = true;
  796. return Cost < std::max(1, Threshold)
  797. ? InlineResult::success()
  798. : InlineResult::failure("Cost over threshold.");
  799. }
  800. bool shouldStop() override {
  801. if (IgnoreThreshold || ComputeFullInlineCost)
  802. return false;
  803. // Bail out the moment we cross the threshold. This means we'll under-count
  804. // the cost, but only when undercounting doesn't matter.
  805. if (Cost < Threshold)
  806. return false;
  807. DecidedByCostThreshold = true;
  808. return true;
  809. }
  810. void onLoadEliminationOpportunity() override {
  811. LoadEliminationCost += InstrCost;
  812. }
  813. InlineResult onAnalysisStart() override {
  814. // Perform some tweaks to the cost and threshold based on the direct
  815. // callsite information.
  816. // We want to more aggressively inline vector-dense kernels, so up the
  817. // threshold, and we'll lower it if the % of vector instructions gets too
  818. // low. Note that these bonuses are some what arbitrary and evolved over
  819. // time by accident as much as because they are principled bonuses.
  820. //
  821. // FIXME: It would be nice to remove all such bonuses. At least it would be
  822. // nice to base the bonus values on something more scientific.
  823. assert(NumInstructions == 0);
  824. assert(NumVectorInstructions == 0);
  825. // Update the threshold based on callsite properties
  826. updateThreshold(CandidateCall, F);
  827. // While Threshold depends on commandline options that can take negative
  828. // values, we want to enforce the invariant that the computed threshold and
  829. // bonuses are non-negative.
  830. assert(Threshold >= 0);
  831. assert(SingleBBBonus >= 0);
  832. assert(VectorBonus >= 0);
  833. // Speculatively apply all possible bonuses to Threshold. If cost exceeds
  834. // this Threshold any time, and cost cannot decrease, we can stop processing
  835. // the rest of the function body.
  836. Threshold += (SingleBBBonus + VectorBonus);
  837. // Give out bonuses for the callsite, as the instructions setting them up
  838. // will be gone after inlining.
  839. addCost(-getCallsiteCost(this->CandidateCall, DL));
  840. // If this function uses the coldcc calling convention, prefer not to inline
  841. // it.
  842. if (F.getCallingConv() == CallingConv::Cold)
  843. Cost += InlineConstants::ColdccPenalty;
  844. LLVM_DEBUG(dbgs() << " Initial cost: " << Cost << "\n");
  845. // Check if we're done. This can happen due to bonuses and penalties.
  846. if (Cost >= Threshold && !ComputeFullInlineCost)
  847. return InlineResult::failure("high cost");
  848. return InlineResult::success();
  849. }
  850. public:
  851. InlineCostCallAnalyzer(
  852. Function &Callee, CallBase &Call, const InlineParams &Params,
  853. const TargetTransformInfo &TTI,
  854. function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
  855. function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
  856. ProfileSummaryInfo *PSI = nullptr,
  857. OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true,
  858. bool IgnoreThreshold = false)
  859. : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI, ORE),
  860. ComputeFullInlineCost(OptComputeFullInlineCost ||
  861. Params.ComputeFullInlineCost || ORE ||
  862. isCostBenefitAnalysisEnabled()),
  863. Params(Params), Threshold(Params.DefaultThreshold),
  864. BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
  865. CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
  866. Writer(this) {
  867. AllowRecursiveCall = *Params.AllowRecursiveCall;
  868. }
  869. /// Annotation Writer for instruction details
  870. InlineCostAnnotationWriter Writer;
  871. void dump();
  872. // Prints the same analysis as dump(), but its definition is not dependent
  873. // on the build.
  874. void print(raw_ostream &OS);
  875. std::optional<InstructionCostDetail> getCostDetails(const Instruction *I) {
  876. if (InstructionCostDetailMap.find(I) != InstructionCostDetailMap.end())
  877. return InstructionCostDetailMap[I];
  878. return std::nullopt;
  879. }
  880. virtual ~InlineCostCallAnalyzer() = default;
  881. int getThreshold() const { return Threshold; }
  882. int getCost() const { return Cost; }
  883. int getStaticBonusApplied() const { return StaticBonusApplied; }
  884. std::optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; }
  885. bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; }
  886. bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; }
  887. };
  888. // Return true if CB is the sole call to local function Callee.
  889. static bool isSoleCallToLocalFunction(const CallBase &CB,
  890. const Function &Callee) {
  891. return Callee.hasLocalLinkage() && Callee.hasOneLiveUse() &&
  892. &Callee == CB.getCalledFunction();
  893. }
  894. class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
  895. private:
  896. InlineCostFeatures Cost = {};
  897. // FIXME: These constants are taken from the heuristic-based cost visitor.
  898. // These should be removed entirely in a later revision to avoid reliance on
  899. // heuristics in the ML inliner.
  900. static constexpr int JTCostMultiplier = 4;
  901. static constexpr int CaseClusterCostMultiplier = 2;
  902. static constexpr int SwitchCostMultiplier = 2;
  903. // FIXME: These are taken from the heuristic-based cost visitor: we should
  904. // eventually abstract these to the CallAnalyzer to avoid duplication.
  905. unsigned SROACostSavingOpportunities = 0;
  906. int VectorBonus = 0;
  907. int SingleBBBonus = 0;
  908. int Threshold = 5;
  909. DenseMap<AllocaInst *, unsigned> SROACosts;
  910. void increment(InlineCostFeatureIndex Feature, int64_t Delta = 1) {
  911. Cost[static_cast<size_t>(Feature)] += Delta;
  912. }
  913. void set(InlineCostFeatureIndex Feature, int64_t Value) {
  914. Cost[static_cast<size_t>(Feature)] = Value;
  915. }
  916. void onDisableSROA(AllocaInst *Arg) override {
  917. auto CostIt = SROACosts.find(Arg);
  918. if (CostIt == SROACosts.end())
  919. return;
  920. increment(InlineCostFeatureIndex::SROALosses, CostIt->second);
  921. SROACostSavingOpportunities -= CostIt->second;
  922. SROACosts.erase(CostIt);
  923. }
  924. void onDisableLoadElimination() override {
  925. set(InlineCostFeatureIndex::LoadElimination, 1);
  926. }
  927. void onCallPenalty() override {
  928. increment(InlineCostFeatureIndex::CallPenalty, CallPenalty);
  929. }
  930. void onCallArgumentSetup(const CallBase &Call) override {
  931. increment(InlineCostFeatureIndex::CallArgumentSetup,
  932. Call.arg_size() * InstrCost);
  933. }
  934. void onLoadRelativeIntrinsic() override {
  935. increment(InlineCostFeatureIndex::LoadRelativeIntrinsic, 3 * InstrCost);
  936. }
  937. void onLoweredCall(Function *F, CallBase &Call,
  938. bool IsIndirectCall) override {
  939. increment(InlineCostFeatureIndex::LoweredCallArgSetup,
  940. Call.arg_size() * InstrCost);
  941. if (IsIndirectCall) {
  942. InlineParams IndirectCallParams = {/* DefaultThreshold*/ 0,
  943. /*HintThreshold*/ {},
  944. /*ColdThreshold*/ {},
  945. /*OptSizeThreshold*/ {},
  946. /*OptMinSizeThreshold*/ {},
  947. /*HotCallSiteThreshold*/ {},
  948. /*LocallyHotCallSiteThreshold*/ {},
  949. /*ColdCallSiteThreshold*/ {},
  950. /*ComputeFullInlineCost*/ true,
  951. /*EnableDeferral*/ true};
  952. IndirectCallParams.DefaultThreshold =
  953. InlineConstants::IndirectCallThreshold;
  954. InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
  955. GetAssumptionCache, GetBFI, PSI, ORE, false,
  956. true);
  957. if (CA.analyze().isSuccess()) {
  958. increment(InlineCostFeatureIndex::NestedInlineCostEstimate,
  959. CA.getCost());
  960. increment(InlineCostFeatureIndex::NestedInlines, 1);
  961. }
  962. } else {
  963. onCallPenalty();
  964. }
  965. }
  966. void onFinalizeSwitch(unsigned JumpTableSize,
  967. unsigned NumCaseCluster) override {
  968. if (JumpTableSize) {
  969. int64_t JTCost = static_cast<int64_t>(JumpTableSize) * InstrCost +
  970. JTCostMultiplier * InstrCost;
  971. increment(InlineCostFeatureIndex::JumpTablePenalty, JTCost);
  972. return;
  973. }
  974. if (NumCaseCluster <= 3) {
  975. increment(InlineCostFeatureIndex::CaseClusterPenalty,
  976. NumCaseCluster * CaseClusterCostMultiplier * InstrCost);
  977. return;
  978. }
  979. int64_t ExpectedNumberOfCompare =
  980. getExpectedNumberOfCompare(NumCaseCluster);
  981. int64_t SwitchCost =
  982. ExpectedNumberOfCompare * SwitchCostMultiplier * InstrCost;
  983. increment(InlineCostFeatureIndex::SwitchPenalty, SwitchCost);
  984. }
  985. void onMissedSimplification() override {
  986. increment(InlineCostFeatureIndex::UnsimplifiedCommonInstructions,
  987. InstrCost);
  988. }
  989. void onInitializeSROAArg(AllocaInst *Arg) override { SROACosts[Arg] = 0; }
  990. void onAggregateSROAUse(AllocaInst *Arg) override {
  991. SROACosts.find(Arg)->second += InstrCost;
  992. SROACostSavingOpportunities += InstrCost;
  993. }
  994. void onBlockAnalyzed(const BasicBlock *BB) override {
  995. if (BB->getTerminator()->getNumSuccessors() > 1)
  996. set(InlineCostFeatureIndex::IsMultipleBlocks, 1);
  997. Threshold -= SingleBBBonus;
  998. }
  999. InlineResult finalizeAnalysis() override {
  1000. auto *Caller = CandidateCall.getFunction();
  1001. if (Caller->hasMinSize()) {
  1002. DominatorTree DT(F);
  1003. LoopInfo LI(DT);
  1004. for (Loop *L : LI) {
  1005. // Ignore loops that will not be executed
  1006. if (DeadBlocks.count(L->getHeader()))
  1007. continue;
  1008. increment(InlineCostFeatureIndex::NumLoops,
  1009. InlineConstants::LoopPenalty);
  1010. }
  1011. }
  1012. set(InlineCostFeatureIndex::DeadBlocks, DeadBlocks.size());
  1013. set(InlineCostFeatureIndex::SimplifiedInstructions,
  1014. NumInstructionsSimplified);
  1015. set(InlineCostFeatureIndex::ConstantArgs, NumConstantArgs);
  1016. set(InlineCostFeatureIndex::ConstantOffsetPtrArgs,
  1017. NumConstantOffsetPtrArgs);
  1018. set(InlineCostFeatureIndex::SROASavings, SROACostSavingOpportunities);
  1019. if (NumVectorInstructions <= NumInstructions / 10)
  1020. Threshold -= VectorBonus;
  1021. else if (NumVectorInstructions <= NumInstructions / 2)
  1022. Threshold -= VectorBonus / 2;
  1023. set(InlineCostFeatureIndex::Threshold, Threshold);
  1024. return InlineResult::success();
  1025. }
  1026. bool shouldStop() override { return false; }
  1027. void onLoadEliminationOpportunity() override {
  1028. increment(InlineCostFeatureIndex::LoadElimination, 1);
  1029. }
  1030. InlineResult onAnalysisStart() override {
  1031. increment(InlineCostFeatureIndex::CallSiteCost,
  1032. -1 * getCallsiteCost(this->CandidateCall, DL));
  1033. set(InlineCostFeatureIndex::ColdCcPenalty,
  1034. (F.getCallingConv() == CallingConv::Cold));
  1035. set(InlineCostFeatureIndex::LastCallToStaticBonus,
  1036. isSoleCallToLocalFunction(CandidateCall, F));
  1037. // FIXME: we shouldn't repeat this logic in both the Features and Cost
  1038. // analyzer - instead, we should abstract it to a common method in the
  1039. // CallAnalyzer
  1040. int SingleBBBonusPercent = 50;
  1041. int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
  1042. Threshold += TTI.adjustInliningThreshold(&CandidateCall);
  1043. Threshold *= TTI.getInliningThresholdMultiplier();
  1044. SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
  1045. VectorBonus = Threshold * VectorBonusPercent / 100;
  1046. Threshold += (SingleBBBonus + VectorBonus);
  1047. return InlineResult::success();
  1048. }
  1049. public:
  1050. InlineCostFeaturesAnalyzer(
  1051. const TargetTransformInfo &TTI,
  1052. function_ref<AssumptionCache &(Function &)> &GetAssumptionCache,
  1053. function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
  1054. ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee,
  1055. CallBase &Call)
  1056. : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI) {}
  1057. const InlineCostFeatures &features() const { return Cost; }
  1058. };
  1059. } // namespace
  1060. /// Test whether the given value is an Alloca-derived function argument.
  1061. bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  1062. return SROAArgValues.count(V);
  1063. }
  1064. void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
  1065. onDisableSROA(SROAArg);
  1066. EnabledSROAAllocas.erase(SROAArg);
  1067. disableLoadElimination();
  1068. }
  1069. void InlineCostAnnotationWriter::emitInstructionAnnot(
  1070. const Instruction *I, formatted_raw_ostream &OS) {
  1071. // The cost of inlining of the given instruction is printed always.
  1072. // The threshold delta is printed only when it is non-zero. It happens
  1073. // when we decided to give a bonus at a particular instruction.
  1074. std::optional<InstructionCostDetail> Record = ICCA->getCostDetails(I);
  1075. if (!Record)
  1076. OS << "; No analysis for the instruction";
  1077. else {
  1078. OS << "; cost before = " << Record->CostBefore
  1079. << ", cost after = " << Record->CostAfter
  1080. << ", threshold before = " << Record->ThresholdBefore
  1081. << ", threshold after = " << Record->ThresholdAfter << ", ";
  1082. OS << "cost delta = " << Record->getCostDelta();
  1083. if (Record->hasThresholdChanged())
  1084. OS << ", threshold delta = " << Record->getThresholdDelta();
  1085. }
  1086. auto C = ICCA->getSimplifiedValue(const_cast<Instruction *>(I));
  1087. if (C) {
  1088. OS << ", simplified to ";
  1089. (*C)->print(OS, true);
  1090. }
  1091. OS << "\n";
  1092. }
  1093. /// If 'V' maps to a SROA candidate, disable SROA for it.
  1094. void CallAnalyzer::disableSROA(Value *V) {
  1095. if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
  1096. disableSROAForArg(SROAArg);
  1097. }
  1098. }
  1099. void CallAnalyzer::disableLoadElimination() {
  1100. if (EnableLoadElimination) {
  1101. onDisableLoadElimination();
  1102. EnableLoadElimination = false;
  1103. }
  1104. }
  1105. /// Accumulate a constant GEP offset into an APInt if possible.
  1106. ///
  1107. /// Returns false if unable to compute the offset for any reason. Respects any
  1108. /// simplified values known during the analysis of this callsite.
  1109. bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  1110. unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
  1111. assert(IntPtrWidth == Offset.getBitWidth());
  1112. for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
  1113. GTI != GTE; ++GTI) {
  1114. ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
  1115. if (!OpC)
  1116. if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
  1117. OpC = dyn_cast<ConstantInt>(SimpleOp);
  1118. if (!OpC)
  1119. return false;
  1120. if (OpC->isZero())
  1121. continue;
  1122. // Handle a struct index, which adds its field offset to the pointer.
  1123. if (StructType *STy = GTI.getStructTypeOrNull()) {
  1124. unsigned ElementIdx = OpC->getZExtValue();
  1125. const StructLayout *SL = DL.getStructLayout(STy);
  1126. Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
  1127. continue;
  1128. }
  1129. APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
  1130. Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  1131. }
  1132. return true;
  1133. }
  1134. /// Use TTI to check whether a GEP is free.
  1135. ///
  1136. /// Respects any simplified values known during the analysis of this callsite.
  1137. bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
  1138. SmallVector<Value *, 4> Operands;
  1139. Operands.push_back(GEP.getOperand(0));
  1140. for (const Use &Op : GEP.indices())
  1141. if (Constant *SimpleOp = SimplifiedValues.lookup(Op))
  1142. Operands.push_back(SimpleOp);
  1143. else
  1144. Operands.push_back(Op);
  1145. return TTI.getInstructionCost(&GEP, Operands,
  1146. TargetTransformInfo::TCK_SizeAndLatency) ==
  1147. TargetTransformInfo::TCC_Free;
  1148. }
  1149. bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  1150. disableSROA(I.getOperand(0));
  1151. // Check whether inlining will turn a dynamic alloca into a static
  1152. // alloca and handle that case.
  1153. if (I.isArrayAllocation()) {
  1154. Constant *Size = SimplifiedValues.lookup(I.getArraySize());
  1155. if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
  1156. // Sometimes a dynamic alloca could be converted into a static alloca
  1157. // after this constant prop, and become a huge static alloca on an
  1158. // unconditional CFG path. Avoid inlining if this is going to happen above
  1159. // a threshold.
  1160. // FIXME: If the threshold is removed or lowered too much, we could end up
  1161. // being too pessimistic and prevent inlining non-problematic code. This
  1162. // could result in unintended perf regressions. A better overall strategy
  1163. // is needed to track stack usage during inlining.
  1164. Type *Ty = I.getAllocatedType();
  1165. AllocatedSize = SaturatingMultiplyAdd(
  1166. AllocSize->getLimitedValue(),
  1167. DL.getTypeAllocSize(Ty).getKnownMinValue(), AllocatedSize);
  1168. if (AllocatedSize > InlineConstants::MaxSimplifiedDynamicAllocaToInline)
  1169. HasDynamicAlloca = true;
  1170. return false;
  1171. }
  1172. }
  1173. // Accumulate the allocated size.
  1174. if (I.isStaticAlloca()) {
  1175. Type *Ty = I.getAllocatedType();
  1176. AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty).getKnownMinValue(),
  1177. AllocatedSize);
  1178. }
  1179. // FIXME: This is overly conservative. Dynamic allocas are inefficient for
  1180. // a variety of reasons, and so we would like to not inline them into
  1181. // functions which don't currently have a dynamic alloca. This simply
  1182. // disables inlining altogether in the presence of a dynamic alloca.
  1183. if (!I.isStaticAlloca())
  1184. HasDynamicAlloca = true;
  1185. return false;
  1186. }
  1187. bool CallAnalyzer::visitPHI(PHINode &I) {
  1188. // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  1189. // though we don't want to propagate it's bonuses. The idea is to disable
  1190. // SROA if it *might* be used in an inappropriate manner.
  1191. // Phi nodes are always zero-cost.
  1192. // FIXME: Pointer sizes may differ between different address spaces, so do we
  1193. // need to use correct address space in the call to getPointerSizeInBits here?
  1194. // Or could we skip the getPointerSizeInBits call completely? As far as I can
  1195. // see the ZeroOffset is used as a dummy value, so we can probably use any
  1196. // bit width for the ZeroOffset?
  1197. APInt ZeroOffset = APInt::getZero(DL.getPointerSizeInBits(0));
  1198. bool CheckSROA = I.getType()->isPointerTy();
  1199. // Track the constant or pointer with constant offset we've seen so far.
  1200. Constant *FirstC = nullptr;
  1201. std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
  1202. Value *FirstV = nullptr;
  1203. for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
  1204. BasicBlock *Pred = I.getIncomingBlock(i);
  1205. // If the incoming block is dead, skip the incoming block.
  1206. if (DeadBlocks.count(Pred))
  1207. continue;
  1208. // If the parent block of phi is not the known successor of the incoming
  1209. // block, skip the incoming block.
  1210. BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
  1211. if (KnownSuccessor && KnownSuccessor != I.getParent())
  1212. continue;
  1213. Value *V = I.getIncomingValue(i);
  1214. // If the incoming value is this phi itself, skip the incoming value.
  1215. if (&I == V)
  1216. continue;
  1217. Constant *C = dyn_cast<Constant>(V);
  1218. if (!C)
  1219. C = SimplifiedValues.lookup(V);
  1220. std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
  1221. if (!C && CheckSROA)
  1222. BaseAndOffset = ConstantOffsetPtrs.lookup(V);
  1223. if (!C && !BaseAndOffset.first)
  1224. // The incoming value is neither a constant nor a pointer with constant
  1225. // offset, exit early.
  1226. return true;
  1227. if (FirstC) {
  1228. if (FirstC == C)
  1229. // If we've seen a constant incoming value before and it is the same
  1230. // constant we see this time, continue checking the next incoming value.
  1231. continue;
  1232. // Otherwise early exit because we either see a different constant or saw
  1233. // a constant before but we have a pointer with constant offset this time.
  1234. return true;
  1235. }
  1236. if (FirstV) {
  1237. // The same logic as above, but check pointer with constant offset here.
  1238. if (FirstBaseAndOffset == BaseAndOffset)
  1239. continue;
  1240. return true;
  1241. }
  1242. if (C) {
  1243. // This is the 1st time we've seen a constant, record it.
  1244. FirstC = C;
  1245. continue;
  1246. }
  1247. // The remaining case is that this is the 1st time we've seen a pointer with
  1248. // constant offset, record it.
  1249. FirstV = V;
  1250. FirstBaseAndOffset = BaseAndOffset;
  1251. }
  1252. // Check if we can map phi to a constant.
  1253. if (FirstC) {
  1254. SimplifiedValues[&I] = FirstC;
  1255. return true;
  1256. }
  1257. // Check if we can map phi to a pointer with constant offset.
  1258. if (FirstBaseAndOffset.first) {
  1259. ConstantOffsetPtrs[&I] = FirstBaseAndOffset;
  1260. if (auto *SROAArg = getSROAArgForValueOrNull(FirstV))
  1261. SROAArgValues[&I] = SROAArg;
  1262. }
  1263. return true;
  1264. }
  1265. /// Check we can fold GEPs of constant-offset call site argument pointers.
  1266. /// This requires target data and inbounds GEPs.
  1267. ///
  1268. /// \return true if the specified GEP can be folded.
  1269. bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
  1270. // Check if we have a base + offset for the pointer.
  1271. std::pair<Value *, APInt> BaseAndOffset =
  1272. ConstantOffsetPtrs.lookup(I.getPointerOperand());
  1273. if (!BaseAndOffset.first)
  1274. return false;
  1275. // Check if the offset of this GEP is constant, and if so accumulate it
  1276. // into Offset.
  1277. if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
  1278. return false;
  1279. // Add the result as a new mapping to Base + Offset.
  1280. ConstantOffsetPtrs[&I] = BaseAndOffset;
  1281. return true;
  1282. }
  1283. bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
  1284. auto *SROAArg = getSROAArgForValueOrNull(I.getPointerOperand());
  1285. // Lambda to check whether a GEP's indices are all constant.
  1286. auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
  1287. for (const Use &Op : GEP.indices())
  1288. if (!isa<Constant>(Op) && !SimplifiedValues.lookup(Op))
  1289. return false;
  1290. return true;
  1291. };
  1292. if (!DisableGEPConstOperand)
  1293. if (simplifyInstruction(I))
  1294. return true;
  1295. if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
  1296. if (SROAArg)
  1297. SROAArgValues[&I] = SROAArg;
  1298. // Constant GEPs are modeled as free.
  1299. return true;
  1300. }
  1301. // Variable GEPs will require math and will disable SROA.
  1302. if (SROAArg)
  1303. disableSROAForArg(SROAArg);
  1304. return isGEPFree(I);
  1305. }
  1306. /// Simplify \p I if its operands are constants and update SimplifiedValues.
  1307. bool CallAnalyzer::simplifyInstruction(Instruction &I) {
  1308. SmallVector<Constant *> COps;
  1309. for (Value *Op : I.operands()) {
  1310. Constant *COp = dyn_cast<Constant>(Op);
  1311. if (!COp)
  1312. COp = SimplifiedValues.lookup(Op);
  1313. if (!COp)
  1314. return false;
  1315. COps.push_back(COp);
  1316. }
  1317. auto *C = ConstantFoldInstOperands(&I, COps, DL);
  1318. if (!C)
  1319. return false;
  1320. SimplifiedValues[&I] = C;
  1321. return true;
  1322. }
  1323. /// Try to simplify a call to llvm.is.constant.
  1324. ///
  1325. /// Duplicate the argument checking from CallAnalyzer::simplifyCallSite since
  1326. /// we expect calls of this specific intrinsic to be infrequent.
  1327. ///
  1328. /// FIXME: Given that we know CB's parent (F) caller
  1329. /// (CandidateCall->getParent()->getParent()), we might be able to determine
  1330. /// whether inlining F into F's caller would change how the call to
  1331. /// llvm.is.constant would evaluate.
  1332. bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {
  1333. Value *Arg = CB.getArgOperand(0);
  1334. auto *C = dyn_cast<Constant>(Arg);
  1335. if (!C)
  1336. C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(Arg));
  1337. Type *RT = CB.getFunctionType()->getReturnType();
  1338. SimplifiedValues[&CB] = ConstantInt::get(RT, C ? 1 : 0);
  1339. return true;
  1340. }
  1341. bool CallAnalyzer::simplifyIntrinsicCallObjectSize(CallBase &CB) {
  1342. // As per the langref, "The fourth argument to llvm.objectsize determines if
  1343. // the value should be evaluated at runtime."
  1344. if(cast<ConstantInt>(CB.getArgOperand(3))->isOne())
  1345. return false;
  1346. Value *V = lowerObjectSizeCall(&cast<IntrinsicInst>(CB), DL, nullptr,
  1347. /*MustSucceed=*/true);
  1348. Constant *C = dyn_cast_or_null<Constant>(V);
  1349. if (C)
  1350. SimplifiedValues[&CB] = C;
  1351. return C;
  1352. }
  1353. bool CallAnalyzer::visitBitCast(BitCastInst &I) {
  1354. // Propagate constants through bitcasts.
  1355. if (simplifyInstruction(I))
  1356. return true;
  1357. // Track base/offsets through casts
  1358. std::pair<Value *, APInt> BaseAndOffset =
  1359. ConstantOffsetPtrs.lookup(I.getOperand(0));
  1360. // Casts don't change the offset, just wrap it up.
  1361. if (BaseAndOffset.first)
  1362. ConstantOffsetPtrs[&I] = BaseAndOffset;
  1363. // Also look for SROA candidates here.
  1364. if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
  1365. SROAArgValues[&I] = SROAArg;
  1366. // Bitcasts are always zero cost.
  1367. return true;
  1368. }
  1369. bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
  1370. // Propagate constants through ptrtoint.
  1371. if (simplifyInstruction(I))
  1372. return true;
  1373. // Track base/offset pairs when converted to a plain integer provided the
  1374. // integer is large enough to represent the pointer.
  1375. unsigned IntegerSize = I.getType()->getScalarSizeInBits();
  1376. unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
  1377. if (IntegerSize == DL.getPointerSizeInBits(AS)) {
  1378. std::pair<Value *, APInt> BaseAndOffset =
  1379. ConstantOffsetPtrs.lookup(I.getOperand(0));
  1380. if (BaseAndOffset.first)
  1381. ConstantOffsetPtrs[&I] = BaseAndOffset;
  1382. }
  1383. // This is really weird. Technically, ptrtoint will disable SROA. However,
  1384. // unless that ptrtoint is *used* somewhere in the live basic blocks after
  1385. // inlining, it will be nuked, and SROA should proceed. All of the uses which
  1386. // would block SROA would also block SROA if applied directly to a pointer,
  1387. // and so we can just add the integer in here. The only places where SROA is
  1388. // preserved either cannot fire on an integer, or won't in-and-of themselves
  1389. // disable SROA (ext) w/o some later use that we would see and disable.
  1390. if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
  1391. SROAArgValues[&I] = SROAArg;
  1392. return TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
  1393. TargetTransformInfo::TCC_Free;
  1394. }
  1395. bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
  1396. // Propagate constants through ptrtoint.
  1397. if (simplifyInstruction(I))
  1398. return true;
  1399. // Track base/offset pairs when round-tripped through a pointer without
  1400. // modifications provided the integer is not too large.
  1401. Value *Op = I.getOperand(0);
  1402. unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
  1403. if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
  1404. std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
  1405. if (BaseAndOffset.first)
  1406. ConstantOffsetPtrs[&I] = BaseAndOffset;
  1407. }
  1408. // "Propagate" SROA here in the same manner as we do for ptrtoint above.
  1409. if (auto *SROAArg = getSROAArgForValueOrNull(Op))
  1410. SROAArgValues[&I] = SROAArg;
  1411. return TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
  1412. TargetTransformInfo::TCC_Free;
  1413. }
  1414. bool CallAnalyzer::visitCastInst(CastInst &I) {
  1415. // Propagate constants through casts.
  1416. if (simplifyInstruction(I))
  1417. return true;
  1418. // Disable SROA in the face of arbitrary casts we don't explicitly list
  1419. // elsewhere.
  1420. disableSROA(I.getOperand(0));
  1421. // If this is a floating-point cast, and the target says this operation
  1422. // is expensive, this may eventually become a library call. Treat the cost
  1423. // as such.
  1424. switch (I.getOpcode()) {
  1425. case Instruction::FPTrunc:
  1426. case Instruction::FPExt:
  1427. case Instruction::UIToFP:
  1428. case Instruction::SIToFP:
  1429. case Instruction::FPToUI:
  1430. case Instruction::FPToSI:
  1431. if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
  1432. onCallPenalty();
  1433. break;
  1434. default:
  1435. break;
  1436. }
  1437. return TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
  1438. TargetTransformInfo::TCC_Free;
  1439. }
  1440. bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
  1441. return CandidateCall.paramHasAttr(A->getArgNo(), Attr);
  1442. }
  1443. bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
  1444. // Does the *call site* have the NonNull attribute set on an argument? We
  1445. // use the attribute on the call site to memoize any analysis done in the
  1446. // caller. This will also trip if the callee function has a non-null
  1447. // parameter attribute, but that's a less interesting case because hopefully
  1448. // the callee would already have been simplified based on that.
  1449. if (Argument *A = dyn_cast<Argument>(V))
  1450. if (paramHasAttr(A, Attribute::NonNull))
  1451. return true;
  1452. // Is this an alloca in the caller? This is distinct from the attribute case
  1453. // above because attributes aren't updated within the inliner itself and we
  1454. // always want to catch the alloca derived case.
  1455. if (isAllocaDerivedArg(V))
  1456. // We can actually predict the result of comparisons between an
  1457. // alloca-derived value and null. Note that this fires regardless of
  1458. // SROA firing.
  1459. return true;
  1460. return false;
  1461. }
  1462. bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {
  1463. // If the normal destination of the invoke or the parent block of the call
  1464. // site is unreachable-terminated, there is little point in inlining this
  1465. // unless there is literally zero cost.
  1466. // FIXME: Note that it is possible that an unreachable-terminated block has a
  1467. // hot entry. For example, in below scenario inlining hot_call_X() may be
  1468. // beneficial :
  1469. // main() {
  1470. // hot_call_1();
  1471. // ...
  1472. // hot_call_N()
  1473. // exit(0);
  1474. // }
  1475. // For now, we are not handling this corner case here as it is rare in real
  1476. // code. In future, we should elaborate this based on BPI and BFI in more
  1477. // general threshold adjusting heuristics in updateThreshold().
  1478. if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
  1479. if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
  1480. return false;
  1481. } else if (isa<UnreachableInst>(Call.getParent()->getTerminator()))
  1482. return false;
  1483. return true;
  1484. }
  1485. bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call,
  1486. BlockFrequencyInfo *CallerBFI) {
  1487. // If global profile summary is available, then callsite's coldness is
  1488. // determined based on that.
  1489. if (PSI && PSI->hasProfileSummary())
  1490. return PSI->isColdCallSite(Call, CallerBFI);
  1491. // Otherwise we need BFI to be available.
  1492. if (!CallerBFI)
  1493. return false;
  1494. // Determine if the callsite is cold relative to caller's entry. We could
  1495. // potentially cache the computation of scaled entry frequency, but the added
  1496. // complexity is not worth it unless this scaling shows up high in the
  1497. // profiles.
  1498. const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
  1499. auto CallSiteBB = Call.getParent();
  1500. auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
  1501. auto CallerEntryFreq =
  1502. CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock()));
  1503. return CallSiteFreq < CallerEntryFreq * ColdProb;
  1504. }
  1505. std::optional<int>
  1506. InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
  1507. BlockFrequencyInfo *CallerBFI) {
  1508. // If global profile summary is available, then callsite's hotness is
  1509. // determined based on that.
  1510. if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI))
  1511. return Params.HotCallSiteThreshold;
  1512. // Otherwise we need BFI to be available and to have a locally hot callsite
  1513. // threshold.
  1514. if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
  1515. return std::nullopt;
  1516. // Determine if the callsite is hot relative to caller's entry. We could
  1517. // potentially cache the computation of scaled entry frequency, but the added
  1518. // complexity is not worth it unless this scaling shows up high in the
  1519. // profiles.
  1520. auto CallSiteBB = Call.getParent();
  1521. auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
  1522. auto CallerEntryFreq = CallerBFI->getEntryFreq();
  1523. if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
  1524. return Params.LocallyHotCallSiteThreshold;
  1525. // Otherwise treat it normally.
  1526. return std::nullopt;
  1527. }
  1528. void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
  1529. // If no size growth is allowed for this inlining, set Threshold to 0.
  1530. if (!allowSizeGrowth(Call)) {
  1531. Threshold = 0;
  1532. return;
  1533. }
  1534. Function *Caller = Call.getCaller();
  1535. // return min(A, B) if B is valid.
  1536. auto MinIfValid = [](int A, std::optional<int> B) {
  1537. return B ? std::min(A, *B) : A;
  1538. };
  1539. // return max(A, B) if B is valid.
  1540. auto MaxIfValid = [](int A, std::optional<int> B) {
  1541. return B ? std::max(A, *B) : A;
  1542. };
  1543. // Various bonus percentages. These are multiplied by Threshold to get the
  1544. // bonus values.
  1545. // SingleBBBonus: This bonus is applied if the callee has a single reachable
  1546. // basic block at the given callsite context. This is speculatively applied
  1547. // and withdrawn if more than one basic block is seen.
  1548. //
  1549. // LstCallToStaticBonus: This large bonus is applied to ensure the inlining
  1550. // of the last call to a static function as inlining such functions is
  1551. // guaranteed to reduce code size.
  1552. //
  1553. // These bonus percentages may be set to 0 based on properties of the caller
  1554. // and the callsite.
  1555. int SingleBBBonusPercent = 50;
  1556. int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
  1557. int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
  1558. // Lambda to set all the above bonus and bonus percentages to 0.
  1559. auto DisallowAllBonuses = [&]() {
  1560. SingleBBBonusPercent = 0;
  1561. VectorBonusPercent = 0;
  1562. LastCallToStaticBonus = 0;
  1563. };
  1564. // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
  1565. // and reduce the threshold if the caller has the necessary attribute.
  1566. if (Caller->hasMinSize()) {
  1567. Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
  1568. // For minsize, we want to disable the single BB bonus and the vector
  1569. // bonuses, but not the last-call-to-static bonus. Inlining the last call to
  1570. // a static function will, at the minimum, eliminate the parameter setup and
  1571. // call/return instructions.
  1572. SingleBBBonusPercent = 0;
  1573. VectorBonusPercent = 0;
  1574. } else if (Caller->hasOptSize())
  1575. Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
  1576. // Adjust the threshold based on inlinehint attribute and profile based
  1577. // hotness information if the caller does not have MinSize attribute.
  1578. if (!Caller->hasMinSize()) {
  1579. if (Callee.hasFnAttribute(Attribute::InlineHint))
  1580. Threshold = MaxIfValid(Threshold, Params.HintThreshold);
  1581. // FIXME: After switching to the new passmanager, simplify the logic below
  1582. // by checking only the callsite hotness/coldness as we will reliably
  1583. // have local profile information.
  1584. //
  1585. // Callsite hotness and coldness can be determined if sample profile is
  1586. // used (which adds hotness metadata to calls) or if caller's
  1587. // BlockFrequencyInfo is available.
  1588. BlockFrequencyInfo *CallerBFI = GetBFI ? &(GetBFI(*Caller)) : nullptr;
  1589. auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI);
  1590. if (!Caller->hasOptSize() && HotCallSiteThreshold) {
  1591. LLVM_DEBUG(dbgs() << "Hot callsite.\n");
  1592. // FIXME: This should update the threshold only if it exceeds the
  1593. // current threshold, but AutoFDO + ThinLTO currently relies on this
  1594. // behavior to prevent inlining of hot callsites during ThinLTO
  1595. // compile phase.
  1596. Threshold = *HotCallSiteThreshold;
  1597. } else if (isColdCallSite(Call, CallerBFI)) {
  1598. LLVM_DEBUG(dbgs() << "Cold callsite.\n");
  1599. // Do not apply bonuses for a cold callsite including the
  1600. // LastCallToStatic bonus. While this bonus might result in code size
  1601. // reduction, it can cause the size of a non-cold caller to increase
  1602. // preventing it from being inlined.
  1603. DisallowAllBonuses();
  1604. Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
  1605. } else if (PSI) {
  1606. // Use callee's global profile information only if we have no way of
  1607. // determining this via callsite information.
  1608. if (PSI->isFunctionEntryHot(&Callee)) {
  1609. LLVM_DEBUG(dbgs() << "Hot callee.\n");
  1610. // If callsite hotness can not be determined, we may still know
  1611. // that the callee is hot and treat it as a weaker hint for threshold
  1612. // increase.
  1613. Threshold = MaxIfValid(Threshold, Params.HintThreshold);
  1614. } else if (PSI->isFunctionEntryCold(&Callee)) {
  1615. LLVM_DEBUG(dbgs() << "Cold callee.\n");
  1616. // Do not apply bonuses for a cold callee including the
  1617. // LastCallToStatic bonus. While this bonus might result in code size
  1618. // reduction, it can cause the size of a non-cold caller to increase
  1619. // preventing it from being inlined.
  1620. DisallowAllBonuses();
  1621. Threshold = MinIfValid(Threshold, Params.ColdThreshold);
  1622. }
  1623. }
  1624. }
  1625. Threshold += TTI.adjustInliningThreshold(&Call);
  1626. // Finally, take the target-specific inlining threshold multiplier into
  1627. // account.
  1628. Threshold *= TTI.getInliningThresholdMultiplier();
  1629. SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
  1630. VectorBonus = Threshold * VectorBonusPercent / 100;
  1631. // If there is only one call of the function, and it has internal linkage,
  1632. // the cost of inlining it drops dramatically. It may seem odd to update
  1633. // Cost in updateThreshold, but the bonus depends on the logic in this method.
  1634. if (isSoleCallToLocalFunction(Call, F)) {
  1635. Cost -= LastCallToStaticBonus;
  1636. StaticBonusApplied = LastCallToStaticBonus;
  1637. }
  1638. }
  1639. bool CallAnalyzer::visitCmpInst(CmpInst &I) {
  1640. Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  1641. // First try to handle simplified comparisons.
  1642. if (simplifyInstruction(I))
  1643. return true;
  1644. if (I.getOpcode() == Instruction::FCmp)
  1645. return false;
  1646. // Otherwise look for a comparison between constant offset pointers with
  1647. // a common base.
  1648. Value *LHSBase, *RHSBase;
  1649. APInt LHSOffset, RHSOffset;
  1650. std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  1651. if (LHSBase) {
  1652. std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
  1653. if (RHSBase && LHSBase == RHSBase) {
  1654. // We have common bases, fold the icmp to a constant based on the
  1655. // offsets.
  1656. Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
  1657. Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
  1658. if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
  1659. SimplifiedValues[&I] = C;
  1660. ++NumConstantPtrCmps;
  1661. return true;
  1662. }
  1663. }
  1664. }
  1665. // If the comparison is an equality comparison with null, we can simplify it
  1666. // if we know the value (argument) can't be null
  1667. if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
  1668. isKnownNonNullInCallee(I.getOperand(0))) {
  1669. bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
  1670. SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
  1671. : ConstantInt::getFalse(I.getType());
  1672. return true;
  1673. }
  1674. return handleSROA(I.getOperand(0), isa<ConstantPointerNull>(I.getOperand(1)));
  1675. }
  1676. bool CallAnalyzer::visitSub(BinaryOperator &I) {
  1677. // Try to handle a special case: we can fold computing the difference of two
  1678. // constant-related pointers.
  1679. Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  1680. Value *LHSBase, *RHSBase;
  1681. APInt LHSOffset, RHSOffset;
  1682. std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  1683. if (LHSBase) {
  1684. std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
  1685. if (RHSBase && LHSBase == RHSBase) {
  1686. // We have common bases, fold the subtract to a constant based on the
  1687. // offsets.
  1688. Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
  1689. Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
  1690. if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
  1691. SimplifiedValues[&I] = C;
  1692. ++NumConstantPtrDiffs;
  1693. return true;
  1694. }
  1695. }
  1696. }
  1697. // Otherwise, fall back to the generic logic for simplifying and handling
  1698. // instructions.
  1699. return Base::visitSub(I);
  1700. }
  1701. bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
  1702. Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  1703. Constant *CLHS = dyn_cast<Constant>(LHS);
  1704. if (!CLHS)
  1705. CLHS = SimplifiedValues.lookup(LHS);
  1706. Constant *CRHS = dyn_cast<Constant>(RHS);
  1707. if (!CRHS)
  1708. CRHS = SimplifiedValues.lookup(RHS);
  1709. Value *SimpleV = nullptr;
  1710. if (auto FI = dyn_cast<FPMathOperator>(&I))
  1711. SimpleV = simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS,
  1712. FI->getFastMathFlags(), DL);
  1713. else
  1714. SimpleV =
  1715. simplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
  1716. if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
  1717. SimplifiedValues[&I] = C;
  1718. if (SimpleV)
  1719. return true;
  1720. // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
  1721. disableSROA(LHS);
  1722. disableSROA(RHS);
  1723. // If the instruction is floating point, and the target says this operation
  1724. // is expensive, this may eventually become a library call. Treat the cost
  1725. // as such. Unless it's fneg which can be implemented with an xor.
  1726. using namespace llvm::PatternMatch;
  1727. if (I.getType()->isFloatingPointTy() &&
  1728. TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive &&
  1729. !match(&I, m_FNeg(m_Value())))
  1730. onCallPenalty();
  1731. return false;
  1732. }
  1733. bool CallAnalyzer::visitFNeg(UnaryOperator &I) {
  1734. Value *Op = I.getOperand(0);
  1735. Constant *COp = dyn_cast<Constant>(Op);
  1736. if (!COp)
  1737. COp = SimplifiedValues.lookup(Op);
  1738. Value *SimpleV = simplifyFNegInst(
  1739. COp ? COp : Op, cast<FPMathOperator>(I).getFastMathFlags(), DL);
  1740. if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
  1741. SimplifiedValues[&I] = C;
  1742. if (SimpleV)
  1743. return true;
  1744. // Disable any SROA on arguments to arbitrary, unsimplified fneg.
  1745. disableSROA(Op);
  1746. return false;
  1747. }
  1748. bool CallAnalyzer::visitLoad(LoadInst &I) {
  1749. if (handleSROA(I.getPointerOperand(), I.isSimple()))
  1750. return true;
  1751. // If the data is already loaded from this address and hasn't been clobbered
  1752. // by any stores or calls, this load is likely to be redundant and can be
  1753. // eliminated.
  1754. if (EnableLoadElimination &&
  1755. !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
  1756. onLoadEliminationOpportunity();
  1757. return true;
  1758. }
  1759. onMemAccess();
  1760. return false;
  1761. }
  1762. bool CallAnalyzer::visitStore(StoreInst &I) {
  1763. if (handleSROA(I.getPointerOperand(), I.isSimple()))
  1764. return true;
  1765. // The store can potentially clobber loads and prevent repeated loads from
  1766. // being eliminated.
  1767. // FIXME:
  1768. // 1. We can probably keep an initial set of eliminatable loads substracted
  1769. // from the cost even when we finally see a store. We just need to disable
  1770. // *further* accumulation of elimination savings.
  1771. // 2. We should probably at some point thread MemorySSA for the callee into
  1772. // this and then use that to actually compute *really* precise savings.
  1773. disableLoadElimination();
  1774. onMemAccess();
  1775. return false;
  1776. }
  1777. bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
  1778. // Constant folding for extract value is trivial.
  1779. if (simplifyInstruction(I))
  1780. return true;
  1781. // SROA can't look through these, but they may be free.
  1782. return Base::visitExtractValue(I);
  1783. }
  1784. bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
  1785. // Constant folding for insert value is trivial.
  1786. if (simplifyInstruction(I))
  1787. return true;
  1788. // SROA can't look through these, but they may be free.
  1789. return Base::visitInsertValue(I);
  1790. }
  1791. /// Try to simplify a call site.
  1792. ///
  1793. /// Takes a concrete function and callsite and tries to actually simplify it by
  1794. /// analyzing the arguments and call itself with instsimplify. Returns true if
  1795. /// it has simplified the callsite to some other entity (a constant), making it
  1796. /// free.
  1797. bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
  1798. // FIXME: Using the instsimplify logic directly for this is inefficient
  1799. // because we have to continually rebuild the argument list even when no
  1800. // simplifications can be performed. Until that is fixed with remapping
  1801. // inside of instsimplify, directly constant fold calls here.
  1802. if (!canConstantFoldCallTo(&Call, F))
  1803. return false;
  1804. // Try to re-map the arguments to constants.
  1805. SmallVector<Constant *, 4> ConstantArgs;
  1806. ConstantArgs.reserve(Call.arg_size());
  1807. for (Value *I : Call.args()) {
  1808. Constant *C = dyn_cast<Constant>(I);
  1809. if (!C)
  1810. C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(I));
  1811. if (!C)
  1812. return false; // This argument doesn't map to a constant.
  1813. ConstantArgs.push_back(C);
  1814. }
  1815. if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) {
  1816. SimplifiedValues[&Call] = C;
  1817. return true;
  1818. }
  1819. return false;
  1820. }
  1821. bool CallAnalyzer::visitCallBase(CallBase &Call) {
  1822. if (!onCallBaseVisitStart(Call))
  1823. return true;
  1824. if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
  1825. !F.hasFnAttribute(Attribute::ReturnsTwice)) {
  1826. // This aborts the entire analysis.
  1827. ExposesReturnsTwice = true;
  1828. return false;
  1829. }
  1830. if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
  1831. ContainsNoDuplicateCall = true;
  1832. Function *F = Call.getCalledFunction();
  1833. bool IsIndirectCall = !F;
  1834. if (IsIndirectCall) {
  1835. // Check if this happens to be an indirect function call to a known function
  1836. // in this inline context. If not, we've done all we can.
  1837. Value *Callee = Call.getCalledOperand();
  1838. F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
  1839. if (!F || F->getFunctionType() != Call.getFunctionType()) {
  1840. onCallArgumentSetup(Call);
  1841. if (!Call.onlyReadsMemory())
  1842. disableLoadElimination();
  1843. return Base::visitCallBase(Call);
  1844. }
  1845. }
  1846. assert(F && "Expected a call to a known function");
  1847. // When we have a concrete function, first try to simplify it directly.
  1848. if (simplifyCallSite(F, Call))
  1849. return true;
  1850. // Next check if it is an intrinsic we know about.
  1851. // FIXME: Lift this into part of the InstVisitor.
  1852. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
  1853. switch (II->getIntrinsicID()) {
  1854. default:
  1855. if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
  1856. disableLoadElimination();
  1857. return Base::visitCallBase(Call);
  1858. case Intrinsic::load_relative:
  1859. onLoadRelativeIntrinsic();
  1860. return false;
  1861. case Intrinsic::memset:
  1862. case Intrinsic::memcpy:
  1863. case Intrinsic::memmove:
  1864. disableLoadElimination();
  1865. // SROA can usually chew through these intrinsics, but they aren't free.
  1866. return false;
  1867. case Intrinsic::icall_branch_funnel:
  1868. case Intrinsic::localescape:
  1869. HasUninlineableIntrinsic = true;
  1870. return false;
  1871. case Intrinsic::vastart:
  1872. InitsVargArgs = true;
  1873. return false;
  1874. case Intrinsic::launder_invariant_group:
  1875. case Intrinsic::strip_invariant_group:
  1876. if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
  1877. SROAArgValues[II] = SROAArg;
  1878. return true;
  1879. case Intrinsic::is_constant:
  1880. return simplifyIntrinsicCallIsConstant(Call);
  1881. case Intrinsic::objectsize:
  1882. return simplifyIntrinsicCallObjectSize(Call);
  1883. }
  1884. }
  1885. if (F == Call.getFunction()) {
  1886. // This flag will fully abort the analysis, so don't bother with anything
  1887. // else.
  1888. IsRecursiveCall = true;
  1889. if (!AllowRecursiveCall)
  1890. return false;
  1891. }
  1892. if (TTI.isLoweredToCall(F)) {
  1893. onLoweredCall(F, Call, IsIndirectCall);
  1894. }
  1895. if (!(Call.onlyReadsMemory() || (IsIndirectCall && F->onlyReadsMemory())))
  1896. disableLoadElimination();
  1897. return Base::visitCallBase(Call);
  1898. }
  1899. bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
  1900. // At least one return instruction will be free after inlining.
  1901. bool Free = !HasReturn;
  1902. HasReturn = true;
  1903. return Free;
  1904. }
  1905. bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
  1906. // We model unconditional branches as essentially free -- they really
  1907. // shouldn't exist at all, but handling them makes the behavior of the
  1908. // inliner more regular and predictable. Interestingly, conditional branches
  1909. // which will fold away are also free.
  1910. return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
  1911. isa_and_nonnull<ConstantInt>(
  1912. SimplifiedValues.lookup(BI.getCondition()));
  1913. }
  1914. bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
  1915. bool CheckSROA = SI.getType()->isPointerTy();
  1916. Value *TrueVal = SI.getTrueValue();
  1917. Value *FalseVal = SI.getFalseValue();
  1918. Constant *TrueC = dyn_cast<Constant>(TrueVal);
  1919. if (!TrueC)
  1920. TrueC = SimplifiedValues.lookup(TrueVal);
  1921. Constant *FalseC = dyn_cast<Constant>(FalseVal);
  1922. if (!FalseC)
  1923. FalseC = SimplifiedValues.lookup(FalseVal);
  1924. Constant *CondC =
  1925. dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));
  1926. if (!CondC) {
  1927. // Select C, X, X => X
  1928. if (TrueC == FalseC && TrueC) {
  1929. SimplifiedValues[&SI] = TrueC;
  1930. return true;
  1931. }
  1932. if (!CheckSROA)
  1933. return Base::visitSelectInst(SI);
  1934. std::pair<Value *, APInt> TrueBaseAndOffset =
  1935. ConstantOffsetPtrs.lookup(TrueVal);
  1936. std::pair<Value *, APInt> FalseBaseAndOffset =
  1937. ConstantOffsetPtrs.lookup(FalseVal);
  1938. if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
  1939. ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;
  1940. if (auto *SROAArg = getSROAArgForValueOrNull(TrueVal))
  1941. SROAArgValues[&SI] = SROAArg;
  1942. return true;
  1943. }
  1944. return Base::visitSelectInst(SI);
  1945. }
  1946. // Select condition is a constant.
  1947. Value *SelectedV = CondC->isAllOnesValue() ? TrueVal
  1948. : (CondC->isNullValue()) ? FalseVal
  1949. : nullptr;
  1950. if (!SelectedV) {
  1951. // Condition is a vector constant that is not all 1s or all 0s. If all
  1952. // operands are constants, ConstantExpr::getSelect() can handle the cases
  1953. // such as select vectors.
  1954. if (TrueC && FalseC) {
  1955. if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
  1956. SimplifiedValues[&SI] = C;
  1957. return true;
  1958. }
  1959. }
  1960. return Base::visitSelectInst(SI);
  1961. }
  1962. // Condition is either all 1s or all 0s. SI can be simplified.
  1963. if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) {
  1964. SimplifiedValues[&SI] = SelectedC;
  1965. return true;
  1966. }
  1967. if (!CheckSROA)
  1968. return true;
  1969. std::pair<Value *, APInt> BaseAndOffset =
  1970. ConstantOffsetPtrs.lookup(SelectedV);
  1971. if (BaseAndOffset.first) {
  1972. ConstantOffsetPtrs[&SI] = BaseAndOffset;
  1973. if (auto *SROAArg = getSROAArgForValueOrNull(SelectedV))
  1974. SROAArgValues[&SI] = SROAArg;
  1975. }
  1976. return true;
  1977. }
  1978. bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
  1979. // We model unconditional switches as free, see the comments on handling
  1980. // branches.
  1981. if (isa<ConstantInt>(SI.getCondition()))
  1982. return true;
  1983. if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
  1984. if (isa<ConstantInt>(V))
  1985. return true;
  1986. // Assume the most general case where the switch is lowered into
  1987. // either a jump table, bit test, or a balanced binary tree consisting of
  1988. // case clusters without merging adjacent clusters with the same
  1989. // destination. We do not consider the switches that are lowered with a mix
  1990. // of jump table/bit test/binary search tree. The cost of the switch is
  1991. // proportional to the size of the tree or the size of jump table range.
  1992. //
  1993. // NB: We convert large switches which are just used to initialize large phi
  1994. // nodes to lookup tables instead in simplifycfg, so this shouldn't prevent
  1995. // inlining those. It will prevent inlining in cases where the optimization
  1996. // does not (yet) fire.
  1997. unsigned JumpTableSize = 0;
  1998. BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(F)) : nullptr;
  1999. unsigned NumCaseCluster =
  2000. TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize, PSI, BFI);
  2001. onFinalizeSwitch(JumpTableSize, NumCaseCluster);
  2002. return false;
  2003. }
  2004. bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
  2005. // We never want to inline functions that contain an indirectbr. This is
  2006. // incorrect because all the blockaddress's (in static global initializers
  2007. // for example) would be referring to the original function, and this
  2008. // indirect jump would jump from the inlined copy of the function into the
  2009. // original function which is extremely undefined behavior.
  2010. // FIXME: This logic isn't really right; we can safely inline functions with
  2011. // indirectbr's as long as no other function or global references the
  2012. // blockaddress of a block within the current function.
  2013. HasIndirectBr = true;
  2014. return false;
  2015. }
  2016. bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
  2017. // FIXME: It's not clear that a single instruction is an accurate model for
  2018. // the inline cost of a resume instruction.
  2019. return false;
  2020. }
  2021. bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
  2022. // FIXME: It's not clear that a single instruction is an accurate model for
  2023. // the inline cost of a cleanupret instruction.
  2024. return false;
  2025. }
  2026. bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
  2027. // FIXME: It's not clear that a single instruction is an accurate model for
  2028. // the inline cost of a catchret instruction.
  2029. return false;
  2030. }
  2031. bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
  2032. // FIXME: It might be reasonably to discount the cost of instructions leading
  2033. // to unreachable as they have the lowest possible impact on both runtime and
  2034. // code size.
  2035. return true; // No actual code is needed for unreachable.
  2036. }
  2037. bool CallAnalyzer::visitInstruction(Instruction &I) {
  2038. // Some instructions are free. All of the free intrinsics can also be
  2039. // handled by SROA, etc.
  2040. if (TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
  2041. TargetTransformInfo::TCC_Free)
  2042. return true;
  2043. // We found something we don't understand or can't handle. Mark any SROA-able
  2044. // values in the operand list as no longer viable.
  2045. for (const Use &Op : I.operands())
  2046. disableSROA(Op);
  2047. return false;
  2048. }
  2049. /// Analyze a basic block for its contribution to the inline cost.
  2050. ///
  2051. /// This method walks the analyzer over every instruction in the given basic
  2052. /// block and accounts for their cost during inlining at this callsite. It
  2053. /// aborts early if the threshold has been exceeded or an impossible to inline
  2054. /// construct has been detected. It returns false if inlining is no longer
  2055. /// viable, and true if inlining remains viable.
  2056. InlineResult
  2057. CallAnalyzer::analyzeBlock(BasicBlock *BB,
  2058. SmallPtrSetImpl<const Value *> &EphValues) {
  2059. for (Instruction &I : *BB) {
  2060. // FIXME: Currently, the number of instructions in a function regardless of
  2061. // our ability to simplify them during inline to constants or dead code,
  2062. // are actually used by the vector bonus heuristic. As long as that's true,
  2063. // we have to special case debug intrinsics here to prevent differences in
  2064. // inlining due to debug symbols. Eventually, the number of unsimplified
  2065. // instructions shouldn't factor into the cost computation, but until then,
  2066. // hack around it here.
  2067. // Similarly, skip pseudo-probes.
  2068. if (I.isDebugOrPseudoInst())
  2069. continue;
  2070. // Skip ephemeral values.
  2071. if (EphValues.count(&I))
  2072. continue;
  2073. ++NumInstructions;
  2074. if (isa<ExtractElementInst>(I) || I.getType()->isVectorTy())
  2075. ++NumVectorInstructions;
  2076. // If the instruction simplified to a constant, there is no cost to this
  2077. // instruction. Visit the instructions using our InstVisitor to account for
  2078. // all of the per-instruction logic. The visit tree returns true if we
  2079. // consumed the instruction in any way, and false if the instruction's base
  2080. // cost should count against inlining.
  2081. onInstructionAnalysisStart(&I);
  2082. if (Base::visit(&I))
  2083. ++NumInstructionsSimplified;
  2084. else
  2085. onMissedSimplification();
  2086. onInstructionAnalysisFinish(&I);
  2087. using namespace ore;
  2088. // If the visit this instruction detected an uninlinable pattern, abort.
  2089. InlineResult IR = InlineResult::success();
  2090. if (IsRecursiveCall && !AllowRecursiveCall)
  2091. IR = InlineResult::failure("recursive");
  2092. else if (ExposesReturnsTwice)
  2093. IR = InlineResult::failure("exposes returns twice");
  2094. else if (HasDynamicAlloca)
  2095. IR = InlineResult::failure("dynamic alloca");
  2096. else if (HasIndirectBr)
  2097. IR = InlineResult::failure("indirect branch");
  2098. else if (HasUninlineableIntrinsic)
  2099. IR = InlineResult::failure("uninlinable intrinsic");
  2100. else if (InitsVargArgs)
  2101. IR = InlineResult::failure("varargs");
  2102. if (!IR.isSuccess()) {
  2103. if (ORE)
  2104. ORE->emit([&]() {
  2105. return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
  2106. &CandidateCall)
  2107. << NV("Callee", &F) << " has uninlinable pattern ("
  2108. << NV("InlineResult", IR.getFailureReason())
  2109. << ") and cost is not fully computed";
  2110. });
  2111. return IR;
  2112. }
  2113. // If the caller is a recursive function then we don't want to inline
  2114. // functions which allocate a lot of stack space because it would increase
  2115. // the caller stack usage dramatically.
  2116. if (IsCallerRecursive && AllocatedSize > RecurStackSizeThreshold) {
  2117. auto IR =
  2118. InlineResult::failure("recursive and allocates too much stack space");
  2119. if (ORE)
  2120. ORE->emit([&]() {
  2121. return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
  2122. &CandidateCall)
  2123. << NV("Callee", &F) << " is "
  2124. << NV("InlineResult", IR.getFailureReason())
  2125. << ". Cost is not fully computed";
  2126. });
  2127. return IR;
  2128. }
  2129. if (shouldStop())
  2130. return InlineResult::failure(
  2131. "Call site analysis is not favorable to inlining.");
  2132. }
  2133. return InlineResult::success();
  2134. }
  2135. /// Compute the base pointer and cumulative constant offsets for V.
  2136. ///
  2137. /// This strips all constant offsets off of V, leaving it the base pointer, and
  2138. /// accumulates the total constant offset applied in the returned constant. It
  2139. /// returns 0 if V is not a pointer, and returns the constant '0' if there are
  2140. /// no constant offsets applied.
  2141. ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
  2142. if (!V->getType()->isPointerTy())
  2143. return nullptr;
  2144. unsigned AS = V->getType()->getPointerAddressSpace();
  2145. unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
  2146. APInt Offset = APInt::getZero(IntPtrWidth);
  2147. // Even though we don't look through PHI nodes, we could be called on an
  2148. // instruction in an unreachable block, which may be on a cycle.
  2149. SmallPtrSet<Value *, 4> Visited;
  2150. Visited.insert(V);
  2151. do {
  2152. if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
  2153. if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
  2154. return nullptr;
  2155. V = GEP->getPointerOperand();
  2156. } else if (Operator::getOpcode(V) == Instruction::BitCast) {
  2157. V = cast<Operator>(V)->getOperand(0);
  2158. } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
  2159. if (GA->isInterposable())
  2160. break;
  2161. V = GA->getAliasee();
  2162. } else {
  2163. break;
  2164. }
  2165. assert(V->getType()->isPointerTy() && "Unexpected operand type!");
  2166. } while (Visited.insert(V).second);
  2167. Type *IdxPtrTy = DL.getIndexType(V->getType());
  2168. return cast<ConstantInt>(ConstantInt::get(IdxPtrTy, Offset));
  2169. }
  2170. /// Find dead blocks due to deleted CFG edges during inlining.
  2171. ///
  2172. /// If we know the successor of the current block, \p CurrBB, has to be \p
  2173. /// NextBB, the other successors of \p CurrBB are dead if these successors have
  2174. /// no live incoming CFG edges. If one block is found to be dead, we can
  2175. /// continue growing the dead block list by checking the successors of the dead
  2176. /// blocks to see if all their incoming edges are dead or not.
  2177. void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
  2178. auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) {
  2179. // A CFG edge is dead if the predecessor is dead or the predecessor has a
  2180. // known successor which is not the one under exam.
  2181. return (DeadBlocks.count(Pred) ||
  2182. (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ));
  2183. };
  2184. auto IsNewlyDead = [&](BasicBlock *BB) {
  2185. // If all the edges to a block are dead, the block is also dead.
  2186. return (!DeadBlocks.count(BB) &&
  2187. llvm::all_of(predecessors(BB),
  2188. [&](BasicBlock *P) { return IsEdgeDead(P, BB); }));
  2189. };
  2190. for (BasicBlock *Succ : successors(CurrBB)) {
  2191. if (Succ == NextBB || !IsNewlyDead(Succ))
  2192. continue;
  2193. SmallVector<BasicBlock *, 4> NewDead;
  2194. NewDead.push_back(Succ);
  2195. while (!NewDead.empty()) {
  2196. BasicBlock *Dead = NewDead.pop_back_val();
  2197. if (DeadBlocks.insert(Dead).second)
  2198. // Continue growing the dead block lists.
  2199. for (BasicBlock *S : successors(Dead))
  2200. if (IsNewlyDead(S))
  2201. NewDead.push_back(S);
  2202. }
  2203. }
  2204. }
  2205. /// Analyze a call site for potential inlining.
  2206. ///
  2207. /// Returns true if inlining this call is viable, and false if it is not
  2208. /// viable. It computes the cost and adjusts the threshold based on numerous
  2209. /// factors and heuristics. If this method returns false but the computed cost
  2210. /// is below the computed threshold, then inlining was forcibly disabled by
  2211. /// some artifact of the routine.
  2212. InlineResult CallAnalyzer::analyze() {
  2213. ++NumCallsAnalyzed;
  2214. auto Result = onAnalysisStart();
  2215. if (!Result.isSuccess())
  2216. return Result;
  2217. if (F.empty())
  2218. return InlineResult::success();
  2219. Function *Caller = CandidateCall.getFunction();
  2220. // Check if the caller function is recursive itself.
  2221. for (User *U : Caller->users()) {
  2222. CallBase *Call = dyn_cast<CallBase>(U);
  2223. if (Call && Call->getFunction() == Caller) {
  2224. IsCallerRecursive = true;
  2225. break;
  2226. }
  2227. }
  2228. // Populate our simplified values by mapping from function arguments to call
  2229. // arguments with known important simplifications.
  2230. auto CAI = CandidateCall.arg_begin();
  2231. for (Argument &FAI : F.args()) {
  2232. assert(CAI != CandidateCall.arg_end());
  2233. if (Constant *C = dyn_cast<Constant>(CAI))
  2234. SimplifiedValues[&FAI] = C;
  2235. Value *PtrArg = *CAI;
  2236. if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
  2237. ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg, C->getValue());
  2238. // We can SROA any pointer arguments derived from alloca instructions.
  2239. if (auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) {
  2240. SROAArgValues[&FAI] = SROAArg;
  2241. onInitializeSROAArg(SROAArg);
  2242. EnabledSROAAllocas.insert(SROAArg);
  2243. }
  2244. }
  2245. ++CAI;
  2246. }
  2247. NumConstantArgs = SimplifiedValues.size();
  2248. NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
  2249. NumAllocaArgs = SROAArgValues.size();
  2250. // FIXME: If a caller has multiple calls to a callee, we end up recomputing
  2251. // the ephemeral values multiple times (and they're completely determined by
  2252. // the callee, so this is purely duplicate work).
  2253. SmallPtrSet<const Value *, 32> EphValues;
  2254. CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
  2255. // The worklist of live basic blocks in the callee *after* inlining. We avoid
  2256. // adding basic blocks of the callee which can be proven to be dead for this
  2257. // particular call site in order to get more accurate cost estimates. This
  2258. // requires a somewhat heavyweight iteration pattern: we need to walk the
  2259. // basic blocks in a breadth-first order as we insert live successors. To
  2260. // accomplish this, prioritizing for small iterations because we exit after
  2261. // crossing our threshold, we use a small-size optimized SetVector.
  2262. typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
  2263. SmallPtrSet<BasicBlock *, 16>>
  2264. BBSetVector;
  2265. BBSetVector BBWorklist;
  2266. BBWorklist.insert(&F.getEntryBlock());
  2267. // Note that we *must not* cache the size, this loop grows the worklist.
  2268. for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
  2269. if (shouldStop())
  2270. break;
  2271. BasicBlock *BB = BBWorklist[Idx];
  2272. if (BB->empty())
  2273. continue;
  2274. onBlockStart(BB);
  2275. // Disallow inlining a blockaddress with uses other than strictly callbr.
  2276. // A blockaddress only has defined behavior for an indirect branch in the
  2277. // same function, and we do not currently support inlining indirect
  2278. // branches. But, the inliner may not see an indirect branch that ends up
  2279. // being dead code at a particular call site. If the blockaddress escapes
  2280. // the function, e.g., via a global variable, inlining may lead to an
  2281. // invalid cross-function reference.
  2282. // FIXME: pr/39560: continue relaxing this overt restriction.
  2283. if (BB->hasAddressTaken())
  2284. for (User *U : BlockAddress::get(&*BB)->users())
  2285. if (!isa<CallBrInst>(*U))
  2286. return InlineResult::failure("blockaddress used outside of callbr");
  2287. // Analyze the cost of this block. If we blow through the threshold, this
  2288. // returns false, and we can bail on out.
  2289. InlineResult IR = analyzeBlock(BB, EphValues);
  2290. if (!IR.isSuccess())
  2291. return IR;
  2292. Instruction *TI = BB->getTerminator();
  2293. // Add in the live successors by first checking whether we have terminator
  2294. // that may be simplified based on the values simplified by this call.
  2295. if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
  2296. if (BI->isConditional()) {
  2297. Value *Cond = BI->getCondition();
  2298. if (ConstantInt *SimpleCond =
  2299. dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
  2300. BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0);
  2301. BBWorklist.insert(NextBB);
  2302. KnownSuccessors[BB] = NextBB;
  2303. findDeadBlocks(BB, NextBB);
  2304. continue;
  2305. }
  2306. }
  2307. } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
  2308. Value *Cond = SI->getCondition();
  2309. if (ConstantInt *SimpleCond =
  2310. dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
  2311. BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor();
  2312. BBWorklist.insert(NextBB);
  2313. KnownSuccessors[BB] = NextBB;
  2314. findDeadBlocks(BB, NextBB);
  2315. continue;
  2316. }
  2317. }
  2318. // If we're unable to select a particular successor, just count all of
  2319. // them.
  2320. for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
  2321. ++TIdx)
  2322. BBWorklist.insert(TI->getSuccessor(TIdx));
  2323. onBlockAnalyzed(BB);
  2324. }
  2325. // If this is a noduplicate call, we can still inline as long as
  2326. // inlining this would cause the removal of the caller (so the instruction
  2327. // is not actually duplicated, just moved).
  2328. if (!isSoleCallToLocalFunction(CandidateCall, F) && ContainsNoDuplicateCall)
  2329. return InlineResult::failure("noduplicate");
  2330. // If the callee's stack size exceeds the user-specified threshold,
  2331. // do not let it be inlined.
  2332. // The command line option overrides a limit set in the function attributes.
  2333. size_t FinalStackSizeThreshold = StackSizeThreshold;
  2334. if (!StackSizeThreshold.getNumOccurrences())
  2335. if (std::optional<int> AttrMaxStackSize = getStringFnAttrAsInt(
  2336. Caller, InlineConstants::MaxInlineStackSizeAttributeName))
  2337. FinalStackSizeThreshold = *AttrMaxStackSize;
  2338. if (AllocatedSize > FinalStackSizeThreshold)
  2339. return InlineResult::failure("stacksize");
  2340. return finalizeAnalysis();
  2341. }
  2342. void InlineCostCallAnalyzer::print(raw_ostream &OS) {
  2343. #define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n"
  2344. if (PrintInstructionComments)
  2345. F.print(OS, &Writer);
  2346. DEBUG_PRINT_STAT(NumConstantArgs);
  2347. DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
  2348. DEBUG_PRINT_STAT(NumAllocaArgs);
  2349. DEBUG_PRINT_STAT(NumConstantPtrCmps);
  2350. DEBUG_PRINT_STAT(NumConstantPtrDiffs);
  2351. DEBUG_PRINT_STAT(NumInstructionsSimplified);
  2352. DEBUG_PRINT_STAT(NumInstructions);
  2353. DEBUG_PRINT_STAT(SROACostSavings);
  2354. DEBUG_PRINT_STAT(SROACostSavingsLost);
  2355. DEBUG_PRINT_STAT(LoadEliminationCost);
  2356. DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
  2357. DEBUG_PRINT_STAT(Cost);
  2358. DEBUG_PRINT_STAT(Threshold);
  2359. #undef DEBUG_PRINT_STAT
  2360. }
  2361. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  2362. /// Dump stats about this call's analysis.
  2363. LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(dbgs()); }
  2364. #endif
  2365. /// Test that there are no attribute conflicts between Caller and Callee
  2366. /// that prevent inlining.
  2367. static bool functionsHaveCompatibleAttributes(
  2368. Function *Caller, Function *Callee, TargetTransformInfo &TTI,
  2369. function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) {
  2370. // Note that CalleeTLI must be a copy not a reference. The legacy pass manager
  2371. // caches the most recently created TLI in the TargetLibraryInfoWrapperPass
  2372. // object, and always returns the same object (which is overwritten on each
  2373. // GetTLI call). Therefore we copy the first result.
  2374. auto CalleeTLI = GetTLI(*Callee);
  2375. return (IgnoreTTIInlineCompatible ||
  2376. TTI.areInlineCompatible(Caller, Callee)) &&
  2377. GetTLI(*Caller).areInlineCompatible(CalleeTLI,
  2378. InlineCallerSupersetNoBuiltin) &&
  2379. AttributeFuncs::areInlineCompatible(*Caller, *Callee);
  2380. }
  2381. int llvm::getCallsiteCost(const CallBase &Call, const DataLayout &DL) {
  2382. int64_t Cost = 0;
  2383. for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) {
  2384. if (Call.isByValArgument(I)) {
  2385. // We approximate the number of loads and stores needed by dividing the
  2386. // size of the byval type by the target's pointer size.
  2387. PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
  2388. unsigned TypeSize = DL.getTypeSizeInBits(Call.getParamByValType(I));
  2389. unsigned AS = PTy->getAddressSpace();
  2390. unsigned PointerSize = DL.getPointerSizeInBits(AS);
  2391. // Ceiling division.
  2392. unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
  2393. // If it generates more than 8 stores it is likely to be expanded as an
  2394. // inline memcpy so we take that as an upper bound. Otherwise we assume
  2395. // one load and one store per word copied.
  2396. // FIXME: The maxStoresPerMemcpy setting from the target should be used
  2397. // here instead of a magic number of 8, but it's not available via
  2398. // DataLayout.
  2399. NumStores = std::min(NumStores, 8U);
  2400. Cost += 2 * NumStores * InstrCost;
  2401. } else {
  2402. // For non-byval arguments subtract off one instruction per call
  2403. // argument.
  2404. Cost += InstrCost;
  2405. }
  2406. }
  2407. // The call instruction also disappears after inlining.
  2408. Cost += InstrCost;
  2409. Cost += CallPenalty;
  2410. return std::min<int64_t>(Cost, INT_MAX);
  2411. }
  2412. InlineCost llvm::getInlineCost(
  2413. CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
  2414. function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
  2415. function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
  2416. function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
  2417. ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
  2418. return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
  2419. GetAssumptionCache, GetTLI, GetBFI, PSI, ORE);
  2420. }
  2421. std::optional<int> llvm::getInliningCostEstimate(
  2422. CallBase &Call, TargetTransformInfo &CalleeTTI,
  2423. function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
  2424. function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
  2425. ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
  2426. const InlineParams Params = {/* DefaultThreshold*/ 0,
  2427. /*HintThreshold*/ {},
  2428. /*ColdThreshold*/ {},
  2429. /*OptSizeThreshold*/ {},
  2430. /*OptMinSizeThreshold*/ {},
  2431. /*HotCallSiteThreshold*/ {},
  2432. /*LocallyHotCallSiteThreshold*/ {},
  2433. /*ColdCallSiteThreshold*/ {},
  2434. /*ComputeFullInlineCost*/ true,
  2435. /*EnableDeferral*/ true};
  2436. InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI,
  2437. GetAssumptionCache, GetBFI, PSI, ORE, true,
  2438. /*IgnoreThreshold*/ true);
  2439. auto R = CA.analyze();
  2440. if (!R.isSuccess())
  2441. return std::nullopt;
  2442. return CA.getCost();
  2443. }
  2444. std::optional<InlineCostFeatures> llvm::getInliningCostFeatures(
  2445. CallBase &Call, TargetTransformInfo &CalleeTTI,
  2446. function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
  2447. function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
  2448. ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
  2449. InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, PSI,
  2450. ORE, *Call.getCalledFunction(), Call);
  2451. auto R = CFA.analyze();
  2452. if (!R.isSuccess())
  2453. return std::nullopt;
  2454. return CFA.features();
  2455. }
  2456. std::optional<InlineResult> llvm::getAttributeBasedInliningDecision(
  2457. CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI,
  2458. function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
  2459. // Cannot inline indirect calls.
  2460. if (!Callee)
  2461. return InlineResult::failure("indirect call");
  2462. // When callee coroutine function is inlined into caller coroutine function
  2463. // before coro-split pass,
  2464. // coro-early pass can not handle this quiet well.
  2465. // So we won't inline the coroutine function if it have not been unsplited
  2466. if (Callee->isPresplitCoroutine())
  2467. return InlineResult::failure("unsplited coroutine call");
  2468. // Never inline calls with byval arguments that does not have the alloca
  2469. // address space. Since byval arguments can be replaced with a copy to an
  2470. // alloca, the inlined code would need to be adjusted to handle that the
  2471. // argument is in the alloca address space (so it is a little bit complicated
  2472. // to solve).
  2473. unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace();
  2474. for (unsigned I = 0, E = Call.arg_size(); I != E; ++I)
  2475. if (Call.isByValArgument(I)) {
  2476. PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType());
  2477. if (PTy->getAddressSpace() != AllocaAS)
  2478. return InlineResult::failure("byval arguments without alloca"
  2479. " address space");
  2480. }
  2481. // Calls to functions with always-inline attributes should be inlined
  2482. // whenever possible.
  2483. if (Call.hasFnAttr(Attribute::AlwaysInline)) {
  2484. if (Call.getAttributes().hasFnAttr(Attribute::NoInline))
  2485. return InlineResult::failure("noinline call site attribute");
  2486. auto IsViable = isInlineViable(*Callee);
  2487. if (IsViable.isSuccess())
  2488. return InlineResult::success();
  2489. return InlineResult::failure(IsViable.getFailureReason());
  2490. }
  2491. // Never inline functions with conflicting attributes (unless callee has
  2492. // always-inline attribute).
  2493. Function *Caller = Call.getCaller();
  2494. if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI, GetTLI))
  2495. return InlineResult::failure("conflicting attributes");
  2496. // Don't inline this call if the caller has the optnone attribute.
  2497. if (Caller->hasOptNone())
  2498. return InlineResult::failure("optnone attribute");
  2499. // Don't inline a function that treats null pointer as valid into a caller
  2500. // that does not have this attribute.
  2501. if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
  2502. return InlineResult::failure("nullptr definitions incompatible");
  2503. // Don't inline functions which can be interposed at link-time.
  2504. if (Callee->isInterposable())
  2505. return InlineResult::failure("interposable");
  2506. // Don't inline functions marked noinline.
  2507. if (Callee->hasFnAttribute(Attribute::NoInline))
  2508. return InlineResult::failure("noinline function attribute");
  2509. // Don't inline call sites marked noinline.
  2510. if (Call.isNoInline())
  2511. return InlineResult::failure("noinline call site attribute");
  2512. return std::nullopt;
  2513. }
  2514. InlineCost llvm::getInlineCost(
  2515. CallBase &Call, Function *Callee, const InlineParams &Params,
  2516. TargetTransformInfo &CalleeTTI,
  2517. function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
  2518. function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
  2519. function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
  2520. ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
  2521. auto UserDecision =
  2522. llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI);
  2523. if (UserDecision) {
  2524. if (UserDecision->isSuccess())
  2525. return llvm::InlineCost::getAlways("always inline attribute");
  2526. return llvm::InlineCost::getNever(UserDecision->getFailureReason());
  2527. }
  2528. LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
  2529. << "... (caller:" << Call.getCaller()->getName()
  2530. << ")\n");
  2531. InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
  2532. GetAssumptionCache, GetBFI, PSI, ORE);
  2533. InlineResult ShouldInline = CA.analyze();
  2534. LLVM_DEBUG(CA.dump());
  2535. // Always make cost benefit based decision explicit.
  2536. // We use always/never here since threshold is not meaningful,
  2537. // as it's not what drives cost-benefit analysis.
  2538. if (CA.wasDecidedByCostBenefit()) {
  2539. if (ShouldInline.isSuccess())
  2540. return InlineCost::getAlways("benefit over cost",
  2541. CA.getCostBenefitPair());
  2542. else
  2543. return InlineCost::getNever("cost over benefit", CA.getCostBenefitPair());
  2544. }
  2545. if (CA.wasDecidedByCostThreshold())
  2546. return InlineCost::get(CA.getCost(), CA.getThreshold(),
  2547. CA.getStaticBonusApplied());
  2548. // No details on how the decision was made, simply return always or never.
  2549. return ShouldInline.isSuccess()
  2550. ? InlineCost::getAlways("empty function")
  2551. : InlineCost::getNever(ShouldInline.getFailureReason());
  2552. }
  2553. InlineResult llvm::isInlineViable(Function &F) {
  2554. bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
  2555. for (BasicBlock &BB : F) {
  2556. // Disallow inlining of functions which contain indirect branches.
  2557. if (isa<IndirectBrInst>(BB.getTerminator()))
  2558. return InlineResult::failure("contains indirect branches");
  2559. // Disallow inlining of blockaddresses which are used by non-callbr
  2560. // instructions.
  2561. if (BB.hasAddressTaken())
  2562. for (User *U : BlockAddress::get(&BB)->users())
  2563. if (!isa<CallBrInst>(*U))
  2564. return InlineResult::failure("blockaddress used outside of callbr");
  2565. for (auto &II : BB) {
  2566. CallBase *Call = dyn_cast<CallBase>(&II);
  2567. if (!Call)
  2568. continue;
  2569. // Disallow recursive calls.
  2570. Function *Callee = Call->getCalledFunction();
  2571. if (&F == Callee)
  2572. return InlineResult::failure("recursive call");
  2573. // Disallow calls which expose returns-twice to a function not previously
  2574. // attributed as such.
  2575. if (!ReturnsTwice && isa<CallInst>(Call) &&
  2576. cast<CallInst>(Call)->canReturnTwice())
  2577. return InlineResult::failure("exposes returns-twice attribute");
  2578. if (Callee)
  2579. switch (Callee->getIntrinsicID()) {
  2580. default:
  2581. break;
  2582. case llvm::Intrinsic::icall_branch_funnel:
  2583. // Disallow inlining of @llvm.icall.branch.funnel because current
  2584. // backend can't separate call targets from call arguments.
  2585. return InlineResult::failure(
  2586. "disallowed inlining of @llvm.icall.branch.funnel");
  2587. case llvm::Intrinsic::localescape:
  2588. // Disallow inlining functions that call @llvm.localescape. Doing this
  2589. // correctly would require major changes to the inliner.
  2590. return InlineResult::failure(
  2591. "disallowed inlining of @llvm.localescape");
  2592. case llvm::Intrinsic::vastart:
  2593. // Disallow inlining of functions that initialize VarArgs with
  2594. // va_start.
  2595. return InlineResult::failure(
  2596. "contains VarArgs initialized with va_start");
  2597. }
  2598. }
  2599. }
  2600. return InlineResult::success();
  2601. }
  2602. // APIs to create InlineParams based on command line flags and/or other
  2603. // parameters.
  2604. InlineParams llvm::getInlineParams(int Threshold) {
  2605. InlineParams Params;
  2606. // This field is the threshold to use for a callee by default. This is
  2607. // derived from one or more of:
  2608. // * optimization or size-optimization levels,
  2609. // * a value passed to createFunctionInliningPass function, or
  2610. // * the -inline-threshold flag.
  2611. // If the -inline-threshold flag is explicitly specified, that is used
  2612. // irrespective of anything else.
  2613. if (InlineThreshold.getNumOccurrences() > 0)
  2614. Params.DefaultThreshold = InlineThreshold;
  2615. else
  2616. Params.DefaultThreshold = Threshold;
  2617. // Set the HintThreshold knob from the -inlinehint-threshold.
  2618. Params.HintThreshold = HintThreshold;
  2619. // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
  2620. Params.HotCallSiteThreshold = HotCallSiteThreshold;
  2621. // If the -locally-hot-callsite-threshold is explicitly specified, use it to
  2622. // populate LocallyHotCallSiteThreshold. Later, we populate
  2623. // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if
  2624. // we know that optimization level is O3 (in the getInlineParams variant that
  2625. // takes the opt and size levels).
  2626. // FIXME: Remove this check (and make the assignment unconditional) after
  2627. // addressing size regression issues at O2.
  2628. if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0)
  2629. Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
  2630. // Set the ColdCallSiteThreshold knob from the
  2631. // -inline-cold-callsite-threshold.
  2632. Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
  2633. // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
  2634. // -inlinehint-threshold commandline option is not explicitly given. If that
  2635. // option is present, then its value applies even for callees with size and
  2636. // minsize attributes.
  2637. // If the -inline-threshold is not specified, set the ColdThreshold from the
  2638. // -inlinecold-threshold even if it is not explicitly passed. If
  2639. // -inline-threshold is specified, then -inlinecold-threshold needs to be
  2640. // explicitly specified to set the ColdThreshold knob
  2641. if (InlineThreshold.getNumOccurrences() == 0) {
  2642. Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
  2643. Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
  2644. Params.ColdThreshold = ColdThreshold;
  2645. } else if (ColdThreshold.getNumOccurrences() > 0) {
  2646. Params.ColdThreshold = ColdThreshold;
  2647. }
  2648. return Params;
  2649. }
  2650. InlineParams llvm::getInlineParams() {
  2651. return getInlineParams(DefaultThreshold);
  2652. }
  2653. // Compute the default threshold for inlining based on the opt level and the
  2654. // size opt level.
  2655. static int computeThresholdFromOptLevels(unsigned OptLevel,
  2656. unsigned SizeOptLevel) {
  2657. if (OptLevel > 2)
  2658. return InlineConstants::OptAggressiveThreshold;
  2659. if (SizeOptLevel == 1) // -Os
  2660. return InlineConstants::OptSizeThreshold;
  2661. if (SizeOptLevel == 2) // -Oz
  2662. return InlineConstants::OptMinSizeThreshold;
  2663. return DefaultThreshold;
  2664. }
  2665. InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
  2666. auto Params =
  2667. getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
  2668. // At O3, use the value of -locally-hot-callsite-threshold option to populate
  2669. // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only
  2670. // when it is specified explicitly.
  2671. if (OptLevel > 2)
  2672. Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold;
  2673. return Params;
  2674. }
  2675. PreservedAnalyses
  2676. InlineCostAnnotationPrinterPass::run(Function &F,
  2677. FunctionAnalysisManager &FAM) {
  2678. PrintInstructionComments = true;
  2679. std::function<AssumptionCache &(Function &)> GetAssumptionCache =
  2680. [&](Function &F) -> AssumptionCache & {
  2681. return FAM.getResult<AssumptionAnalysis>(F);
  2682. };
  2683. Module *M = F.getParent();
  2684. ProfileSummaryInfo PSI(*M);
  2685. DataLayout DL(M);
  2686. TargetTransformInfo TTI(DL);
  2687. // FIXME: Redesign the usage of InlineParams to expand the scope of this pass.
  2688. // In the current implementation, the type of InlineParams doesn't matter as
  2689. // the pass serves only for verification of inliner's decisions.
  2690. // We can add a flag which determines InlineParams for this run. Right now,
  2691. // the default InlineParams are used.
  2692. const InlineParams Params = llvm::getInlineParams();
  2693. for (BasicBlock &BB : F) {
  2694. for (Instruction &I : BB) {
  2695. if (CallInst *CI = dyn_cast<CallInst>(&I)) {
  2696. Function *CalledFunction = CI->getCalledFunction();
  2697. if (!CalledFunction || CalledFunction->isDeclaration())
  2698. continue;
  2699. OptimizationRemarkEmitter ORE(CalledFunction);
  2700. InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params, TTI,
  2701. GetAssumptionCache, nullptr, &PSI, &ORE);
  2702. ICCA.analyze();
  2703. OS << " Analyzing call of " << CalledFunction->getName()
  2704. << "... (caller:" << CI->getCaller()->getName() << ")\n";
  2705. ICCA.print(OS);
  2706. OS << "\n";
  2707. }
  2708. }
  2709. }
  2710. return PreservedAnalyses::all();
  2711. }