FunctionSpecialization.cpp 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779
  1. //===- FunctionSpecialization.cpp - Function Specialization ---------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This specialises functions with constant parameters. Constant parameters
  10. // like function pointers and constant globals are propagated to the callee by
  11. // specializing the function. The main benefit of this pass at the moment is
  12. // that indirect calls are transformed into direct calls, which provides inline
  13. // opportunities that the inliner would not have been able to achieve. That's
  14. // why function specialisation is run before the inliner in the optimisation
  15. // pipeline; that is by design. Otherwise, we would only benefit from constant
  16. // passing, which is a valid use-case too, but hasn't been explored much in
  17. // terms of performance uplifts, cost-model and compile-time impact.
  18. //
  19. // Current limitations:
  20. // - It does not yet handle integer ranges. We do support "literal constants",
  21. // but that's off by default under an option.
  22. // - The cost-model could be further looked into (it mainly focuses on inlining
  23. // benefits),
  24. //
  25. // Ideas:
  26. // - With a function specialization attribute for arguments, we could have
  27. // a direct way to steer function specialization, avoiding the cost-model,
  28. // and thus control compile-times / code-size.
  29. //
  30. // Todos:
  31. // - Specializing recursive functions relies on running the transformation a
  32. // number of times, which is controlled by option
  33. // `func-specialization-max-iters`. Thus, increasing this value and the
  34. // number of iterations, will linearly increase the number of times recursive
  35. // functions get specialized, see also the discussion in
  36. // https://reviews.llvm.org/D106426 for details. Perhaps there is a
  37. // compile-time friendlier way to control/limit the number of specialisations
  38. // for recursive functions.
  39. // - Don't transform the function if function specialization does not trigger;
  40. // the SCCPSolver may make IR changes.
  41. //
  42. // References:
  43. // - 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable
  44. // it by default?”, https://www.youtube.com/watch?v=zJiCjeXgV5Q
  45. //
  46. //===----------------------------------------------------------------------===//
  47. #include "llvm/Transforms/IPO/FunctionSpecialization.h"
  48. #include "llvm/ADT/Statistic.h"
  49. #include "llvm/Analysis/CodeMetrics.h"
  50. #include "llvm/Analysis/InlineCost.h"
  51. #include "llvm/Analysis/LoopInfo.h"
  52. #include "llvm/Analysis/TargetTransformInfo.h"
  53. #include "llvm/Analysis/ValueLattice.h"
  54. #include "llvm/Analysis/ValueLatticeUtils.h"
  55. #include "llvm/IR/IntrinsicInst.h"
  56. #include "llvm/Transforms/Scalar/SCCP.h"
  57. #include "llvm/Transforms/Utils/Cloning.h"
  58. #include "llvm/Transforms/Utils/SCCPSolver.h"
  59. #include "llvm/Transforms/Utils/SizeOpts.h"
  60. #include <cmath>
  61. using namespace llvm;
  62. #define DEBUG_TYPE "function-specialization"
  63. STATISTIC(NumFuncSpecialized, "Number of functions specialized");
  64. static cl::opt<bool> ForceFunctionSpecialization(
  65. "force-function-specialization", cl::init(false), cl::Hidden,
  66. cl::desc("Force function specialization for every call site with a "
  67. "constant argument"));
  68. static cl::opt<unsigned> MaxClonesThreshold(
  69. "func-specialization-max-clones", cl::Hidden,
  70. cl::desc("The maximum number of clones allowed for a single function "
  71. "specialization"),
  72. cl::init(3));
  73. static cl::opt<unsigned> SmallFunctionThreshold(
  74. "func-specialization-size-threshold", cl::Hidden,
  75. cl::desc("Don't specialize functions that have less than this theshold "
  76. "number of instructions"),
  77. cl::init(100));
  78. static cl::opt<unsigned>
  79. AvgLoopIterationCount("func-specialization-avg-iters-cost", cl::Hidden,
  80. cl::desc("Average loop iteration count cost"),
  81. cl::init(10));
  82. static cl::opt<bool> SpecializeOnAddresses(
  83. "func-specialization-on-address", cl::init(false), cl::Hidden,
  84. cl::desc("Enable function specialization on the address of global values"));
  85. // Disabled by default as it can significantly increase compilation times.
  86. //
  87. // https://llvm-compile-time-tracker.com
  88. // https://github.com/nikic/llvm-compile-time-tracker
  89. static cl::opt<bool> EnableSpecializationForLiteralConstant(
  90. "function-specialization-for-literal-constant", cl::init(false), cl::Hidden,
  91. cl::desc("Enable specialization of functions that take a literal constant "
  92. "as an argument."));
  93. Constant *FunctionSpecializer::getPromotableAlloca(AllocaInst *Alloca,
  94. CallInst *Call) {
  95. Value *StoreValue = nullptr;
  96. for (auto *User : Alloca->users()) {
  97. // We can't use llvm::isAllocaPromotable() as that would fail because of
  98. // the usage in the CallInst, which is what we check here.
  99. if (User == Call)
  100. continue;
  101. if (auto *Bitcast = dyn_cast<BitCastInst>(User)) {
  102. if (!Bitcast->hasOneUse() || *Bitcast->user_begin() != Call)
  103. return nullptr;
  104. continue;
  105. }
  106. if (auto *Store = dyn_cast<StoreInst>(User)) {
  107. // This is a duplicate store, bail out.
  108. if (StoreValue || Store->isVolatile())
  109. return nullptr;
  110. StoreValue = Store->getValueOperand();
  111. continue;
  112. }
  113. // Bail if there is any other unknown usage.
  114. return nullptr;
  115. }
  116. return getCandidateConstant(StoreValue);
  117. }
  118. // A constant stack value is an AllocaInst that has a single constant
  119. // value stored to it. Return this constant if such an alloca stack value
  120. // is a function argument.
  121. Constant *FunctionSpecializer::getConstantStackValue(CallInst *Call,
  122. Value *Val) {
  123. if (!Val)
  124. return nullptr;
  125. Val = Val->stripPointerCasts();
  126. if (auto *ConstVal = dyn_cast<ConstantInt>(Val))
  127. return ConstVal;
  128. auto *Alloca = dyn_cast<AllocaInst>(Val);
  129. if (!Alloca || !Alloca->getAllocatedType()->isIntegerTy())
  130. return nullptr;
  131. return getPromotableAlloca(Alloca, Call);
  132. }
  133. // To support specializing recursive functions, it is important to propagate
  134. // constant arguments because after a first iteration of specialisation, a
  135. // reduced example may look like this:
  136. //
  137. // define internal void @RecursiveFn(i32* arg1) {
  138. // %temp = alloca i32, align 4
  139. // store i32 2 i32* %temp, align 4
  140. // call void @RecursiveFn.1(i32* nonnull %temp)
  141. // ret void
  142. // }
  143. //
  144. // Before a next iteration, we need to propagate the constant like so
  145. // which allows further specialization in next iterations.
  146. //
  147. // @funcspec.arg = internal constant i32 2
  148. //
  149. // define internal void @someFunc(i32* arg1) {
  150. // call void @otherFunc(i32* nonnull @funcspec.arg)
  151. // ret void
  152. // }
  153. //
  154. void FunctionSpecializer::promoteConstantStackValues() {
  155. // Iterate over the argument tracked functions see if there
  156. // are any new constant values for the call instruction via
  157. // stack variables.
  158. for (Function &F : M) {
  159. if (!Solver.isArgumentTrackedFunction(&F))
  160. continue;
  161. for (auto *User : F.users()) {
  162. auto *Call = dyn_cast<CallInst>(User);
  163. if (!Call)
  164. continue;
  165. if (!Solver.isBlockExecutable(Call->getParent()))
  166. continue;
  167. bool Changed = false;
  168. for (const Use &U : Call->args()) {
  169. unsigned Idx = Call->getArgOperandNo(&U);
  170. Value *ArgOp = Call->getArgOperand(Idx);
  171. Type *ArgOpType = ArgOp->getType();
  172. if (!Call->onlyReadsMemory(Idx) || !ArgOpType->isPointerTy())
  173. continue;
  174. auto *ConstVal = getConstantStackValue(Call, ArgOp);
  175. if (!ConstVal)
  176. continue;
  177. Value *GV = new GlobalVariable(M, ConstVal->getType(), true,
  178. GlobalValue::InternalLinkage, ConstVal,
  179. "funcspec.arg");
  180. if (ArgOpType != ConstVal->getType())
  181. GV = ConstantExpr::getBitCast(cast<Constant>(GV), ArgOpType);
  182. Call->setArgOperand(Idx, GV);
  183. Changed = true;
  184. }
  185. // Add the changed CallInst to Solver Worklist
  186. if (Changed)
  187. Solver.visitCall(*Call);
  188. }
  189. }
  190. }
  191. // ssa_copy intrinsics are introduced by the SCCP solver. These intrinsics
  192. // interfere with the promoteConstantStackValues() optimization.
  193. static void removeSSACopy(Function &F) {
  194. for (BasicBlock &BB : F) {
  195. for (Instruction &Inst : llvm::make_early_inc_range(BB)) {
  196. auto *II = dyn_cast<IntrinsicInst>(&Inst);
  197. if (!II)
  198. continue;
  199. if (II->getIntrinsicID() != Intrinsic::ssa_copy)
  200. continue;
  201. Inst.replaceAllUsesWith(II->getOperand(0));
  202. Inst.eraseFromParent();
  203. }
  204. }
  205. }
  206. /// Remove any ssa_copy intrinsics that may have been introduced.
  207. void FunctionSpecializer::cleanUpSSA() {
  208. for (Function *F : SpecializedFuncs)
  209. removeSSACopy(*F);
  210. }
// DenseMapInfo traits so a SpecSig can be used as a DenseMap key (see the
// UM map in findSpecializations). The empty/tombstone keys are signatures
// with sentinel hash values ~0U and ~1U and no arguments.
template <> struct llvm::DenseMapInfo<SpecSig> {
  static inline SpecSig getEmptyKey() { return {~0U, {}}; }
  static inline SpecSig getTombstoneKey() { return {~1U, {}}; }
  static unsigned getHashValue(const SpecSig &S) {
    return static_cast<unsigned>(hash_value(S));
  }
  static bool isEqual(const SpecSig &LHS, const SpecSig &RHS) {
    return LHS == RHS;
  }
};
/// Attempt to specialize functions in the module to enable constant
/// propagation across function boundaries.
///
/// \returns true if at least one function is specialized.
bool FunctionSpecializer::run() {
  // Find possible specializations for each function.
  SpecMap SM;
  SmallVector<Spec, 32> AllSpecs;
  unsigned NumCandidates = 0;
  for (Function &F : M) {
    if (!isCandidateFunction(&F))
      continue;

    // An invalid cost means the function must not be duplicated at all.
    auto Cost = getSpecializationCost(&F);
    if (!Cost.isValid()) {
      LLVM_DEBUG(dbgs() << "FnSpecialization: Invalid specialization cost for "
                        << F.getName() << "\n");
      continue;
    }

    LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
                      << F.getName() << " is " << Cost << "\n");

    if (!findSpecializations(&F, Cost, AllSpecs, SM)) {
      LLVM_DEBUG(
          dbgs() << "FnSpecialization: No possible specializations found for "
                 << F.getName() << "\n");
      continue;
    }

    ++NumCandidates;
  }

  if (!NumCandidates) {
    LLVM_DEBUG(
        dbgs()
        << "FnSpecialization: No possible specializations found in module\n");
    return false;
  }

  // Choose the most profitable specialisations, which fit in the module
  // specialization budget, which is derived from maximum number of
  // specializations per specialization candidate function.
  auto CompareGain = [&AllSpecs](unsigned I, unsigned J) {
    return AllSpecs[I].Gain > AllSpecs[J].Gain;
  };
  const unsigned NSpecs =
      std::min(NumCandidates * MaxClonesThreshold, unsigned(AllSpecs.size()));
  // BestSpecs has one extra slot, used as scratch space when sifting further
  // candidate indices through the heap below.
  SmallVector<unsigned> BestSpecs(NSpecs + 1);
  std::iota(BestSpecs.begin(), BestSpecs.begin() + NSpecs, 0);
  if (AllSpecs.size() > NSpecs) {
    LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed "
                      << "the maximum number of clones threshold.\n"
                      << "FnSpecialization: Specializing the "
                      << NSpecs
                      << " most profitable candidates.\n");
    // Keep the NSpecs highest-gain entries: CompareGain orders the heap so
    // the lowest-gain index is at the front; each remaining candidate is
    // pushed in and the current minimum immediately popped back out.
    std::make_heap(BestSpecs.begin(), BestSpecs.begin() + NSpecs, CompareGain);
    for (unsigned I = NSpecs, N = AllSpecs.size(); I < N; ++I) {
      BestSpecs[NSpecs] = I;
      std::push_heap(BestSpecs.begin(), BestSpecs.end(), CompareGain);
      std::pop_heap(BestSpecs.begin(), BestSpecs.end(), CompareGain);
    }
  }

  LLVM_DEBUG(dbgs() << "FnSpecialization: List of specializations \n";
             for (unsigned I = 0; I < NSpecs; ++I) {
               const Spec &S = AllSpecs[BestSpecs[I]];
               dbgs() << "FnSpecialization: Function " << S.F->getName()
                      << " , gain " << S.Gain << "\n";
               for (const ArgInfo &Arg : S.Sig.Args)
                 dbgs() << "FnSpecialization: FormalArg = "
                        << Arg.Formal->getNameOrAsOperand()
                        << ", ActualArg = " << Arg.Actual->getNameOrAsOperand()
                        << "\n";
             });

  // Create the chosen specializations.
  SmallPtrSet<Function *, 8> OriginalFuncs;
  SmallVector<Function *> Clones;
  for (unsigned I = 0; I < NSpecs; ++I) {
    Spec &S = AllSpecs[BestSpecs[I]];
    S.Clone = createSpecialization(S.F, S.Sig);

    // Update the known call sites to call the clone.
    for (CallBase *Call : S.CallSites) {
      LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *Call
                        << " to call " << S.Clone->getName() << "\n");
      Call->setCalledFunction(S.Clone);
    }

    Clones.push_back(S.Clone);
    OriginalFuncs.insert(S.F);
  }

  // Re-run the solver over the clones so the pinned constant arguments
  // propagate through their bodies.
  Solver.solveWhileResolvedUndefsIn(Clones);

  // Update the rest of the call sites - these are the recursive calls, calls
  // to discarded specialisations and calls that may match a specialisation
  // after the solver runs.
  for (Function *F : OriginalFuncs) {
    // SM[F] is the half-open index range of F's entries in AllSpecs.
    auto [Begin, End] = SM[F];
    updateCallSites(F, AllSpecs.begin() + Begin, AllSpecs.begin() + End);
  }

  promoteConstantStackValues();
  LLVM_DEBUG(if (NbFunctionsSpecialized) dbgs()
             << "FnSpecialization: Specialized " << NbFunctionsSpecialized
             << " functions in module " << M.getName() << "\n");

  NumFuncSpecialized += NbFunctionsSpecialized;
  return true;
}
  319. void FunctionSpecializer::removeDeadFunctions() {
  320. for (Function *F : FullySpecialized) {
  321. LLVM_DEBUG(dbgs() << "FnSpecialization: Removing dead function "
  322. << F->getName() << "\n");
  323. if (FAM)
  324. FAM->clear(*F, F->getName());
  325. F->eraseFromParent();
  326. }
  327. FullySpecialized.clear();
  328. }
  329. // Compute the code metrics for function \p F.
  330. CodeMetrics &FunctionSpecializer::analyzeFunction(Function *F) {
  331. auto I = FunctionMetrics.insert({F, CodeMetrics()});
  332. CodeMetrics &Metrics = I.first->second;
  333. if (I.second) {
  334. // The code metrics were not cached.
  335. SmallPtrSet<const Value *, 32> EphValues;
  336. CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues);
  337. for (BasicBlock &BB : *F)
  338. Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues);
  339. LLVM_DEBUG(dbgs() << "FnSpecialization: Code size of function "
  340. << F->getName() << " is " << Metrics.NumInsts
  341. << " instructions\n");
  342. }
  343. return Metrics;
  344. }
  345. /// Clone the function \p F and remove the ssa_copy intrinsics added by
  346. /// the SCCPSolver in the cloned version.
  347. static Function *cloneCandidateFunction(Function *F) {
  348. ValueToValueMapTy Mappings;
  349. Function *Clone = CloneFunction(F, Mappings);
  350. removeSSACopy(*Clone);
  351. return Clone;
  352. }
/// Find all direct call sites of \p F that pass at least one interesting
/// constant argument and record one specialisation candidate per unique
/// argument signature in \p AllSpecs; \p SM maps \p F to its range of
/// entries there. \returns true if any profitable candidate was found.
bool FunctionSpecializer::findSpecializations(Function *F, InstructionCost Cost,
                                              SmallVectorImpl<Spec> &AllSpecs,
                                              SpecMap &SM) {
  // A mapping from a specialisation signature to the index of the respective
  // entry in the all specialisation array. Used to ensure uniqueness of
  // specialisations.
  DenseMap<SpecSig, unsigned> UM;

  // Get a list of interesting arguments.
  SmallVector<Argument *> Args;
  for (Argument &Arg : F->args())
    if (isArgumentInteresting(&Arg))
      Args.push_back(&Arg);

  if (Args.empty())
    return false;

  bool Found = false;
  for (User *U : F->users()) {
    if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
      continue;
    auto &CS = *cast<CallBase>(U);

    // The user instruction does not call our function.
    if (CS.getCalledFunction() != F)
      continue;

    // If the call site has attribute minsize set, that callsite won't be
    // specialized.
    if (CS.hasFnAttr(Attribute::MinSize))
      continue;

    // If the parent of the call site will never be executed, we don't need
    // to worry about the passed value.
    if (!Solver.isBlockExecutable(CS.getParent()))
      continue;

    // Examine arguments and create a specialisation candidate from the
    // constant operands of this call site.
    SpecSig S;
    for (Argument *A : Args) {
      Constant *C = getCandidateConstant(CS.getArgOperand(A->getArgNo()));
      if (!C)
        continue;
      LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting argument "
                        << A->getName() << " : " << C->getNameOrAsOperand()
                        << "\n");
      S.Args.push_back({A, C});
    }

    // No interesting argument was constant at this call site.
    if (S.Args.empty())
      continue;

    // Check if we have encountered the same specialisation already.
    if (auto It = UM.find(S); It != UM.end()) {
      // Existing specialisation. Add the call to the list to rewrite, unless
      // it's a recursive call. A specialisation, generated because of a
      // recursive call may end up as not the best specialisation for all
      // the cloned instances of this call, which result from specialising
      // functions. Hence we don't rewrite the call directly, but match it with
      // the best specialisation once all specialisations are known.
      if (CS.getFunction() == F)
        continue;
      const unsigned Index = It->second;
      AllSpecs[Index].CallSites.push_back(&CS);
    } else {
      // Calculate the specialisation gain: the per-argument bonuses minus
      // the cost of duplicating the function.
      InstructionCost Gain = 0 - Cost;
      for (ArgInfo &A : S.Args)
        Gain +=
            getSpecializationBonus(A.Formal, A.Actual, Solver.getLoopInfo(*F));

      // Discard unprofitable specialisations.
      if (!ForceFunctionSpecialization && Gain <= 0)
        continue;

      // Create a new specialisation entry. Recursive call sites are not
      // recorded here either (same reasoning as above).
      auto &Spec = AllSpecs.emplace_back(F, S, Gain);
      if (CS.getFunction() != F)
        Spec.CallSites.push_back(&CS);
      const unsigned Index = AllSpecs.size() - 1;
      UM[S] = Index;
      // SM[F] holds the half-open range [Begin, End) of F's entries in
      // AllSpecs; extend the end on every new entry for F.
      if (auto [It, Inserted] = SM.try_emplace(F, Index, Index + 1); !Inserted)
        It->second.second = Index + 1;
      Found = true;
    }
  }

  return Found;
}
  431. bool FunctionSpecializer::isCandidateFunction(Function *F) {
  432. if (F->isDeclaration())
  433. return false;
  434. if (F->hasFnAttribute(Attribute::NoDuplicate))
  435. return false;
  436. if (!Solver.isArgumentTrackedFunction(F))
  437. return false;
  438. // Do not specialize the cloned function again.
  439. if (SpecializedFuncs.contains(F))
  440. return false;
  441. // If we're optimizing the function for size, we shouldn't specialize it.
  442. if (F->hasOptSize() ||
  443. shouldOptimizeForSize(F, nullptr, nullptr, PGSOQueryType::IRPass))
  444. return false;
  445. // Exit if the function is not executable. There's no point in specializing
  446. // a dead function.
  447. if (!Solver.isBlockExecutable(&F->getEntryBlock()))
  448. return false;
  449. // It wastes time to specialize a function which would get inlined finally.
  450. if (F->hasFnAttribute(Attribute::AlwaysInline))
  451. return false;
  452. LLVM_DEBUG(dbgs() << "FnSpecialization: Try function: " << F->getName()
  453. << "\n");
  454. return true;
  455. }
  456. Function *FunctionSpecializer::createSpecialization(Function *F, const SpecSig &S) {
  457. Function *Clone = cloneCandidateFunction(F);
  458. // Initialize the lattice state of the arguments of the function clone,
  459. // marking the argument on which we specialized the function constant
  460. // with the given value.
  461. Solver.markArgInFuncSpecialization(Clone, S.Args);
  462. Solver.addArgumentTrackedFunction(Clone);
  463. Solver.markBlockExecutable(&Clone->front());
  464. // Mark all the specialized functions
  465. SpecializedFuncs.insert(Clone);
  466. NbFunctionsSpecialized++;
  467. return Clone;
  468. }
  469. /// Compute and return the cost of specializing function \p F.
  470. InstructionCost FunctionSpecializer::getSpecializationCost(Function *F) {
  471. CodeMetrics &Metrics = analyzeFunction(F);
  472. // If the code metrics reveal that we shouldn't duplicate the function, we
  473. // shouldn't specialize it. Set the specialization cost to Invalid.
  474. // Or if the lines of codes implies that this function is easy to get
  475. // inlined so that we shouldn't specialize it.
  476. if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
  477. (!ForceFunctionSpecialization &&
  478. !F->hasFnAttribute(Attribute::NoInline) &&
  479. Metrics.NumInsts < SmallFunctionThreshold))
  480. return InstructionCost::getInvalid();
  481. // Otherwise, set the specialization cost to be the cost of all the
  482. // instructions in the function.
  483. return Metrics.NumInsts * InlineConstants::getInstrCost();
  484. }
  485. static InstructionCost getUserBonus(User *U, llvm::TargetTransformInfo &TTI,
  486. const LoopInfo &LI) {
  487. auto *I = dyn_cast_or_null<Instruction>(U);
  488. // If not an instruction we do not know how to evaluate.
  489. // Keep minimum possible cost for now so that it doesnt affect
  490. // specialization.
  491. if (!I)
  492. return std::numeric_limits<unsigned>::min();
  493. InstructionCost Cost =
  494. TTI.getInstructionCost(U, TargetTransformInfo::TCK_SizeAndLatency);
  495. // Increase the cost if it is inside the loop.
  496. unsigned LoopDepth = LI.getLoopDepth(I->getParent());
  497. Cost *= std::pow((double)AvgLoopIterationCount, LoopDepth);
  498. // Traverse recursively if there are more uses.
  499. // TODO: Any other instructions to be added here?
  500. if (I->mayReadFromMemory() || I->isCast())
  501. for (auto *User : I->users())
  502. Cost += getUserBonus(User, TTI, LI);
  503. return Cost;
  504. }
/// Compute a bonus for replacing argument \p A with constant \p C.
InstructionCost
FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
                                            const LoopInfo &LI) {
  Function *F = A->getParent();
  auto &TTI = (GetTTI)(*F);
  LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
                    << C->getNameOrAsOperand() << "\n");

  // Sum the per-user folding savings for the argument (recursively through
  // loads and casts; see getUserBonus).
  InstructionCost TotalCost = 0;
  for (auto *U : A->users()) {
    TotalCost += getUserBonus(U, TTI, LI);
    LLVM_DEBUG(dbgs() << "FnSpecialization: User cost ";
               TotalCost.print(dbgs()); dbgs() << " for: " << *U << "\n");
  }

  // The below heuristic is only concerned with exposing inlining
  // opportunities via indirect call promotion. If the argument is not a
  // (potentially casted) function pointer, give up.
  Function *CalledFunction = dyn_cast<Function>(C->stripPointerCasts());
  if (!CalledFunction)
    return TotalCost;

  // Get TTI for the called function (used for the inline cost).
  auto &CalleeTTI = (GetTTI)(*CalledFunction);

  // Look at all the call sites whose called value is the argument.
  // Specializing the function on the argument would allow these indirect
  // calls to be promoted to direct calls. If the indirect call promotion
  // would likely enable the called function to be inlined, specializing is a
  // good idea.
  int Bonus = 0;
  for (User *U : A->users()) {
    if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
      continue;
    auto *CS = cast<CallBase>(U);
    // The argument must be the callee operand (not a call argument), and the
    // prototypes must match for the promotion to be valid.
    if (CS->getCalledOperand() != A)
      continue;
    if (CS->getFunctionType() != CalledFunction->getFunctionType())
      continue;

    // Get the cost of inlining the called function at this call site. Note
    // that this is only an estimate. The called function may eventually
    // change in a way that leads to it not being inlined here, even though
    // inlining looks profitable now. For example, one of its called
    // functions may be inlined into it, making the called function too large
    // to be inlined into this call site.
    //
    // We apply a boost for performing indirect call promotion by increasing
    // the default threshold by the threshold for indirect calls.
    auto Params = getInlineParams();
    Params.DefaultThreshold += InlineConstants::IndirectCallThreshold;
    InlineCost IC =
        getInlineCost(*CS, CalledFunction, Params, CalleeTTI, GetAC, GetTLI);

    // We clamp the bonus for this call to be between zero and the default
    // threshold.
    if (IC.isAlways())
      Bonus += Params.DefaultThreshold;
    else if (IC.isVariable() && IC.getCostDelta() > 0)
      Bonus += IC.getCostDelta();

    LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << Bonus
                      << " for user " << *U << "\n");
  }

  return TotalCost + Bonus;
}
  565. /// Determine if it is possible to specialise the function for constant values
  566. /// of the formal parameter \p A.
  567. bool FunctionSpecializer::isArgumentInteresting(Argument *A) {
  568. // No point in specialization if the argument is unused.
  569. if (A->user_empty())
  570. return false;
  571. // For now, don't attempt to specialize functions based on the values of
  572. // composite types.
  573. Type *ArgTy = A->getType();
  574. if (!ArgTy->isSingleValueType())
  575. return false;
  576. // Specialization of integer and floating point types needs to be explicitly
  577. // enabled.
  578. if (!EnableSpecializationForLiteralConstant &&
  579. (ArgTy->isIntegerTy() || ArgTy->isFloatingPointTy()))
  580. return false;
  581. // SCCP solver does not record an argument that will be constructed on
  582. // stack.
  583. if (A->hasByValAttr() && !A->getParent()->onlyReadsMemory())
  584. return false;
  585. // Check the lattice value and decide if we should attemt to specialize,
  586. // based on this argument. No point in specialization, if the lattice value
  587. // is already a constant.
  588. const ValueLatticeElement &LV = Solver.getLatticeValueFor(A);
  589. if (LV.isUnknownOrUndef() || LV.isConstant() ||
  590. (LV.isConstantRange() && LV.getConstantRange().isSingleElement())) {
  591. LLVM_DEBUG(dbgs() << "FnSpecialization: Nothing to do, parameter "
  592. << A->getNameOrAsOperand() << " is already constant\n");
  593. return false;
  594. }
  595. LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting parameter "
  596. << A->getNameOrAsOperand() << "\n");
  597. return true;
  598. }
  599. /// Check if the valuy \p V (an actual argument) is a constant or can only
  600. /// have a constant value. Return that constant.
  601. Constant *FunctionSpecializer::getCandidateConstant(Value *V) {
  602. if (isa<PoisonValue>(V))
  603. return nullptr;
  604. // TrackValueOfGlobalVariable only tracks scalar global variables.
  605. if (auto *GV = dyn_cast<GlobalVariable>(V)) {
  606. // Check if we want to specialize on the address of non-constant
  607. // global values.
  608. if (!GV->isConstant() && !SpecializeOnAddresses)
  609. return nullptr;
  610. if (!GV->getValueType()->isSingleValueType())
  611. return nullptr;
  612. }
  613. // Select for possible specialisation values that are constants or
  614. // are deduced to be constants or constant ranges with a single element.
  615. Constant *C = dyn_cast<Constant>(V);
  616. if (!C) {
  617. const ValueLatticeElement &LV = Solver.getLatticeValueFor(V);
  618. if (LV.isConstant())
  619. C = LV.getConstant();
  620. else if (LV.isConstantRange() && LV.getConstantRange().isSingleElement()) {
  621. assert(V->getType()->isIntegerTy() && "Non-integral constant range");
  622. C = Constant::getIntegerValue(V->getType(),
  623. *LV.getConstantRange().getSingleElement());
  624. } else
  625. return nullptr;
  626. }
  627. return C;
  628. }
/// Redirect the remaining executable call sites of \p F to the best matching
/// specialisation in the range [\p Begin, \p End), and mark \p F unreachable
/// once no calls to the original remain.
void FunctionSpecializer::updateCallSites(Function *F, const Spec *Begin,
                                          const Spec *End) {
  // Collect the call sites that need updating.
  SmallVector<CallBase *> ToUpdate;
  for (User *U : F->users())
    if (auto *CS = dyn_cast<CallBase>(U);
        CS && CS->getCalledFunction() == F &&
        Solver.isBlockExecutable(CS->getParent()))
      ToUpdate.push_back(CS);

  // NCallsLeft counts call sites that still target the original function;
  // when it reaches zero, F is fully specialized.
  unsigned NCallsLeft = ToUpdate.size();
  for (CallBase *CS : ToUpdate) {
    // Call sites located inside F itself are counted as handled up front,
    // whether or not a matching specialisation is found.
    bool ShouldDecrementCount = CS->getFunction() == F;

    // Find the best matching specialisation.
    const Spec *BestSpec = nullptr;
    for (const Spec &S : make_range(Begin, End)) {
      // Skip discarded specialisations (no clone) and those that cannot beat
      // the best gain found so far.
      if (!S.Clone || (BestSpec && S.Gain <= BestSpec->Gain))
        continue;

      // Every specialized formal must match the constant passed at this
      // call site.
      if (any_of(S.Sig.Args, [CS, this](const ArgInfo &Arg) {
            unsigned ArgNo = Arg.Formal->getArgNo();
            return getCandidateConstant(CS->getArgOperand(ArgNo)) != Arg.Actual;
          }))
        continue;

      BestSpec = &S;
    }

    if (BestSpec) {
      LLVM_DEBUG(dbgs() << "FnSpecialization: Redirecting " << *CS
                        << " to call " << BestSpec->Clone->getName() << "\n");
      CS->setCalledFunction(BestSpec->Clone);
      ShouldDecrementCount = true;
    }

    if (ShouldDecrementCount)
      --NCallsLeft;
  }

  // If the function has been completely specialized, the original function
  // is no longer needed. Mark it unreachable.
  if (NCallsLeft == 0) {
    Solver.markFunctionUnreachable(F);
    FullySpecialized.insert(F);
  }
}