//===- FunctionSpecialization.cpp - Function Specialization ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This specialises functions with constant parameters. Constant parameters
// like function pointers and constant globals are propagated to the callee by
// specializing the function. The main benefit of this pass at the moment is
// that indirect calls are transformed into direct calls, which provides inline
// opportunities that the inliner would not have been able to achieve. That's
// why function specialisation is run before the inliner in the optimisation
// pipeline; that is by design. Otherwise, we would only benefit from constant
// passing, which is a valid use-case too, but hasn't been explored much in
// terms of performance uplifts, cost-model and compile-time impact.
//
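// As a simplified illustration (the IR below is made up for this comment and
// not taken from a real test), specializing a function on a function-pointer
// argument turns an indirect call into a direct one:
//
//   define internal i32 @compute(i32 %x, i32 (i32)* %binop) {
//     %r = call i32 %binop(i32 %x)
//     ret i32 %r
//   }
//
//   ; caller: call i32 @compute(i32 %y, i32 (i32)* @plus)
//
// Specializing @compute on %binop == @plus yields a clone in which the call
// becomes "call i32 @plus(i32 %x)", a direct call that the inliner can then
// consider.
//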
// Current limitations:
// - It does not yet handle integer ranges. We do support "literal constants",
//   but that's off by default under an option.
// - Only 1 argument per function is specialised,
// - The cost-model could be further looked into (it mainly focuses on inlining
//   benefits),
// - We are not yet caching analysis results, but profiling and checking where
//   extra compile time is spent didn't suggest this to be a problem.
//
// Ideas:
// - With a function specialization attribute for arguments, we could have
//   a direct way to steer function specialization, avoiding the cost-model,
//   and thus control compile-times / code-size.
//
// Todos:
// - Specializing recursive functions relies on running the transformation a
//   number of times, which is controlled by option
//   `func-specialization-max-iters`. Thus, increasing this value linearly
//   increases the number of times recursive functions get specialized; see
//   also the discussion in https://reviews.llvm.org/D106426 for details.
//   Perhaps there is a compile-time friendlier way to control/limit the
//   number of specialisations for recursive functions.
// - Don't transform the function if function specialization does not trigger;
//   the SCCPSolver may make IR changes.
//
// References:
// - 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable
//   it by default?”, https://www.youtube.com/watch?v=zJiCjeXgV5Q
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Scalar/SCCP.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <cmath>

using namespace llvm;

#define DEBUG_TYPE "function-specialization"

STATISTIC(NumFuncSpecialized, "Number of functions specialized");

static cl::opt<bool> ForceFunctionSpecialization(
    "force-function-specialization", cl::init(false), cl::Hidden,
    cl::desc("Force function specialization for every call site with a "
             "constant argument"));

static cl::opt<unsigned> FuncSpecializationMaxIters(
    "func-specialization-max-iters", cl::Hidden,
    cl::desc("The maximum number of iterations function specialization is run"),
    cl::init(1));

static cl::opt<unsigned> MaxClonesThreshold(
    "func-specialization-max-clones", cl::Hidden,
    cl::desc("The maximum number of clones allowed for a single function "
             "specialization"),
    cl::init(3));

static cl::opt<unsigned> SmallFunctionThreshold(
    "func-specialization-size-threshold", cl::Hidden,
    cl::desc("Don't specialize functions that have less than this threshold "
             "number of instructions"),
    cl::init(100));

static cl::opt<unsigned>
    AvgLoopIterationCount("func-specialization-avg-iters-cost", cl::Hidden,
                          cl::desc("Average loop iteration count cost"),
                          cl::init(10));

static cl::opt<bool> SpecializeOnAddresses(
    "func-specialization-on-address", cl::init(false), cl::Hidden,
    cl::desc("Enable function specialization on the address of global values"));

// TODO: This needs checking to see the impact on compile-times, which is why
// this is off by default for now.
static cl::opt<bool> EnableSpecializationForLiteralConstant(
    "function-specialization-for-literal-constant", cl::init(false), cl::Hidden,
    cl::desc("Enable specialization of functions that take a literal constant "
             "as an argument."));

namespace {
// Bookkeeping struct to pass data from the analysis and profitability phase
// to the actual transform helper functions.
struct ArgInfo {
  Function *Fn;         // The function to perform specialisation on.
  Argument *Arg;        // The Formal argument being analysed.
  Constant *Const;      // A corresponding actual constant argument.
  InstructionCost Gain; // Profitability: Gain = Bonus - Cost.

  // Flag if this will be a partial specialization, in which case we will need
  // to keep the original function around in addition to the added
  // specializations.
  bool Partial = false;

  ArgInfo(Function *F, Argument *A, Constant *C, InstructionCost G)
      : Fn(F), Arg(A), Const(C), Gain(G) {}
};
} // Anonymous namespace

using FuncList = SmallVectorImpl<Function *>;
using ConstList = SmallVectorImpl<Constant *>;

// Helper to check if \p LV is either a constant or a constant
// range with a single element. This should cover exactly the same cases as the
// old ValueLatticeElement::isConstant() and is intended to be used in the
// transition to ValueLatticeElement.
static bool isConstant(const ValueLatticeElement &LV) {
  return LV.isConstant() ||
         (LV.isConstantRange() && LV.getConstantRange().isSingleElement());
}

// Helper to check if \p LV is overdefined, i.e. it is neither unknown/undef
// nor a constant in the sense of isConstant() above.
static bool isOverdefined(const ValueLatticeElement &LV) {
  return !LV.isUnknownOrUndef() && !isConstant(LV);
}

static Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call) {
  Value *StoreValue = nullptr;
  for (auto *User : Alloca->users()) {
    // We can't use llvm::isAllocaPromotable() as that would fail because of
    // the usage in the CallInst, which is what we check here.
    if (User == Call)
      continue;
    if (auto *Bitcast = dyn_cast<BitCastInst>(User)) {
      if (!Bitcast->hasOneUse() || *Bitcast->user_begin() != Call)
        return nullptr;
      continue;
    }

    if (auto *Store = dyn_cast<StoreInst>(User)) {
      // This is a duplicate store, bail out.
      if (StoreValue || Store->isVolatile())
        return nullptr;
      StoreValue = Store->getValueOperand();
      continue;
    }

    // Bail if there is any other unknown usage.
    return nullptr;
  }
  return dyn_cast_or_null<Constant>(StoreValue);
}

// A constant stack value is an AllocaInst that has a single constant
// value stored to it. Return this constant if such an alloca stack value
// is a function argument.
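//
// For example (illustrative IR only), given a call site like:
//
//   %temp = alloca i32, align 4
//   store i32 7, i32* %temp, align 4
//   call void @f(i32* %temp)
//
// passing %temp as \p Val returns the ConstantInt 7, because the alloca has a
// single non-volatile store and its only other use is the call itself.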
static Constant *getConstantStackValue(CallInst *Call, Value *Val,
                                       SCCPSolver &Solver) {
  if (!Val)
    return nullptr;
  Val = Val->stripPointerCasts();
  if (auto *ConstVal = dyn_cast<ConstantInt>(Val))
    return ConstVal;
  auto *Alloca = dyn_cast<AllocaInst>(Val);
  if (!Alloca || !Alloca->getAllocatedType()->isIntegerTy())
    return nullptr;
  return getPromotableAlloca(Alloca, Call);
}

// To support specializing recursive functions, it is important to propagate
// constant arguments because after a first iteration of specialisation, a
// reduced example may look like this:
//
//   define internal void @RecursiveFn(i32* arg1) {
//     %temp = alloca i32, align 4
//     store i32 2, i32* %temp, align 4
//     call void @RecursiveFn.1(i32* nonnull %temp)
//     ret void
//   }
//
// Before the next iteration, we need to propagate the constant like so,
// which allows further specialization in following iterations:
//
//   @funcspec.arg = internal constant i32 2
//
//   define internal void @RecursiveFn(i32* arg1) {
//     call void @RecursiveFn.1(i32* nonnull @funcspec.arg)
//     ret void
//   }
//
static void constantArgPropagation(FuncList &WorkList, Module &M,
                                   SCCPSolver &Solver) {
  // Iterate over the argument tracked functions to see if there
  // are any new constant values for the call instruction via
  // stack variables.
  for (auto *F : WorkList) {
    // TODO: Generalize for any read only arguments.
    if (F->arg_size() != 1)
      continue;

    auto &Arg = *F->arg_begin();
    if (!Arg.onlyReadsMemory() || !Arg.getType()->isPointerTy())
      continue;

    for (auto *User : F->users()) {
      auto *Call = dyn_cast<CallInst>(User);
      if (!Call)
        break;
      auto *ArgOp = Call->getArgOperand(0);
      auto *ArgOpType = ArgOp->getType();
      auto *ConstVal = getConstantStackValue(Call, ArgOp, Solver);
      if (!ConstVal)
        break;

      Value *GV = new GlobalVariable(M, ConstVal->getType(), true,
                                     GlobalValue::InternalLinkage, ConstVal,
                                     "funcspec.arg");

      if (ArgOpType != ConstVal->getType())
        GV = ConstantExpr::getBitCast(cast<Constant>(GV), ArgOp->getType());

      Call->setArgOperand(0, GV);

      // Add the changed CallInst to the solver's worklist.
      Solver.visitCall(*Call);
    }
  }
}

// ssa_copy intrinsics are introduced by the SCCP solver. These intrinsics
// interfere with the constantArgPropagation optimization.
static void removeSSACopy(Function &F) {
  for (BasicBlock &BB : F) {
    for (Instruction &Inst : llvm::make_early_inc_range(BB)) {
      auto *II = dyn_cast<IntrinsicInst>(&Inst);
      if (!II)
        continue;
      if (II->getIntrinsicID() != Intrinsic::ssa_copy)
        continue;
      Inst.replaceAllUsesWith(II->getOperand(0));
      Inst.eraseFromParent();
    }
  }
}

static void removeSSACopy(Module &M) {
  for (Function &F : M)
    removeSSACopy(F);
}

namespace {
class FunctionSpecializer {

  /// The IPSCCP Solver.
  SCCPSolver &Solver;

  /// Analyses used to help determine if a function should be specialized.
  std::function<AssumptionCache &(Function &)> GetAC;
  std::function<TargetTransformInfo &(Function &)> GetTTI;
  std::function<TargetLibraryInfo &(Function &)> GetTLI;

  SmallPtrSet<Function *, 2> SpecializedFuncs;

public:
  FunctionSpecializer(SCCPSolver &Solver,
                      std::function<AssumptionCache &(Function &)> GetAC,
                      std::function<TargetTransformInfo &(Function &)> GetTTI,
                      std::function<TargetLibraryInfo &(Function &)> GetTLI)
      : Solver(Solver), GetAC(GetAC), GetTTI(GetTTI), GetTLI(GetTLI) {}

  /// Attempt to specialize functions in the module to enable constant
  /// propagation across function boundaries.
  ///
  /// \returns true if at least one function is specialized.
  bool specializeFunctions(FuncList &FuncDecls,
                           FuncList &CurrentSpecializations) {
    bool Changed = false;
    for (auto *F : FuncDecls) {
      if (!isCandidateFunction(F, CurrentSpecializations))
        continue;

      auto Cost = getSpecializationCost(F);
      if (!Cost.isValid()) {
        LLVM_DEBUG(
            dbgs() << "FnSpecialization: Invalid specialisation cost.\n");
        continue;
      }

      auto ConstArgs = calculateGains(F, Cost);
      if (ConstArgs.empty()) {
        LLVM_DEBUG(dbgs() << "FnSpecialization: no possible constants found\n");
        continue;
      }

      for (auto &CA : ConstArgs) {
        specializeFunction(CA, CurrentSpecializations);
        Changed = true;
      }
    }

    updateSpecializedFuncs(FuncDecls, CurrentSpecializations);
    NumFuncSpecialized += NbFunctionsSpecialized;
    return Changed;
  }

  bool tryToReplaceWithConstant(Value *V) {
    if (!V->getType()->isSingleValueType() || isa<CallBase>(V) ||
        V->user_empty())
      return false;

    const ValueLatticeElement &IV = Solver.getLatticeValueFor(V);
    if (isOverdefined(IV))
      return false;
    auto *Const =
        isConstant(IV) ? Solver.getConstant(IV) : UndefValue::get(V->getType());
    V->replaceAllUsesWith(Const);

    for (auto *U : Const->users())
      if (auto *I = dyn_cast<Instruction>(U))
        if (Solver.isBlockExecutable(I->getParent()))
          Solver.visit(I);

    // Remove the instruction from Block and Solver.
    if (auto *I = dyn_cast<Instruction>(V)) {
      if (I->isSafeToRemove()) {
        I->eraseFromParent();
        Solver.removeLatticeValueFor(I);
      }
    }

    return true;
  }

private:
  // The number of functions specialised, used for collecting statistics and
  // also in the cost model.
  unsigned NbFunctionsSpecialized = 0;

  /// Clone the function \p F and remove the ssa_copy intrinsics added by
  /// the SCCPSolver in the cloned version.
  Function *cloneCandidateFunction(Function *F) {
    ValueToValueMapTy EmptyMap;
    Function *Clone = CloneFunction(F, EmptyMap);
    removeSSACopy(*Clone);
    return Clone;
  }

  /// This function decides whether it's worthwhile to specialize function \p F
  /// based on the known constant values its arguments can take on, i.e. it
  /// calculates a gain and returns a list of actual arguments that are deemed
  /// profitable to specialize. Specialization is performed on the first
  /// interesting argument. Specializations based on additional arguments will
  /// be evaluated on following iterations of the main IPSCCP solve loop.
  SmallVector<ArgInfo> calculateGains(Function *F, InstructionCost Cost) {
    SmallVector<ArgInfo> Worklist;
    // Determine if we should specialize the function based on the values the
    // argument can take on. If specialization is not profitable, we continue
    // on to the next argument.
    for (Argument &FormalArg : F->args()) {
      LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing arg: "
                        << FormalArg.getName() << "\n");
      // Determine if this argument is interesting. If we know the argument can
      // take on any constant values, they are collected in Constants. If the
      // argument can only ever equal a constant value in Constants, the
      // function will be completely specialized, and the IsPartial flag will
      // be set to false by isArgumentInteresting (that function only adds
      // values to the Constants list that are deemed profitable).
      bool IsPartial = true;
      SmallVector<Constant *> ActualConstArg;
      if (!isArgumentInteresting(&FormalArg, ActualConstArg, IsPartial)) {
        LLVM_DEBUG(dbgs() << "FnSpecialization: Argument is not interesting\n");
        continue;
      }

      for (auto *ActualArg : ActualConstArg) {
        InstructionCost Gain =
            ForceFunctionSpecialization
                ? 1
                : getSpecializationBonus(&FormalArg, ActualArg) - Cost;

        if (Gain <= 0)
          continue;
        Worklist.push_back({F, &FormalArg, ActualArg, Gain});
      }

      if (Worklist.empty())
        continue;

      // Sort the candidates in descending order.
      llvm::stable_sort(Worklist, [](const ArgInfo &L, const ArgInfo &R) {
        return L.Gain > R.Gain;
      });

      // Truncate the worklist to 'MaxClonesThreshold' candidates if necessary.
      if (Worklist.size() > MaxClonesThreshold) {
        LLVM_DEBUG(dbgs() << "FnSpecialization: number of candidates exceed "
                          << "the maximum number of clones threshold.\n"
                          << "Truncating worklist to " << MaxClonesThreshold
                          << " candidates.\n");
        Worklist.erase(Worklist.begin() + MaxClonesThreshold, Worklist.end());
      }

      if (IsPartial || Worklist.size() < ActualConstArg.size())
        for (auto &ActualArg : Worklist)
          ActualArg.Partial = true;

      LLVM_DEBUG(dbgs() << "Sorted list of candidates by gain:\n";
                 for (auto &C : Worklist) {
                   dbgs() << "- Function = " << C.Fn->getName() << ", ";
                   dbgs() << "FormalArg = " << C.Arg->getName() << ", ";
                   dbgs() << "ActualArg = " << C.Const->getName() << ", ";
                   dbgs() << "Gain = " << C.Gain << "\n";
                 });

      // FIXME: Only one argument per function.
      break;
    }
    return Worklist;
  }

  bool isCandidateFunction(Function *F, FuncList &Specializations) {
    // Do not specialize the cloned function again.
    if (SpecializedFuncs.contains(F))
      return false;

    // If we're optimizing the function for size, we shouldn't specialize it.
    if (F->hasOptSize() ||
        shouldOptimizeForSize(F, nullptr, nullptr, PGSOQueryType::IRPass))
      return false;

    // Exit if the function is not executable. There's no point in specializing
    // a dead function.
    if (!Solver.isBlockExecutable(&F->getEntryBlock()))
      return false;

    // Specializing a function that will always be inlined is a waste of time,
    // so skip it.
    if (F->hasFnAttribute(Attribute::AlwaysInline))
      return false;

    LLVM_DEBUG(dbgs() << "FnSpecialization: Try function: " << F->getName()
                      << "\n");
    return true;
  }

  void specializeFunction(ArgInfo &AI, FuncList &Specializations) {
    Function *Clone = cloneCandidateFunction(AI.Fn);
    Argument *ClonedArg = Clone->getArg(AI.Arg->getArgNo());

    // Rewrite calls to the function so that they call the clone instead.
    rewriteCallSites(AI.Fn, Clone, *ClonedArg, AI.Const);

    // Initialize the lattice state of the arguments of the function clone,
    // marking the argument on which we specialized the function constant
    // with the given value.
    Solver.markArgInFuncSpecialization(AI.Fn, ClonedArg, AI.Const);

    // Mark all the specialized functions.
    Specializations.push_back(Clone);
    NbFunctionsSpecialized++;

    // If the function has been completely specialized, the original function
    // is no longer needed. Mark it unreachable.
    if (!AI.Partial)
      Solver.markFunctionUnreachable(AI.Fn);
  }

  /// Compute and return the cost of specializing function \p F.
  InstructionCost getSpecializationCost(Function *F) {
    // Compute the code metrics for the function.
    SmallPtrSet<const Value *, 32> EphValues;
    CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues);
    CodeMetrics Metrics;
    for (BasicBlock &BB : *F)
      Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues);

    // If the code metrics reveal that we shouldn't duplicate the function, we
    // shouldn't specialize it. Set the specialization cost to Invalid.
    // Likewise, if the instruction count suggests the function is small enough
    // to be easily inlined anyway, don't specialize it.
    if (Metrics.notDuplicatable ||
        (!ForceFunctionSpecialization &&
         Metrics.NumInsts < SmallFunctionThreshold)) {
      InstructionCost C{};
      C.setInvalid();
      return C;
    }

    // Otherwise, set the specialization cost to be the cost of all the
    // instructions in the function and a penalty for specializing more
    // functions.
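    //
    // For example (illustrative numbers only): a function with 200
    // instructions, an InstrCost of 5, and one previous specialization
    // (Penalty = 2) gets a specialization cost of 200 * 5 * 2 = 2000.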
    unsigned Penalty = NbFunctionsSpecialized + 1;
    return Metrics.NumInsts * InlineConstants::InstrCost * Penalty;
  }

  InstructionCost getUserBonus(User *U, llvm::TargetTransformInfo &TTI,
                               LoopInfo &LI) {
    auto *I = dyn_cast_or_null<Instruction>(U);
    // If not an instruction we do not know how to evaluate.
    // Keep minimum possible cost for now so that it doesn't affect
    // specialization.
    if (!I)
      return std::numeric_limits<unsigned>::min();

    auto Cost = TTI.getUserCost(U, TargetTransformInfo::TCK_SizeAndLatency);

    // Traverse recursively if there are more uses.
    // TODO: Any other instructions to be added here?
    if (I->mayReadFromMemory() || I->isCast())
      for (auto *User : I->users())
        Cost += getUserBonus(User, TTI, LI);

    // Increase the cost if it is inside the loop.
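    // For example, with the default AvgLoopIterationCount of 10, a user at
    // loop depth 2 has its bonus scaled by 10^2 = 100, while a user outside
    // any loop (depth 0) is left unchanged.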
    auto LoopDepth = LI.getLoopDepth(I->getParent());
    Cost *= std::pow((double)AvgLoopIterationCount, LoopDepth);
    return Cost;
  }

  /// Compute a bonus for replacing argument \p A with constant \p C.
  InstructionCost getSpecializationBonus(Argument *A, Constant *C) {
    Function *F = A->getParent();
    DominatorTree DT(*F);
    LoopInfo LI(DT);
    auto &TTI = (GetTTI)(*F);
    LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for: " << *A
                      << "\n");

    InstructionCost TotalCost = 0;
    for (auto *U : A->users()) {
      TotalCost += getUserBonus(U, TTI, LI);
      LLVM_DEBUG(dbgs() << "FnSpecialization: User cost ";
                 TotalCost.print(dbgs()); dbgs() << " for: " << *U << "\n");
    }

    // The below heuristic is only concerned with exposing inlining
    // opportunities via indirect call promotion. If the argument is not a
    // function pointer, give up.
    if (!isa<PointerType>(A->getType()) ||
        !isa<FunctionType>(A->getType()->getPointerElementType()))
      return TotalCost;

    // Since the argument is a function pointer, its incoming constant values
    // should be functions or constant expressions. The code below attempts to
    // look through cast expressions to find the function that will be called.
    Value *CalledValue = C;
    while (isa<ConstantExpr>(CalledValue) &&
           cast<ConstantExpr>(CalledValue)->isCast())
      CalledValue = cast<User>(CalledValue)->getOperand(0);
    Function *CalledFunction = dyn_cast<Function>(CalledValue);
    if (!CalledFunction)
      return TotalCost;

    // Get TTI for the called function (used for the inline cost).
    auto &CalleeTTI = (GetTTI)(*CalledFunction);

    // Look at all the call sites whose called value is the argument.
    // Specializing the function on the argument would allow these indirect
    // calls to be promoted to direct calls. If the indirect call promotion
    // would likely enable the called function to be inlined, specializing is a
    // good idea.
    int Bonus = 0;
    for (User *U : A->users()) {
      if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
        continue;
      auto *CS = cast<CallBase>(U);
      if (CS->getCalledOperand() != A)
        continue;

      // Get the cost of inlining the called function at this call site. Note
      // that this is only an estimate. The called function may eventually
      // change in a way that leads to it not being inlined here, even though
      // inlining looks profitable now. For example, one of its called
      // functions may be inlined into it, making the called function too large
      // to be inlined into this call site.
      //
      // We apply a boost for performing indirect call promotion by increasing
      // the default threshold by the threshold for indirect calls.
      auto Params = getInlineParams();
      Params.DefaultThreshold += InlineConstants::IndirectCallThreshold;
      InlineCost IC =
          getInlineCost(*CS, CalledFunction, Params, CalleeTTI, GetAC, GetTLI);

      // We clamp the bonus for this call to be between zero and the default
      // threshold.
      if (IC.isAlways())
        Bonus += Params.DefaultThreshold;
      else if (IC.isVariable() && IC.getCostDelta() > 0)
        Bonus += IC.getCostDelta();
    }

    return TotalCost + Bonus;
  }

  /// Determine if we should specialize a function based on the incoming values
  /// of the given argument.
  ///
  /// This function implements the goal-directed heuristic. It determines if
  /// specializing the function based on the incoming values of argument \p A
  /// would result in any significant optimization opportunities. If
  /// optimization opportunities exist, the constant values of \p A on which to
  /// specialize the function are collected in \p Constants. If the values in
  /// \p Constants represent the complete set of values that \p A can take on,
  /// the function will be completely specialized, and the \p IsPartial flag is
  /// set to false.
  ///
  /// \returns true if the function should be specialized on the given
  /// argument.
  bool isArgumentInteresting(Argument *A, ConstList &Constants,
                             bool &IsPartial) {
    // For now, don't attempt to specialize functions based on the values of
    // composite types.
    if (!A->getType()->isSingleValueType() || A->user_empty())
      return false;

    // If the argument isn't overdefined, there's nothing to do. It should
    // already be constant.
    if (!Solver.getLatticeValueFor(A).isOverdefined()) {
      LLVM_DEBUG(dbgs() << "FnSpecialization: nothing to do, arg is already "
                        << "constant?\n");
      return false;
    }

    // Collect the constant values that the argument can take on. If the
    // argument can't take on any constant values, we aren't going to
    // specialize the function. While it's possible to specialize the function
    // based on non-constant arguments, there's likely not much benefit to
    // constant propagation in doing so.
    //
    // TODO 1: currently it won't specialize if there are over the threshold of
    // calls using the same argument, e.g. foo(a) x 4 and foo(b) x 1, but it
    // might be beneficial to take the occurrences into account in the cost
    // model, so we would need to find the unique constants.
    //
    // TODO 2: this does not yet handle integer ranges.
    //
    IsPartial = !getPossibleConstants(A, Constants);
    LLVM_DEBUG(dbgs() << "FnSpecialization: interesting arg: " << *A << "\n");
    return true;
  }

  /// Collect in \p Constants all the constant values that argument \p A can
  /// take on.
  ///
  /// \returns true if all of the values the argument can take on are constant
  /// (e.g., the argument's parent function cannot be called with an
  /// overdefined value).
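  ///
  /// As an illustration (hypothetical example, not from a real test): if the
  /// parent function @F is called as @F(i32 1), @F(i32 2) and @F(i32 %x), the
  /// calls with literal arguments 1 and 2 are candidates for \p Constants
  /// (subject to the literal-constant option above), and the function returns
  /// false because of the non-constant %x call.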
  bool getPossibleConstants(Argument *A, ConstList &Constants) {
    Function *F = A->getParent();
    bool AllConstant = true;

    // Iterate over all the call sites of the argument's parent function.
    for (User *U : F->users()) {
      if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
        continue;
      auto &CS = *cast<CallBase>(U);
      // If the call site has attribute minsize set, that callsite won't be
      // specialized.
      if (CS.hasFnAttr(Attribute::MinSize)) {
        AllConstant = false;
        continue;
      }

      // If the parent of the call site will never be executed, we don't need
      // to worry about the passed value.
      if (!Solver.isBlockExecutable(CS.getParent()))
        continue;

      auto *V = CS.getArgOperand(A->getArgNo());
      if (isa<PoisonValue>(V))
        return false;

      // For now, constant expressions are fine but only if they are function
      // calls.
      if (auto *CE = dyn_cast<ConstantExpr>(V))
        if (!isa<Function>(CE->getOperand(0)))
          return false;

      // TrackValueOfGlobalVariable only tracks scalar global variables.
      if (auto *GV = dyn_cast<GlobalVariable>(V)) {
        // Check if we want to specialize on the address of non-constant
        // global values.
        if (!GV->isConstant())
          if (!SpecializeOnAddresses)
            return false;

        if (!GV->getValueType()->isSingleValueType())
          return false;
      }

      if (isa<Constant>(V) && (Solver.getLatticeValueFor(V).isConstant() ||
                               EnableSpecializationForLiteralConstant))
        Constants.push_back(cast<Constant>(V));
      else
        AllConstant = false;
    }

    // If the argument can only take on constant values, AllConstant will be
    // true.
    return AllConstant;
  }

  /// Rewrite calls to function \p F to call function \p Clone instead.
  ///
  /// This function modifies calls to function \p F whose argument at index \p
  /// ArgNo is equal to constant \p C. The calls are rewritten to call function
  /// \p Clone instead.
  ///
  /// Callsites that have been marked with the MinSize function attribute won't
  /// be specialized and rewritten.
  void rewriteCallSites(Function *F, Function *Clone, Argument &Arg,
                        Constant *C) {
    unsigned ArgNo = Arg.getArgNo();
    SmallVector<CallBase *, 4> CallSitesToRewrite;
    for (auto *U : F->users()) {
      if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
        continue;
      auto &CS = *cast<CallBase>(U);
      if (!CS.getCalledFunction() || CS.getCalledFunction() != F)
        continue;
      CallSitesToRewrite.push_back(&CS);
    }
    for (auto *CS : CallSitesToRewrite) {
      if ((CS->getFunction() == Clone && CS->getArgOperand(ArgNo) == &Arg) ||
          CS->getArgOperand(ArgNo) == C) {
        CS->setCalledFunction(Clone);
        Solver.markOverdefined(CS);
      }
    }
  }

  void updateSpecializedFuncs(FuncList &FuncDecls,
                              FuncList &CurrentSpecializations) {
    for (auto *SpecializedFunc : CurrentSpecializations) {
      SpecializedFuncs.insert(SpecializedFunc);

      // Initialize the state of the newly created functions, marking them
      // argument-tracked and executable.
      if (SpecializedFunc->hasExactDefinition() &&
          !SpecializedFunc->hasFnAttribute(Attribute::Naked))
        Solver.addTrackedFunction(SpecializedFunc);
      Solver.addArgumentTrackedFunction(SpecializedFunc);
      FuncDecls.push_back(SpecializedFunc);
      Solver.markBlockExecutable(&SpecializedFunc->front());

      // Replace the function arguments for the specialized functions.
      for (Argument &Arg : SpecializedFunc->args())
        if (!Arg.use_empty() && tryToReplaceWithConstant(&Arg))
          LLVM_DEBUG(dbgs() << "FnSpecialization: Replaced constant argument: "
                            << Arg.getName() << "\n");
    }
  }
};
} // namespace

bool llvm::runFunctionSpecialization(
    Module &M, const DataLayout &DL,
    std::function<TargetLibraryInfo &(Function &)> GetTLI,
    std::function<TargetTransformInfo &(Function &)> GetTTI,
    std::function<AssumptionCache &(Function &)> GetAC,
    function_ref<AnalysisResultsForFn(Function &)> GetAnalysis) {
  SCCPSolver Solver(DL, GetTLI, M.getContext());
  FunctionSpecializer FS(Solver, GetAC, GetTTI, GetTLI);
  bool Changed = false;

  // Loop over all functions, marking arguments to those with their addresses
  // taken or that are external as overdefined.
  for (Function &F : M) {
    if (F.isDeclaration())
      continue;
    if (F.hasFnAttribute(Attribute::NoDuplicate))
      continue;

    LLVM_DEBUG(dbgs() << "\nFnSpecialization: Analysing decl: " << F.getName()
                      << "\n");
    Solver.addAnalysis(F, GetAnalysis(F));

    // Determine if we can track the function's arguments. If so, add the
    // function to the solver's set of argument-tracked functions.
    if (canTrackArgumentsInterprocedurally(&F)) {
      LLVM_DEBUG(dbgs() << "FnSpecialization: Can track arguments\n");
      Solver.addArgumentTrackedFunction(&F);
      continue;
    } else {
      LLVM_DEBUG(dbgs() << "FnSpecialization: Can't track arguments!\n"
                        << "FnSpecialization: Doesn't have local linkage, or "
                        << "has its address taken\n");
    }

    // Assume the function is called.
    Solver.markBlockExecutable(&F.front());

    // Assume nothing about the incoming arguments.
    for (Argument &AI : F.args())
      Solver.markOverdefined(&AI);
  }

  // Determine if we can track any of the module's global variables. If so, add
  // the global variables we can track to the solver's set of tracked global
  // variables.
  for (GlobalVariable &G : M.globals()) {
    G.removeDeadConstantUsers();
    if (canTrackGlobalVariableInterprocedurally(&G))
      Solver.trackValueOfGlobalVariable(&G);
  }

  auto &TrackedFuncs = Solver.getArgumentTrackedFunctions();
  SmallVector<Function *, 16> FuncDecls(TrackedFuncs.begin(),
                                        TrackedFuncs.end());

  // No tracked functions, so nothing to do: don't run the solver; just remove
  // the ssa_copy intrinsics that may have been introduced.
  if (TrackedFuncs.empty()) {
    removeSSACopy(M);
    return false;
  }

  // Solve for constants.
  auto RunSCCPSolver = [&](auto &WorkList) {
    bool ResolvedUndefs = true;
    while (ResolvedUndefs) {
      // That the solver is not run unnecessarily is checked in regression test
      // nothing-to-do.ll, so if this debug message is changed, this regression
      // test needs updating too.
      LLVM_DEBUG(dbgs() << "FnSpecialization: Running solver\n");
      Solver.solve();
      LLVM_DEBUG(dbgs() << "FnSpecialization: Resolving undefs\n");
      ResolvedUndefs = false;
      for (Function *F : WorkList)
        if (Solver.resolvedUndefsIn(*F))
          ResolvedUndefs = true;
    }

    for (auto *F : WorkList) {
      for (BasicBlock &BB : *F) {
        if (!Solver.isBlockExecutable(&BB))
          continue;
        // FIXME: The solver may make changes to the function here, so set
        // Changed, even if later function specialization does not trigger.
        for (auto &I : make_early_inc_range(BB))
          Changed |= FS.tryToReplaceWithConstant(&I);
      }
    }
  };

#ifndef NDEBUG
  LLVM_DEBUG(dbgs() << "FnSpecialization: Worklist fn decls:\n");
  for (auto *F : FuncDecls)
    LLVM_DEBUG(dbgs() << "FnSpecialization: *) " << F->getName() << "\n");
#endif

  // Initially resolve the constants in all the argument tracked functions.
  RunSCCPSolver(FuncDecls);

  SmallVector<Function *, 2> CurrentSpecializations;
  unsigned I = 0;
  while (FuncSpecializationMaxIters != I++ &&
         FS.specializeFunctions(FuncDecls, CurrentSpecializations)) {
    // Run the solver for the specialized functions.
    RunSCCPSolver(CurrentSpecializations);

    // Replace some unresolved constant arguments.
    constantArgPropagation(FuncDecls, M, Solver);

    CurrentSpecializations.clear();
    Changed = true;
  }

  // Clean up the IR by removing ssa_copy intrinsics.
  removeSSACopy(M);
  return Changed;
}