PartiallyInlineLibCalls.cpp 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. //===--- PartiallyInlineLibCalls.cpp - Partially inline libcalls ----------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass tries to partially inline the fast path of well-known library
  10. // functions, such as using square-root instructions for cases where sqrt()
  11. // does not need to set errno.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
  15. #include "llvm/Analysis/DomTreeUpdater.h"
  16. #include "llvm/Analysis/TargetLibraryInfo.h"
  17. #include "llvm/Analysis/TargetTransformInfo.h"
  18. #include "llvm/IR/Dominators.h"
  19. #include "llvm/IR/IRBuilder.h"
  20. #include "llvm/InitializePasses.h"
  21. #include "llvm/Support/DebugCounter.h"
  22. #include "llvm/Transforms/Scalar.h"
  23. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  24. #include <optional>
  25. using namespace llvm;
  26. #define DEBUG_TYPE "partially-inline-libcalls"
  27. DEBUG_COUNTER(PILCounter, "partially-inline-libcalls-transform",
  28. "Controls transformations in partially-inline-libcalls");
  29. static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
  30. BasicBlock &CurrBB, Function::iterator &BB,
  31. const TargetTransformInfo *TTI, DomTreeUpdater *DTU) {
  32. // There is no need to change the IR, since backend will emit sqrt
  33. // instruction if the call has already been marked read-only.
  34. if (Call->onlyReadsMemory())
  35. return false;
  36. if (!DebugCounter::shouldExecute(PILCounter))
  37. return false;
  38. // Do the following transformation:
  39. //
  40. // (before)
  41. // dst = sqrt(src)
  42. //
  43. // (after)
  44. // v0 = sqrt_noreadmem(src) # native sqrt instruction.
  45. // [if (v0 is a NaN) || if (src < 0)]
  46. // v1 = sqrt(src) # library call.
  47. // dst = phi(v0, v1)
  48. //
  49. Type *Ty = Call->getType();
  50. IRBuilder<> Builder(Call->getNextNode());
  51. // Split CurrBB right after the call, create a 'then' block (that branches
  52. // back to split-off tail of CurrBB) into which we'll insert a libcall.
  53. Instruction *LibCallTerm = SplitBlockAndInsertIfThen(
  54. Builder.getTrue(), Call->getNextNode(), /*Unreachable=*/false,
  55. /*BranchWeights*/ nullptr, DTU);
  56. auto *CurrBBTerm = cast<BranchInst>(CurrBB.getTerminator());
  57. // We want an 'else' block though, not a 'then' block.
  58. cast<BranchInst>(CurrBBTerm)->swapSuccessors();
  59. // Create phi that will merge results of either sqrt and replace all uses.
  60. BasicBlock *JoinBB = LibCallTerm->getSuccessor(0);
  61. JoinBB->setName(CurrBB.getName() + ".split");
  62. Builder.SetInsertPoint(JoinBB, JoinBB->begin());
  63. PHINode *Phi = Builder.CreatePHI(Ty, 2);
  64. Call->replaceAllUsesWith(Phi);
  65. // Finally, insert the libcall into 'else' block.
  66. BasicBlock *LibCallBB = LibCallTerm->getParent();
  67. LibCallBB->setName("call.sqrt");
  68. Builder.SetInsertPoint(LibCallTerm);
  69. Instruction *LibCall = Call->clone();
  70. Builder.Insert(LibCall);
  71. // Add memory(none) attribute, so that the backend can use a native sqrt
  72. // instruction for this call.
  73. Call->setDoesNotAccessMemory();
  74. // Insert a FP compare instruction and use it as the CurrBB branch condition.
  75. Builder.SetInsertPoint(CurrBBTerm);
  76. Value *FCmp = TTI->isFCmpOrdCheaperThanFCmpZero(Ty)
  77. ? Builder.CreateFCmpORD(Call, Call)
  78. : Builder.CreateFCmpOGE(Call->getOperand(0),
  79. ConstantFP::get(Ty, 0.0));
  80. CurrBBTerm->setCondition(FCmp);
  81. // Add phi operands.
  82. Phi->addIncoming(Call, &CurrBB);
  83. Phi->addIncoming(LibCall, LibCallBB);
  84. BB = JoinBB->getIterator();
  85. return true;
  86. }
  87. static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI,
  88. const TargetTransformInfo *TTI,
  89. DominatorTree *DT) {
  90. std::optional<DomTreeUpdater> DTU;
  91. if (DT)
  92. DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  93. bool Changed = false;
  94. Function::iterator CurrBB;
  95. for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
  96. CurrBB = BB++;
  97. for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end();
  98. II != IE; ++II) {
  99. CallInst *Call = dyn_cast<CallInst>(&*II);
  100. Function *CalledFunc;
  101. if (!Call || !(CalledFunc = Call->getCalledFunction()))
  102. continue;
  103. if (Call->isNoBuiltin() || Call->isStrictFP())
  104. continue;
  105. if (Call->isMustTailCall())
  106. continue;
  107. // Skip if function either has local linkage or is not a known library
  108. // function.
  109. LibFunc LF;
  110. if (CalledFunc->hasLocalLinkage() ||
  111. !TLI->getLibFunc(*CalledFunc, LF) || !TLI->has(LF))
  112. continue;
  113. switch (LF) {
  114. case LibFunc_sqrtf:
  115. case LibFunc_sqrt:
  116. if (TTI->haveFastSqrt(Call->getType()) &&
  117. optimizeSQRT(Call, CalledFunc, *CurrBB, BB, TTI,
  118. DTU ? &*DTU : nullptr))
  119. break;
  120. continue;
  121. default:
  122. continue;
  123. }
  124. Changed = true;
  125. break;
  126. }
  127. }
  128. return Changed;
  129. }
  130. PreservedAnalyses
  131. PartiallyInlineLibCallsPass::run(Function &F, FunctionAnalysisManager &AM) {
  132. auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  133. auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  134. auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
  135. if (!runPartiallyInlineLibCalls(F, &TLI, &TTI, DT))
  136. return PreservedAnalyses::all();
  137. PreservedAnalyses PA;
  138. PA.preserve<DominatorTreeAnalysis>();
  139. return PA;
  140. }
  141. namespace {
  142. class PartiallyInlineLibCallsLegacyPass : public FunctionPass {
  143. public:
  144. static char ID;
  145. PartiallyInlineLibCallsLegacyPass() : FunctionPass(ID) {
  146. initializePartiallyInlineLibCallsLegacyPassPass(
  147. *PassRegistry::getPassRegistry());
  148. }
  149. void getAnalysisUsage(AnalysisUsage &AU) const override {
  150. AU.addRequired<TargetLibraryInfoWrapperPass>();
  151. AU.addRequired<TargetTransformInfoWrapperPass>();
  152. AU.addPreserved<DominatorTreeWrapperPass>();
  153. FunctionPass::getAnalysisUsage(AU);
  154. }
  155. bool runOnFunction(Function &F) override {
  156. if (skipFunction(F))
  157. return false;
  158. TargetLibraryInfo *TLI =
  159. &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  160. const TargetTransformInfo *TTI =
  161. &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  162. DominatorTree *DT = nullptr;
  163. if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
  164. DT = &DTWP->getDomTree();
  165. return runPartiallyInlineLibCalls(F, TLI, TTI, DT);
  166. }
  167. };
  168. }
  169. char PartiallyInlineLibCallsLegacyPass::ID = 0;
  170. INITIALIZE_PASS_BEGIN(PartiallyInlineLibCallsLegacyPass,
  171. "partially-inline-libcalls",
  172. "Partially inline calls to library functions", false,
  173. false)
  174. INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
  175. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  176. INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
  177. INITIALIZE_PASS_END(PartiallyInlineLibCallsLegacyPass,
  178. "partially-inline-libcalls",
  179. "Partially inline calls to library functions", false, false)
  180. FunctionPass *llvm::createPartiallyInlineLibCallsPass() {
  181. return new PartiallyInlineLibCallsLegacyPass();
  182. }