MergeFunctions.cpp 35 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971
  1. //===- MergeFunctions.cpp - Merge identical functions ---------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass looks for equivalent functions that are mergable and folds them.
  10. //
  11. // Order relation is defined on set of functions. It was made through
  12. // special function comparison procedure that returns
  13. // 0 when functions are equal,
  14. // -1 when Left function is less than right function, and
  15. // 1 for opposite case. We need total-ordering, so we need to maintain
  16. // four properties on the functions set:
  17. // a <= a (reflexivity)
  18. // if a <= b and b <= a then a = b (antisymmetry)
  19. // if a <= b and b <= c then a <= c (transitivity).
  20. // for all a and b: a <= b or b <= a (totality).
  21. //
  22. // Comparison iterates through each instruction in each basic block.
  23. // Functions are kept on binary tree. For each new function F we perform
  24. // lookup in binary tree.
  25. // In practice it works the following way:
  26. // -- We define Function* container class with custom "operator<" (FunctionPtr).
  27. // -- "FunctionPtr" instances are stored in std::set collection, so every
  28. // std::set::insert operation will give you result in log(N) time.
  29. //
  30. // As an optimization, a hash of the function structure is calculated first, and
  31. // two functions are only compared if they have the same hash. This hash is
  32. // cheap to compute, and has the property that if function F == G according to
  33. // the comparison function, then hash(F) == hash(G). This consistency property
  34. // is critical to ensuring all possible merging opportunities are exploited.
  35. // Collisions in the hash affect the speed of the pass but not the correctness
  36. // or determinism of the resulting transformation.
  37. //
  38. // When a match is found the functions are folded. If both functions are
  39. // overridable, we move the functionality into a new internal function and
  40. // leave two overridable thunks to it.
  41. //
  42. //===----------------------------------------------------------------------===//
  43. //
  44. // Future work:
  45. //
  46. // * virtual functions.
  47. //
  48. // Many functions have their address taken by the virtual function table for
  49. // the object they belong to. However, as long as it's only used for a lookup
  50. // and call, this is irrelevant, and we'd like to fold such functions.
  51. //
  52. // * be smarter about bitcasts.
  53. //
  54. // In order to fold functions, we will sometimes add either bitcast instructions
  55. // or bitcast constant expressions. Unfortunately, this can confound further
  56. // analysis since the two functions differ where one has a bitcast and the
  57. // other doesn't. We should learn to look through bitcasts.
  58. //
  59. // * Compare complex types with pointer types inside.
  60. // * Compare cross-reference cases.
  61. // * Compare complex expressions.
  62. //
  63. // All the three issues above could be described as ability to prove that
  64. // fA == fB == fC == fE == fF == fG in example below:
  65. //
  66. // void fA() {
  67. // fB();
  68. // }
  69. // void fB() {
  70. // fA();
  71. // }
  72. //
  73. // void fE() {
  74. // fF();
  75. // }
  76. // void fF() {
  77. // fG();
  78. // }
  79. // void fG() {
  80. // fE();
  81. // }
  82. //
  83. // Simplest cross-reference case (fA <--> fB) was implemented in previous
  84. // versions of MergeFunctions, though it presented only in two function pairs
  85. // in test-suite (that counts >50k functions)
  86. // Though possibility to detect complex cross-referencing (e.g.: A->B->C->D->A)
  87. // could cover much more cases.
  88. //
  89. //===----------------------------------------------------------------------===//
  90. #include "llvm/Transforms/IPO/MergeFunctions.h"
  91. #include "llvm/ADT/ArrayRef.h"
  92. #include "llvm/ADT/SmallVector.h"
  93. #include "llvm/ADT/Statistic.h"
  94. #include "llvm/IR/Argument.h"
  95. #include "llvm/IR/BasicBlock.h"
  96. #include "llvm/IR/Constant.h"
  97. #include "llvm/IR/Constants.h"
  98. #include "llvm/IR/DebugInfoMetadata.h"
  99. #include "llvm/IR/DebugLoc.h"
  100. #include "llvm/IR/DerivedTypes.h"
  101. #include "llvm/IR/Function.h"
  102. #include "llvm/IR/GlobalValue.h"
  103. #include "llvm/IR/IRBuilder.h"
  104. #include "llvm/IR/InstrTypes.h"
  105. #include "llvm/IR/Instruction.h"
  106. #include "llvm/IR/Instructions.h"
  107. #include "llvm/IR/IntrinsicInst.h"
  108. #include "llvm/IR/Module.h"
  109. #include "llvm/IR/Type.h"
  110. #include "llvm/IR/Use.h"
  111. #include "llvm/IR/User.h"
  112. #include "llvm/IR/Value.h"
  113. #include "llvm/IR/ValueHandle.h"
  114. #include "llvm/InitializePasses.h"
  115. #include "llvm/Pass.h"
  116. #include "llvm/Support/Casting.h"
  117. #include "llvm/Support/CommandLine.h"
  118. #include "llvm/Support/Debug.h"
  119. #include "llvm/Support/raw_ostream.h"
  120. #include "llvm/Transforms/IPO.h"
  121. #include "llvm/Transforms/Utils/FunctionComparator.h"
  122. #include "llvm/Transforms/Utils/ModuleUtils.h"
  123. #include <algorithm>
  124. #include <cassert>
  125. #include <iterator>
  126. #include <set>
  127. #include <utility>
  128. #include <vector>
  129. using namespace llvm;
  130. #define DEBUG_TYPE "mergefunc"
  131. STATISTIC(NumFunctionsMerged, "Number of functions merged");
  132. STATISTIC(NumThunksWritten, "Number of thunks generated");
  133. STATISTIC(NumAliasesWritten, "Number of aliases generated");
  134. STATISTIC(NumDoubleWeak, "Number of new functions created");
  135. static cl::opt<unsigned> NumFunctionsForVerificationCheck(
  136. "mergefunc-verify",
  137. cl::desc("How many functions in a module could be used for "
  138. "MergeFunctions to pass a basic correctness check. "
  139. "'0' disables this check. Works only with '-debug' key."),
  140. cl::init(0), cl::Hidden);
  141. // Under option -mergefunc-preserve-debug-info we:
  142. // - Do not create a new function for a thunk.
  143. // - Retain the debug info for a thunk's parameters (and associated
  144. // instructions for the debug info) from the entry block.
  145. // Note: -debug will display the algorithm at work.
  146. // - Create debug-info for the call (to the shared implementation) made by
  147. // a thunk and its return value.
  148. // - Erase the rest of the function, retaining the (minimally sized) entry
  149. // block to create a thunk.
  150. // - Preserve a thunk's call site to point to the thunk even when both occur
  151. // within the same translation unit, to aid debugability. Note that this
  152. // behaviour differs from the underlying -mergefunc implementation which
  153. // modifies the thunk's call site to point to the shared implementation
  154. // when both occur within the same translation unit.
  155. static cl::opt<bool>
  156. MergeFunctionsPDI("mergefunc-preserve-debug-info", cl::Hidden,
  157. cl::init(false),
  158. cl::desc("Preserve debug info in thunk when mergefunc "
  159. "transformations are made."));
  160. static cl::opt<bool>
  161. MergeFunctionsAliases("mergefunc-use-aliases", cl::Hidden,
  162. cl::init(false),
  163. cl::desc("Allow mergefunc to create aliases"));
  164. namespace {
  165. class FunctionNode {
  166. mutable AssertingVH<Function> F;
  167. FunctionComparator::FunctionHash Hash;
  168. public:
  169. // Note the hash is recalculated potentially multiple times, but it is cheap.
  170. FunctionNode(Function *F)
  171. : F(F), Hash(FunctionComparator::functionHash(*F)) {}
  172. Function *getFunc() const { return F; }
  173. FunctionComparator::FunctionHash getHash() const { return Hash; }
  174. /// Replace the reference to the function F by the function G, assuming their
  175. /// implementations are equal.
  176. void replaceBy(Function *G) const {
  177. F = G;
  178. }
  179. };
  180. /// MergeFunctions finds functions which will generate identical machine code,
  181. /// by considering all pointer types to be equivalent. Once identified,
  182. /// MergeFunctions will fold them by replacing a call to one to a call to a
  183. /// bitcast of the other.
  184. class MergeFunctions {
  185. public:
  186. MergeFunctions() : FnTree(FunctionNodeCmp(&GlobalNumbers)) {
  187. }
  188. bool runOnModule(Module &M);
  189. private:
  190. // The function comparison operator is provided here so that FunctionNodes do
  191. // not need to become larger with another pointer.
  192. class FunctionNodeCmp {
  193. GlobalNumberState* GlobalNumbers;
  194. public:
  195. FunctionNodeCmp(GlobalNumberState* GN) : GlobalNumbers(GN) {}
  196. bool operator()(const FunctionNode &LHS, const FunctionNode &RHS) const {
  197. // Order first by hashes, then full function comparison.
  198. if (LHS.getHash() != RHS.getHash())
  199. return LHS.getHash() < RHS.getHash();
  200. FunctionComparator FCmp(LHS.getFunc(), RHS.getFunc(), GlobalNumbers);
  201. return FCmp.compare() < 0;
  202. }
  203. };
  204. using FnTreeType = std::set<FunctionNode, FunctionNodeCmp>;
  205. GlobalNumberState GlobalNumbers;
  206. /// A work queue of functions that may have been modified and should be
  207. /// analyzed again.
  208. std::vector<WeakTrackingVH> Deferred;
  209. /// Set of values marked as used in llvm.used and llvm.compiler.used.
  210. SmallPtrSet<GlobalValue *, 4> Used;
  211. #ifndef NDEBUG
  212. /// Checks the rules of order relation introduced among functions set.
  213. /// Returns true, if check has been passed, and false if failed.
  214. bool doFunctionalCheck(std::vector<WeakTrackingVH> &Worklist);
  215. #endif
  216. /// Insert a ComparableFunction into the FnTree, or merge it away if it's
  217. /// equal to one that's already present.
  218. bool insert(Function *NewFunction);
  219. /// Remove a Function from the FnTree and queue it up for a second sweep of
  220. /// analysis.
  221. void remove(Function *F);
  222. /// Find the functions that use this Value and remove them from FnTree and
  223. /// queue the functions.
  224. void removeUsers(Value *V);
  225. /// Replace all direct calls of Old with calls of New. Will bitcast New if
  226. /// necessary to make types match.
  227. void replaceDirectCallers(Function *Old, Function *New);
  228. /// Merge two equivalent functions. Upon completion, G may be deleted, or may
  229. /// be converted into a thunk. In either case, it should never be visited
  230. /// again.
  231. void mergeTwoFunctions(Function *F, Function *G);
  232. /// Fill PDIUnrelatedWL with instructions from the entry block that are
  233. /// unrelated to parameter related debug info.
  234. void filterInstsUnrelatedToPDI(BasicBlock *GEntryBlock,
  235. std::vector<Instruction *> &PDIUnrelatedWL);
  236. /// Erase the rest of the CFG (i.e. barring the entry block).
  237. void eraseTail(Function *G);
  238. /// Erase the instructions in PDIUnrelatedWL as they are unrelated to the
  239. /// parameter debug info, from the entry block.
  240. void eraseInstsUnrelatedToPDI(std::vector<Instruction *> &PDIUnrelatedWL);
  241. /// Replace G with a simple tail call to bitcast(F). Also (unless
  242. /// MergeFunctionsPDI holds) replace direct uses of G with bitcast(F),
  243. /// delete G.
  244. void writeThunk(Function *F, Function *G);
  245. // Replace G with an alias to F (deleting function G)
  246. void writeAlias(Function *F, Function *G);
  247. // Replace G with an alias to F if possible, or a thunk to F if possible.
  248. // Returns false if neither is the case.
  249. bool writeThunkOrAlias(Function *F, Function *G);
  250. /// Replace function F with function G in the function tree.
  251. void replaceFunctionInTree(const FunctionNode &FN, Function *G);
  252. /// The set of all distinct functions. Use the insert() and remove() methods
  253. /// to modify it. The map allows efficient lookup and deferring of Functions.
  254. FnTreeType FnTree;
  255. // Map functions to the iterators of the FunctionNode which contains them
  256. // in the FnTree. This must be updated carefully whenever the FnTree is
  257. // modified, i.e. in insert(), remove(), and replaceFunctionInTree(), to avoid
  258. // dangling iterators into FnTree. The invariant that preserves this is that
  259. // there is exactly one mapping F -> FN for each FunctionNode FN in FnTree.
  260. DenseMap<AssertingVH<Function>, FnTreeType::iterator> FNodesInTree;
  261. };
  262. class MergeFunctionsLegacyPass : public ModulePass {
  263. public:
  264. static char ID;
  265. MergeFunctionsLegacyPass(): ModulePass(ID) {
  266. initializeMergeFunctionsLegacyPassPass(*PassRegistry::getPassRegistry());
  267. }
  268. bool runOnModule(Module &M) override {
  269. if (skipModule(M))
  270. return false;
  271. MergeFunctions MF;
  272. return MF.runOnModule(M);
  273. }
  274. };
  275. } // end anonymous namespace
  276. char MergeFunctionsLegacyPass::ID = 0;
  277. INITIALIZE_PASS(MergeFunctionsLegacyPass, "mergefunc",
  278. "Merge Functions", false, false)
  279. ModulePass *llvm::createMergeFunctionsPass() {
  280. return new MergeFunctionsLegacyPass();
  281. }
  282. PreservedAnalyses MergeFunctionsPass::run(Module &M,
  283. ModuleAnalysisManager &AM) {
  284. MergeFunctions MF;
  285. if (!MF.runOnModule(M))
  286. return PreservedAnalyses::all();
  287. return PreservedAnalyses::none();
  288. }
  289. #ifndef NDEBUG
  290. bool MergeFunctions::doFunctionalCheck(std::vector<WeakTrackingVH> &Worklist) {
  291. if (const unsigned Max = NumFunctionsForVerificationCheck) {
  292. unsigned TripleNumber = 0;
  293. bool Valid = true;
  294. dbgs() << "MERGEFUNC-VERIFY: Started for first " << Max << " functions.\n";
  295. unsigned i = 0;
  296. for (std::vector<WeakTrackingVH>::iterator I = Worklist.begin(),
  297. E = Worklist.end();
  298. I != E && i < Max; ++I, ++i) {
  299. unsigned j = i;
  300. for (std::vector<WeakTrackingVH>::iterator J = I; J != E && j < Max;
  301. ++J, ++j) {
  302. Function *F1 = cast<Function>(*I);
  303. Function *F2 = cast<Function>(*J);
  304. int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare();
  305. int Res2 = FunctionComparator(F2, F1, &GlobalNumbers).compare();
  306. // If F1 <= F2, then F2 >= F1, otherwise report failure.
  307. if (Res1 != -Res2) {
  308. dbgs() << "MERGEFUNC-VERIFY: Non-symmetric; triple: " << TripleNumber
  309. << "\n";
  310. dbgs() << *F1 << '\n' << *F2 << '\n';
  311. Valid = false;
  312. }
  313. if (Res1 == 0)
  314. continue;
  315. unsigned k = j;
  316. for (std::vector<WeakTrackingVH>::iterator K = J; K != E && k < Max;
  317. ++k, ++K, ++TripleNumber) {
  318. if (K == J)
  319. continue;
  320. Function *F3 = cast<Function>(*K);
  321. int Res3 = FunctionComparator(F1, F3, &GlobalNumbers).compare();
  322. int Res4 = FunctionComparator(F2, F3, &GlobalNumbers).compare();
  323. bool Transitive = true;
  324. if (Res1 != 0 && Res1 == Res4) {
  325. // F1 > F2, F2 > F3 => F1 > F3
  326. Transitive = Res3 == Res1;
  327. } else if (Res3 != 0 && Res3 == -Res4) {
  328. // F1 > F3, F3 > F2 => F1 > F2
  329. Transitive = Res3 == Res1;
  330. } else if (Res4 != 0 && -Res3 == Res4) {
  331. // F2 > F3, F3 > F1 => F2 > F1
  332. Transitive = Res4 == -Res1;
  333. }
  334. if (!Transitive) {
  335. dbgs() << "MERGEFUNC-VERIFY: Non-transitive; triple: "
  336. << TripleNumber << "\n";
  337. dbgs() << "Res1, Res3, Res4: " << Res1 << ", " << Res3 << ", "
  338. << Res4 << "\n";
  339. dbgs() << *F1 << '\n' << *F2 << '\n' << *F3 << '\n';
  340. Valid = false;
  341. }
  342. }
  343. }
  344. }
  345. dbgs() << "MERGEFUNC-VERIFY: " << (Valid ? "Passed." : "Failed.") << "\n";
  346. return Valid;
  347. }
  348. return true;
  349. }
  350. #endif
  351. /// Check whether \p F is eligible for function merging.
  352. static bool isEligibleForMerging(Function &F) {
  353. return !F.isDeclaration() && !F.hasAvailableExternallyLinkage();
  354. }
  355. bool MergeFunctions::runOnModule(Module &M) {
  356. bool Changed = false;
  357. SmallVector<GlobalValue *, 4> UsedV;
  358. collectUsedGlobalVariables(M, UsedV, /*CompilerUsed=*/false);
  359. collectUsedGlobalVariables(M, UsedV, /*CompilerUsed=*/true);
  360. Used.insert(UsedV.begin(), UsedV.end());
  361. // All functions in the module, ordered by hash. Functions with a unique
  362. // hash value are easily eliminated.
  363. std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
  364. HashedFuncs;
  365. for (Function &Func : M) {
  366. if (isEligibleForMerging(Func)) {
  367. HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
  368. }
  369. }
  370. llvm::stable_sort(HashedFuncs, less_first());
  371. auto S = HashedFuncs.begin();
  372. for (auto I = HashedFuncs.begin(), IE = HashedFuncs.end(); I != IE; ++I) {
  373. // If the hash value matches the previous value or the next one, we must
  374. // consider merging it. Otherwise it is dropped and never considered again.
  375. if ((I != S && std::prev(I)->first == I->first) ||
  376. (std::next(I) != IE && std::next(I)->first == I->first) ) {
  377. Deferred.push_back(WeakTrackingVH(I->second));
  378. }
  379. }
  380. do {
  381. std::vector<WeakTrackingVH> Worklist;
  382. Deferred.swap(Worklist);
  383. LLVM_DEBUG(doFunctionalCheck(Worklist));
  384. LLVM_DEBUG(dbgs() << "size of module: " << M.size() << '\n');
  385. LLVM_DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');
  386. // Insert functions and merge them.
  387. for (WeakTrackingVH &I : Worklist) {
  388. if (!I)
  389. continue;
  390. Function *F = cast<Function>(I);
  391. if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage()) {
  392. Changed |= insert(F);
  393. }
  394. }
  395. LLVM_DEBUG(dbgs() << "size of FnTree: " << FnTree.size() << '\n');
  396. } while (!Deferred.empty());
  397. FnTree.clear();
  398. FNodesInTree.clear();
  399. GlobalNumbers.clear();
  400. Used.clear();
  401. return Changed;
  402. }
  403. // Replace direct callers of Old with New.
  404. void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
  405. Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
  406. for (Use &U : llvm::make_early_inc_range(Old->uses())) {
  407. CallBase *CB = dyn_cast<CallBase>(U.getUser());
  408. if (CB && CB->isCallee(&U)) {
  409. // Do not copy attributes from the called function to the call-site.
  410. // Function comparison ensures that the attributes are the same up to
  411. // type congruences in byval(), in which case we need to keep the byval
  412. // type of the call-site, not the callee function.
  413. remove(CB->getFunction());
  414. U.set(BitcastNew);
  415. }
  416. }
  417. }
  418. // Helper for writeThunk,
  419. // Selects proper bitcast operation,
  420. // but a bit simpler then CastInst::getCastOpcode.
  421. static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) {
  422. Type *SrcTy = V->getType();
  423. if (SrcTy->isStructTy()) {
  424. assert(DestTy->isStructTy());
  425. assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements());
  426. Value *Result = PoisonValue::get(DestTy);
  427. for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) {
  428. Value *Element =
  429. createCast(Builder, Builder.CreateExtractValue(V, ArrayRef(I)),
  430. DestTy->getStructElementType(I));
  431. Result = Builder.CreateInsertValue(Result, Element, ArrayRef(I));
  432. }
  433. return Result;
  434. }
  435. assert(!DestTy->isStructTy());
  436. if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
  437. return Builder.CreateIntToPtr(V, DestTy);
  438. else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
  439. return Builder.CreatePtrToInt(V, DestTy);
  440. else
  441. return Builder.CreateBitCast(V, DestTy);
  442. }
  443. // Erase the instructions in PDIUnrelatedWL as they are unrelated to the
  444. // parameter debug info, from the entry block.
  445. void MergeFunctions::eraseInstsUnrelatedToPDI(
  446. std::vector<Instruction *> &PDIUnrelatedWL) {
  447. LLVM_DEBUG(
  448. dbgs() << " Erasing instructions (in reverse order of appearance in "
  449. "entry block) unrelated to parameter debug info from entry "
  450. "block: {\n");
  451. while (!PDIUnrelatedWL.empty()) {
  452. Instruction *I = PDIUnrelatedWL.back();
  453. LLVM_DEBUG(dbgs() << " Deleting Instruction: ");
  454. LLVM_DEBUG(I->print(dbgs()));
  455. LLVM_DEBUG(dbgs() << "\n");
  456. I->eraseFromParent();
  457. PDIUnrelatedWL.pop_back();
  458. }
  459. LLVM_DEBUG(dbgs() << " } // Done erasing instructions unrelated to parameter "
  460. "debug info from entry block. \n");
  461. }
  462. // Reduce G to its entry block.
  463. void MergeFunctions::eraseTail(Function *G) {
  464. std::vector<BasicBlock *> WorklistBB;
  465. for (BasicBlock &BB : drop_begin(*G)) {
  466. BB.dropAllReferences();
  467. WorklistBB.push_back(&BB);
  468. }
  469. while (!WorklistBB.empty()) {
  470. BasicBlock *BB = WorklistBB.back();
  471. BB->eraseFromParent();
  472. WorklistBB.pop_back();
  473. }
  474. }
  475. // We are interested in the following instructions from the entry block as being
  476. // related to parameter debug info:
  477. // - @llvm.dbg.declare
  478. // - stores from the incoming parameters to locations on the stack-frame
  479. // - allocas that create these locations on the stack-frame
  480. // - @llvm.dbg.value
  481. // - the entry block's terminator
  482. // The rest are unrelated to debug info for the parameters; fill up
  483. // PDIUnrelatedWL with such instructions.
  484. void MergeFunctions::filterInstsUnrelatedToPDI(
  485. BasicBlock *GEntryBlock, std::vector<Instruction *> &PDIUnrelatedWL) {
  486. std::set<Instruction *> PDIRelated;
  487. for (BasicBlock::iterator BI = GEntryBlock->begin(), BIE = GEntryBlock->end();
  488. BI != BIE; ++BI) {
  489. if (auto *DVI = dyn_cast<DbgValueInst>(&*BI)) {
  490. LLVM_DEBUG(dbgs() << " Deciding: ");
  491. LLVM_DEBUG(BI->print(dbgs()));
  492. LLVM_DEBUG(dbgs() << "\n");
  493. DILocalVariable *DILocVar = DVI->getVariable();
  494. if (DILocVar->isParameter()) {
  495. LLVM_DEBUG(dbgs() << " Include (parameter): ");
  496. LLVM_DEBUG(BI->print(dbgs()));
  497. LLVM_DEBUG(dbgs() << "\n");
  498. PDIRelated.insert(&*BI);
  499. } else {
  500. LLVM_DEBUG(dbgs() << " Delete (!parameter): ");
  501. LLVM_DEBUG(BI->print(dbgs()));
  502. LLVM_DEBUG(dbgs() << "\n");
  503. }
  504. } else if (auto *DDI = dyn_cast<DbgDeclareInst>(&*BI)) {
  505. LLVM_DEBUG(dbgs() << " Deciding: ");
  506. LLVM_DEBUG(BI->print(dbgs()));
  507. LLVM_DEBUG(dbgs() << "\n");
  508. DILocalVariable *DILocVar = DDI->getVariable();
  509. if (DILocVar->isParameter()) {
  510. LLVM_DEBUG(dbgs() << " Parameter: ");
  511. LLVM_DEBUG(DILocVar->print(dbgs()));
  512. AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress());
  513. if (AI) {
  514. LLVM_DEBUG(dbgs() << " Processing alloca users: ");
  515. LLVM_DEBUG(dbgs() << "\n");
  516. for (User *U : AI->users()) {
  517. if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
  518. if (Value *Arg = SI->getValueOperand()) {
  519. if (isa<Argument>(Arg)) {
  520. LLVM_DEBUG(dbgs() << " Include: ");
  521. LLVM_DEBUG(AI->print(dbgs()));
  522. LLVM_DEBUG(dbgs() << "\n");
  523. PDIRelated.insert(AI);
  524. LLVM_DEBUG(dbgs() << " Include (parameter): ");
  525. LLVM_DEBUG(SI->print(dbgs()));
  526. LLVM_DEBUG(dbgs() << "\n");
  527. PDIRelated.insert(SI);
  528. LLVM_DEBUG(dbgs() << " Include: ");
  529. LLVM_DEBUG(BI->print(dbgs()));
  530. LLVM_DEBUG(dbgs() << "\n");
  531. PDIRelated.insert(&*BI);
  532. } else {
  533. LLVM_DEBUG(dbgs() << " Delete (!parameter): ");
  534. LLVM_DEBUG(SI->print(dbgs()));
  535. LLVM_DEBUG(dbgs() << "\n");
  536. }
  537. }
  538. } else {
  539. LLVM_DEBUG(dbgs() << " Defer: ");
  540. LLVM_DEBUG(U->print(dbgs()));
  541. LLVM_DEBUG(dbgs() << "\n");
  542. }
  543. }
  544. } else {
  545. LLVM_DEBUG(dbgs() << " Delete (alloca NULL): ");
  546. LLVM_DEBUG(BI->print(dbgs()));
  547. LLVM_DEBUG(dbgs() << "\n");
  548. }
  549. } else {
  550. LLVM_DEBUG(dbgs() << " Delete (!parameter): ");
  551. LLVM_DEBUG(BI->print(dbgs()));
  552. LLVM_DEBUG(dbgs() << "\n");
  553. }
  554. } else if (BI->isTerminator() && &*BI == GEntryBlock->getTerminator()) {
  555. LLVM_DEBUG(dbgs() << " Will Include Terminator: ");
  556. LLVM_DEBUG(BI->print(dbgs()));
  557. LLVM_DEBUG(dbgs() << "\n");
  558. PDIRelated.insert(&*BI);
  559. } else {
  560. LLVM_DEBUG(dbgs() << " Defer: ");
  561. LLVM_DEBUG(BI->print(dbgs()));
  562. LLVM_DEBUG(dbgs() << "\n");
  563. }
  564. }
  565. LLVM_DEBUG(
  566. dbgs()
  567. << " Report parameter debug info related/related instructions: {\n");
  568. for (Instruction &I : *GEntryBlock) {
  569. if (PDIRelated.find(&I) == PDIRelated.end()) {
  570. LLVM_DEBUG(dbgs() << " !PDIRelated: ");
  571. LLVM_DEBUG(I.print(dbgs()));
  572. LLVM_DEBUG(dbgs() << "\n");
  573. PDIUnrelatedWL.push_back(&I);
  574. } else {
  575. LLVM_DEBUG(dbgs() << " PDIRelated: ");
  576. LLVM_DEBUG(I.print(dbgs()));
  577. LLVM_DEBUG(dbgs() << "\n");
  578. }
  579. }
  580. LLVM_DEBUG(dbgs() << " }\n");
  581. }
  582. /// Whether this function may be replaced by a forwarding thunk.
  583. static bool canCreateThunkFor(Function *F) {
  584. if (F->isVarArg())
  585. return false;
  586. // Don't merge tiny functions using a thunk, since it can just end up
  587. // making the function larger.
  588. if (F->size() == 1) {
  589. if (F->front().size() <= 2) {
  590. LLVM_DEBUG(dbgs() << "canCreateThunkFor: " << F->getName()
  591. << " is too small to bother creating a thunk for\n");
  592. return false;
  593. }
  594. }
  595. return true;
  596. }
  597. // Replace G with a simple tail call to bitcast(F). Also (unless
  598. // MergeFunctionsPDI holds) replace direct uses of G with bitcast(F),
  599. // delete G. Under MergeFunctionsPDI, we use G itself for creating
  600. // the thunk as we preserve the debug info (and associated instructions)
  601. // from G's entry block pertaining to G's incoming arguments which are
  602. // passed on as corresponding arguments in the call that G makes to F.
  603. // For better debugability, under MergeFunctionsPDI, we do not modify G's
  604. // call sites to point to F even when within the same translation unit.
  605. void MergeFunctions::writeThunk(Function *F, Function *G) {
  606. BasicBlock *GEntryBlock = nullptr;
  607. std::vector<Instruction *> PDIUnrelatedWL;
  608. BasicBlock *BB = nullptr;
  609. Function *NewG = nullptr;
  610. if (MergeFunctionsPDI) {
  611. LLVM_DEBUG(dbgs() << "writeThunk: (MergeFunctionsPDI) Do not create a new "
  612. "function as thunk; retain original: "
  613. << G->getName() << "()\n");
  614. GEntryBlock = &G->getEntryBlock();
  615. LLVM_DEBUG(
  616. dbgs() << "writeThunk: (MergeFunctionsPDI) filter parameter related "
  617. "debug info for "
  618. << G->getName() << "() {\n");
  619. filterInstsUnrelatedToPDI(GEntryBlock, PDIUnrelatedWL);
  620. GEntryBlock->getTerminator()->eraseFromParent();
  621. BB = GEntryBlock;
  622. } else {
  623. NewG = Function::Create(G->getFunctionType(), G->getLinkage(),
  624. G->getAddressSpace(), "", G->getParent());
  625. NewG->setComdat(G->getComdat());
  626. BB = BasicBlock::Create(F->getContext(), "", NewG);
  627. }
  628. IRBuilder<> Builder(BB);
  629. Function *H = MergeFunctionsPDI ? G : NewG;
  630. SmallVector<Value *, 16> Args;
  631. unsigned i = 0;
  632. FunctionType *FFTy = F->getFunctionType();
  633. for (Argument &AI : H->args()) {
  634. Args.push_back(createCast(Builder, &AI, FFTy->getParamType(i)));
  635. ++i;
  636. }
  637. CallInst *CI = Builder.CreateCall(F, Args);
  638. ReturnInst *RI = nullptr;
  639. bool isSwiftTailCall = F->getCallingConv() == CallingConv::SwiftTail &&
  640. G->getCallingConv() == CallingConv::SwiftTail;
  641. CI->setTailCallKind(isSwiftTailCall ? llvm::CallInst::TCK_MustTail
  642. : llvm::CallInst::TCK_Tail);
  643. CI->setCallingConv(F->getCallingConv());
  644. CI->setAttributes(F->getAttributes());
  645. if (H->getReturnType()->isVoidTy()) {
  646. RI = Builder.CreateRetVoid();
  647. } else {
  648. RI = Builder.CreateRet(createCast(Builder, CI, H->getReturnType()));
  649. }
  650. if (MergeFunctionsPDI) {
  651. DISubprogram *DIS = G->getSubprogram();
  652. if (DIS) {
  653. DebugLoc CIDbgLoc =
  654. DILocation::get(DIS->getContext(), DIS->getScopeLine(), 0, DIS);
  655. DebugLoc RIDbgLoc =
  656. DILocation::get(DIS->getContext(), DIS->getScopeLine(), 0, DIS);
  657. CI->setDebugLoc(CIDbgLoc);
  658. RI->setDebugLoc(RIDbgLoc);
  659. } else {
  660. LLVM_DEBUG(
  661. dbgs() << "writeThunk: (MergeFunctionsPDI) No DISubprogram for "
  662. << G->getName() << "()\n");
  663. }
  664. eraseTail(G);
  665. eraseInstsUnrelatedToPDI(PDIUnrelatedWL);
  666. LLVM_DEBUG(
  667. dbgs() << "} // End of parameter related debug info filtering for: "
  668. << G->getName() << "()\n");
  669. } else {
  670. NewG->copyAttributesFrom(G);
  671. NewG->takeName(G);
  672. removeUsers(G);
  673. G->replaceAllUsesWith(NewG);
  674. G->eraseFromParent();
  675. }
  676. LLVM_DEBUG(dbgs() << "writeThunk: " << H->getName() << '\n');
  677. ++NumThunksWritten;
  678. }
  679. // Whether this function may be replaced by an alias
  680. static bool canCreateAliasFor(Function *F) {
  681. if (!MergeFunctionsAliases || !F->hasGlobalUnnamedAddr())
  682. return false;
  683. // We should only see linkages supported by aliases here
  684. assert(F->hasLocalLinkage() || F->hasExternalLinkage()
  685. || F->hasWeakLinkage() || F->hasLinkOnceLinkage());
  686. return true;
  687. }
  688. // Replace G with an alias to F (deleting function G)
  689. void MergeFunctions::writeAlias(Function *F, Function *G) {
  690. Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
  691. PointerType *PtrType = G->getType();
  692. auto *GA = GlobalAlias::create(G->getValueType(), PtrType->getAddressSpace(),
  693. G->getLinkage(), "", BitcastF, G->getParent());
  694. const MaybeAlign FAlign = F->getAlign();
  695. const MaybeAlign GAlign = G->getAlign();
  696. if (FAlign || GAlign)
  697. F->setAlignment(std::max(FAlign.valueOrOne(), GAlign.valueOrOne()));
  698. else
  699. F->setAlignment(std::nullopt);
  700. GA->takeName(G);
  701. GA->setVisibility(G->getVisibility());
  702. GA->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  703. removeUsers(G);
  704. G->replaceAllUsesWith(GA);
  705. G->eraseFromParent();
  706. LLVM_DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n');
  707. ++NumAliasesWritten;
  708. }
  709. // Replace G with an alias to F if possible, or a thunk to F if
  710. // profitable. Returns false if neither is the case.
  711. bool MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
  712. if (canCreateAliasFor(G)) {
  713. writeAlias(F, G);
  714. return true;
  715. }
  716. if (canCreateThunkFor(F)) {
  717. writeThunk(F, G);
  718. return true;
  719. }
  720. return false;
  721. }
  722. // Merge two equivalent functions. Upon completion, Function G is deleted.
  723. void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
  724. if (F->isInterposable()) {
  725. assert(G->isInterposable());
  726. // Both writeThunkOrAlias() calls below must succeed, either because we can
  727. // create aliases for G and NewF, or because a thunk for F is profitable.
  728. // F here has the same signature as NewF below, so that's what we check.
  729. if (!canCreateThunkFor(F) &&
  730. (!canCreateAliasFor(F) || !canCreateAliasFor(G)))
  731. return;
  732. // Make them both thunks to the same internal function.
  733. Function *NewF = Function::Create(F->getFunctionType(), F->getLinkage(),
  734. F->getAddressSpace(), "", F->getParent());
  735. NewF->copyAttributesFrom(F);
  736. NewF->takeName(F);
  737. removeUsers(F);
  738. F->replaceAllUsesWith(NewF);
  739. // We collect alignment before writeThunkOrAlias that overwrites NewF and
  740. // G's content.
  741. const MaybeAlign NewFAlign = NewF->getAlign();
  742. const MaybeAlign GAlign = G->getAlign();
  743. writeThunkOrAlias(F, G);
  744. writeThunkOrAlias(F, NewF);
  745. if (NewFAlign || GAlign)
  746. F->setAlignment(std::max(NewFAlign.valueOrOne(), GAlign.valueOrOne()));
  747. else
  748. F->setAlignment(std::nullopt);
  749. F->setLinkage(GlobalValue::PrivateLinkage);
  750. ++NumDoubleWeak;
  751. ++NumFunctionsMerged;
  752. } else {
  753. // For better debugability, under MergeFunctionsPDI, we do not modify G's
  754. // call sites to point to F even when within the same translation unit.
  755. if (!G->isInterposable() && !MergeFunctionsPDI) {
  756. // Functions referred to by llvm.used/llvm.compiler.used are special:
  757. // there are uses of the symbol name that are not visible to LLVM,
  758. // usually from inline asm.
  759. if (G->hasGlobalUnnamedAddr() && !Used.contains(G)) {
  760. // G might have been a key in our GlobalNumberState, and it's illegal
  761. // to replace a key in ValueMap<GlobalValue *> with a non-global.
  762. GlobalNumbers.erase(G);
  763. // If G's address is not significant, replace it entirely.
  764. Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
  765. removeUsers(G);
  766. G->replaceAllUsesWith(BitcastF);
  767. } else {
  768. // Redirect direct callers of G to F. (See note on MergeFunctionsPDI
  769. // above).
  770. replaceDirectCallers(G, F);
  771. }
  772. }
  773. // If G was internal then we may have replaced all uses of G with F. If so,
  774. // stop here and delete G. There's no need for a thunk. (See note on
  775. // MergeFunctionsPDI above).
  776. if (G->isDiscardableIfUnused() && G->use_empty() && !MergeFunctionsPDI) {
  777. G->eraseFromParent();
  778. ++NumFunctionsMerged;
  779. return;
  780. }
  781. if (writeThunkOrAlias(F, G)) {
  782. ++NumFunctionsMerged;
  783. }
  784. }
  785. }
  786. /// Replace function F by function G.
  787. void MergeFunctions::replaceFunctionInTree(const FunctionNode &FN,
  788. Function *G) {
  789. Function *F = FN.getFunc();
  790. assert(FunctionComparator(F, G, &GlobalNumbers).compare() == 0 &&
  791. "The two functions must be equal");
  792. auto I = FNodesInTree.find(F);
  793. assert(I != FNodesInTree.end() && "F should be in FNodesInTree");
  794. assert(FNodesInTree.count(G) == 0 && "FNodesInTree should not contain G");
  795. FnTreeType::iterator IterToFNInFnTree = I->second;
  796. assert(&(*IterToFNInFnTree) == &FN && "F should map to FN in FNodesInTree.");
  797. // Remove F -> FN and insert G -> FN
  798. FNodesInTree.erase(I);
  799. FNodesInTree.insert({G, IterToFNInFnTree});
  800. // Replace F with G in FN, which is stored inside the FnTree.
  801. FN.replaceBy(G);
  802. }
  803. // Ordering for functions that are equal under FunctionComparator
  804. static bool isFuncOrderCorrect(const Function *F, const Function *G) {
  805. if (F->isInterposable() != G->isInterposable()) {
  806. // Strong before weak, because the weak function may call the strong
  807. // one, but not the other way around.
  808. return !F->isInterposable();
  809. }
  810. if (F->hasLocalLinkage() != G->hasLocalLinkage()) {
  811. // External before local, because we definitely have to keep the external
  812. // function, but may be able to drop the local one.
  813. return !F->hasLocalLinkage();
  814. }
  815. // Impose a total order (by name) on the replacement of functions. This is
  816. // important when operating on more than one module independently to prevent
  817. // cycles of thunks calling each other when the modules are linked together.
  818. return F->getName() <= G->getName();
  819. }
  820. // Insert a ComparableFunction into the FnTree, or merge it away if equal to one
  821. // that was already inserted.
  822. bool MergeFunctions::insert(Function *NewFunction) {
  823. std::pair<FnTreeType::iterator, bool> Result =
  824. FnTree.insert(FunctionNode(NewFunction));
  825. if (Result.second) {
  826. assert(FNodesInTree.count(NewFunction) == 0);
  827. FNodesInTree.insert({NewFunction, Result.first});
  828. LLVM_DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName()
  829. << '\n');
  830. return false;
  831. }
  832. const FunctionNode &OldF = *Result.first;
  833. if (!isFuncOrderCorrect(OldF.getFunc(), NewFunction)) {
  834. // Swap the two functions.
  835. Function *F = OldF.getFunc();
  836. replaceFunctionInTree(*Result.first, NewFunction);
  837. NewFunction = F;
  838. assert(OldF.getFunc() != F && "Must have swapped the functions.");
  839. }
  840. LLVM_DEBUG(dbgs() << " " << OldF.getFunc()->getName()
  841. << " == " << NewFunction->getName() << '\n');
  842. Function *DeleteF = NewFunction;
  843. mergeTwoFunctions(OldF.getFunc(), DeleteF);
  844. return true;
  845. }
  846. // Remove a function from FnTree. If it was already in FnTree, add
  847. // it to Deferred so that we'll look at it in the next round.
  848. void MergeFunctions::remove(Function *F) {
  849. auto I = FNodesInTree.find(F);
  850. if (I != FNodesInTree.end()) {
  851. LLVM_DEBUG(dbgs() << "Deferred " << F->getName() << ".\n");
  852. FnTree.erase(I->second);
  853. // I->second has been invalidated, remove it from the FNodesInTree map to
  854. // preserve the invariant.
  855. FNodesInTree.erase(I);
  856. Deferred.emplace_back(F);
  857. }
  858. }
  859. // For each instruction used by the value, remove() the function that contains
  860. // the instruction. This should happen right before a call to RAUW.
  861. void MergeFunctions::removeUsers(Value *V) {
  862. for (User *U : V->users())
  863. if (auto *I = dyn_cast<Instruction>(U))
  864. remove(I->getFunction());
  865. }