MergeFunctions.cpp 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951
  1. //===- MergeFunctions.cpp - Merge identical functions ---------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass looks for equivalent functions that are mergable and folds them.
  10. //
  11. // Order relation is defined on set of functions. It was made through
  12. // special function comparison procedure that returns
  13. // 0 when functions are equal,
  14. // -1 when Left function is less than right function, and
  15. // 1 for opposite case. We need total-ordering, so we need to maintain
  16. // four properties on the functions set:
  17. // a <= a (reflexivity)
  18. // if a <= b and b <= a then a = b (antisymmetry)
  19. // if a <= b and b <= c then a <= c (transitivity).
  20. // for all a and b: a <= b or b <= a (totality).
  21. //
  22. // Comparison iterates through each instruction in each basic block.
  23. // Functions are kept on binary tree. For each new function F we perform
  24. // lookup in binary tree.
  25. // In practice it works the following way:
  26. // -- We define Function* container class with custom "operator<" (FunctionPtr).
  27. // -- "FunctionPtr" instances are stored in std::set collection, so every
  28. // std::set::insert operation will give you result in log(N) time.
  29. //
  30. // As an optimization, a hash of the function structure is calculated first, and
  31. // two functions are only compared if they have the same hash. This hash is
  32. // cheap to compute, and has the property that if function F == G according to
  33. // the comparison function, then hash(F) == hash(G). This consistency property
  34. // is critical to ensuring all possible merging opportunities are exploited.
  35. // Collisions in the hash affect the speed of the pass but not the correctness
  36. // or determinism of the resulting transformation.
  37. //
  38. // When a match is found the functions are folded. If both functions are
  39. // overridable, we move the functionality into a new internal function and
  40. // leave two overridable thunks to it.
  41. //
  42. //===----------------------------------------------------------------------===//
  43. //
  44. // Future work:
  45. //
  46. // * virtual functions.
  47. //
  48. // Many functions have their address taken by the virtual function table for
  49. // the object they belong to. However, as long as it's only used for a lookup
  50. // and call, this is irrelevant, and we'd like to fold such functions.
  51. //
  52. // * be smarter about bitcasts.
  53. //
  54. // In order to fold functions, we will sometimes add either bitcast instructions
  55. // or bitcast constant expressions. Unfortunately, this can confound further
  56. // analysis since the two functions differ where one has a bitcast and the
  57. // other doesn't. We should learn to look through bitcasts.
  58. //
  59. // * Compare complex types with pointer types inside.
  60. // * Compare cross-reference cases.
  61. // * Compare complex expressions.
  62. //
  63. // All the three issues above could be described as ability to prove that
  64. // fA == fB == fC == fE == fF == fG in example below:
  65. //
  66. // void fA() {
  67. // fB();
  68. // }
  69. // void fB() {
  70. // fA();
  71. // }
  72. //
  73. // void fE() {
  74. // fF();
  75. // }
  76. // void fF() {
  77. // fG();
  78. // }
  79. // void fG() {
  80. // fE();
  81. // }
  82. //
  83. // Simplest cross-reference case (fA <--> fB) was implemented in previous
  84. // versions of MergeFunctions, though it presented only in two function pairs
  85. // in test-suite (that counts >50k functions)
  86. // Though possibility to detect complex cross-referencing (e.g.: A->B->C->D->A)
  87. // could cover much more cases.
  88. //
  89. //===----------------------------------------------------------------------===//
  90. #include "llvm/ADT/ArrayRef.h"
  91. #include "llvm/ADT/SmallPtrSet.h"
  92. #include "llvm/ADT/SmallVector.h"
  93. #include "llvm/ADT/Statistic.h"
  94. #include "llvm/IR/Argument.h"
  95. #include "llvm/IR/Attributes.h"
  96. #include "llvm/IR/BasicBlock.h"
  97. #include "llvm/IR/Constant.h"
  98. #include "llvm/IR/Constants.h"
  99. #include "llvm/IR/DebugInfoMetadata.h"
  100. #include "llvm/IR/DebugLoc.h"
  101. #include "llvm/IR/DerivedTypes.h"
  102. #include "llvm/IR/Function.h"
  103. #include "llvm/IR/GlobalValue.h"
  104. #include "llvm/IR/IRBuilder.h"
  105. #include "llvm/IR/InstrTypes.h"
  106. #include "llvm/IR/Instruction.h"
  107. #include "llvm/IR/Instructions.h"
  108. #include "llvm/IR/IntrinsicInst.h"
  109. #include "llvm/IR/Module.h"
  110. #include "llvm/IR/Type.h"
  111. #include "llvm/IR/Use.h"
  112. #include "llvm/IR/User.h"
  113. #include "llvm/IR/Value.h"
  114. #include "llvm/IR/ValueHandle.h"
  115. #include "llvm/IR/ValueMap.h"
  116. #include "llvm/InitializePasses.h"
  117. #include "llvm/Pass.h"
  118. #include "llvm/Support/Casting.h"
  119. #include "llvm/Support/CommandLine.h"
  120. #include "llvm/Support/Debug.h"
  121. #include "llvm/Support/raw_ostream.h"
  122. #include "llvm/Transforms/IPO.h"
  123. #include "llvm/Transforms/IPO/MergeFunctions.h"
  124. #include "llvm/Transforms/Utils/FunctionComparator.h"
  125. #include <algorithm>
  126. #include <cassert>
  127. #include <iterator>
  128. #include <set>
  129. #include <utility>
  130. #include <vector>
  131. using namespace llvm;
  132. #define DEBUG_TYPE "mergefunc"
  133. STATISTIC(NumFunctionsMerged, "Number of functions merged");
  134. STATISTIC(NumThunksWritten, "Number of thunks generated");
  135. STATISTIC(NumAliasesWritten, "Number of aliases generated");
  136. STATISTIC(NumDoubleWeak, "Number of new functions created");
  137. static cl::opt<unsigned> NumFunctionsForSanityCheck(
  138. "mergefunc-sanity",
  139. cl::desc("How many functions in module could be used for "
  140. "MergeFunctions pass sanity check. "
  141. "'0' disables this check. Works only with '-debug' key."),
  142. cl::init(0), cl::Hidden);
  143. // Under option -mergefunc-preserve-debug-info we:
  144. // - Do not create a new function for a thunk.
  145. // - Retain the debug info for a thunk's parameters (and associated
  146. // instructions for the debug info) from the entry block.
  147. // Note: -debug will display the algorithm at work.
  148. // - Create debug-info for the call (to the shared implementation) made by
  149. // a thunk and its return value.
  150. // - Erase the rest of the function, retaining the (minimally sized) entry
  151. // block to create a thunk.
  152. // - Preserve a thunk's call site to point to the thunk even when both occur
  153. // within the same translation unit, to aid debugability. Note that this
  154. // behaviour differs from the underlying -mergefunc implementation which
  155. // modifies the thunk's call site to point to the shared implementation
  156. // when both occur within the same translation unit.
  157. static cl::opt<bool>
  158. MergeFunctionsPDI("mergefunc-preserve-debug-info", cl::Hidden,
  159. cl::init(false),
  160. cl::desc("Preserve debug info in thunk when mergefunc "
  161. "transformations are made."));
  162. static cl::opt<bool>
  163. MergeFunctionsAliases("mergefunc-use-aliases", cl::Hidden,
  164. cl::init(false),
  165. cl::desc("Allow mergefunc to create aliases"));
  166. namespace {
  167. class FunctionNode {
  168. mutable AssertingVH<Function> F;
  169. FunctionComparator::FunctionHash Hash;
  170. public:
  171. // Note the hash is recalculated potentially multiple times, but it is cheap.
  172. FunctionNode(Function *F)
  173. : F(F), Hash(FunctionComparator::functionHash(*F)) {}
  174. Function *getFunc() const { return F; }
  175. FunctionComparator::FunctionHash getHash() const { return Hash; }
  176. /// Replace the reference to the function F by the function G, assuming their
  177. /// implementations are equal.
  178. void replaceBy(Function *G) const {
  179. F = G;
  180. }
  181. };
  182. /// MergeFunctions finds functions which will generate identical machine code,
  183. /// by considering all pointer types to be equivalent. Once identified,
  184. /// MergeFunctions will fold them by replacing a call to one to a call to a
  185. /// bitcast of the other.
  186. class MergeFunctions {
  187. public:
  188. MergeFunctions() : FnTree(FunctionNodeCmp(&GlobalNumbers)) {
  189. }
  190. bool runOnModule(Module &M);
  191. private:
  192. // The function comparison operator is provided here so that FunctionNodes do
  193. // not need to become larger with another pointer.
  194. class FunctionNodeCmp {
  195. GlobalNumberState* GlobalNumbers;
  196. public:
  197. FunctionNodeCmp(GlobalNumberState* GN) : GlobalNumbers(GN) {}
  198. bool operator()(const FunctionNode &LHS, const FunctionNode &RHS) const {
  199. // Order first by hashes, then full function comparison.
  200. if (LHS.getHash() != RHS.getHash())
  201. return LHS.getHash() < RHS.getHash();
  202. FunctionComparator FCmp(LHS.getFunc(), RHS.getFunc(), GlobalNumbers);
  203. return FCmp.compare() == -1;
  204. }
  205. };
  206. using FnTreeType = std::set<FunctionNode, FunctionNodeCmp>;
  207. GlobalNumberState GlobalNumbers;
  208. /// A work queue of functions that may have been modified and should be
  209. /// analyzed again.
  210. std::vector<WeakTrackingVH> Deferred;
  211. #ifndef NDEBUG
  212. /// Checks the rules of order relation introduced among functions set.
  213. /// Returns true, if sanity check has been passed, and false if failed.
  214. bool doSanityCheck(std::vector<WeakTrackingVH> &Worklist);
  215. #endif
  216. /// Insert a ComparableFunction into the FnTree, or merge it away if it's
  217. /// equal to one that's already present.
  218. bool insert(Function *NewFunction);
  219. /// Remove a Function from the FnTree and queue it up for a second sweep of
  220. /// analysis.
  221. void remove(Function *F);
  222. /// Find the functions that use this Value and remove them from FnTree and
  223. /// queue the functions.
  224. void removeUsers(Value *V);
  225. /// Replace all direct calls of Old with calls of New. Will bitcast New if
  226. /// necessary to make types match.
  227. void replaceDirectCallers(Function *Old, Function *New);
  228. /// Merge two equivalent functions. Upon completion, G may be deleted, or may
  229. /// be converted into a thunk. In either case, it should never be visited
  230. /// again.
  231. void mergeTwoFunctions(Function *F, Function *G);
  232. /// Fill PDIUnrelatedWL with instructions from the entry block that are
  233. /// unrelated to parameter related debug info.
  234. void filterInstsUnrelatedToPDI(BasicBlock *GEntryBlock,
  235. std::vector<Instruction *> &PDIUnrelatedWL);
  236. /// Erase the rest of the CFG (i.e. barring the entry block).
  237. void eraseTail(Function *G);
  238. /// Erase the instructions in PDIUnrelatedWL as they are unrelated to the
  239. /// parameter debug info, from the entry block.
  240. void eraseInstsUnrelatedToPDI(std::vector<Instruction *> &PDIUnrelatedWL);
  241. /// Replace G with a simple tail call to bitcast(F). Also (unless
  242. /// MergeFunctionsPDI holds) replace direct uses of G with bitcast(F),
  243. /// delete G.
  244. void writeThunk(Function *F, Function *G);
  245. // Replace G with an alias to F (deleting function G)
  246. void writeAlias(Function *F, Function *G);
  247. // Replace G with an alias to F if possible, or a thunk to F if possible.
  248. // Returns false if neither is the case.
  249. bool writeThunkOrAlias(Function *F, Function *G);
  250. /// Replace function F with function G in the function tree.
  251. void replaceFunctionInTree(const FunctionNode &FN, Function *G);
  252. /// The set of all distinct functions. Use the insert() and remove() methods
  253. /// to modify it. The map allows efficient lookup and deferring of Functions.
  254. FnTreeType FnTree;
  255. // Map functions to the iterators of the FunctionNode which contains them
  256. // in the FnTree. This must be updated carefully whenever the FnTree is
  257. // modified, i.e. in insert(), remove(), and replaceFunctionInTree(), to avoid
  258. // dangling iterators into FnTree. The invariant that preserves this is that
  259. // there is exactly one mapping F -> FN for each FunctionNode FN in FnTree.
  260. DenseMap<AssertingVH<Function>, FnTreeType::iterator> FNodesInTree;
  261. };
  262. class MergeFunctionsLegacyPass : public ModulePass {
  263. public:
  264. static char ID;
  265. MergeFunctionsLegacyPass(): ModulePass(ID) {
  266. initializeMergeFunctionsLegacyPassPass(*PassRegistry::getPassRegistry());
  267. }
  268. bool runOnModule(Module &M) override {
  269. if (skipModule(M))
  270. return false;
  271. MergeFunctions MF;
  272. return MF.runOnModule(M);
  273. }
  274. };
  275. } // end anonymous namespace
  276. char MergeFunctionsLegacyPass::ID = 0;
  277. INITIALIZE_PASS(MergeFunctionsLegacyPass, "mergefunc",
  278. "Merge Functions", false, false)
  279. ModulePass *llvm::createMergeFunctionsPass() {
  280. return new MergeFunctionsLegacyPass();
  281. }
  282. PreservedAnalyses MergeFunctionsPass::run(Module &M,
  283. ModuleAnalysisManager &AM) {
  284. MergeFunctions MF;
  285. if (!MF.runOnModule(M))
  286. return PreservedAnalyses::all();
  287. return PreservedAnalyses::none();
  288. }
  289. #ifndef NDEBUG
  290. bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) {
  291. if (const unsigned Max = NumFunctionsForSanityCheck) {
  292. unsigned TripleNumber = 0;
  293. bool Valid = true;
  294. dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n";
  295. unsigned i = 0;
  296. for (std::vector<WeakTrackingVH>::iterator I = Worklist.begin(),
  297. E = Worklist.end();
  298. I != E && i < Max; ++I, ++i) {
  299. unsigned j = i;
  300. for (std::vector<WeakTrackingVH>::iterator J = I; J != E && j < Max;
  301. ++J, ++j) {
  302. Function *F1 = cast<Function>(*I);
  303. Function *F2 = cast<Function>(*J);
  304. int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare();
  305. int Res2 = FunctionComparator(F2, F1, &GlobalNumbers).compare();
  306. // If F1 <= F2, then F2 >= F1, otherwise report failure.
  307. if (Res1 != -Res2) {
  308. dbgs() << "MERGEFUNC-SANITY: Non-symmetric; triple: " << TripleNumber
  309. << "\n";
  310. dbgs() << *F1 << '\n' << *F2 << '\n';
  311. Valid = false;
  312. }
  313. if (Res1 == 0)
  314. continue;
  315. unsigned k = j;
  316. for (std::vector<WeakTrackingVH>::iterator K = J; K != E && k < Max;
  317. ++k, ++K, ++TripleNumber) {
  318. if (K == J)
  319. continue;
  320. Function *F3 = cast<Function>(*K);
  321. int Res3 = FunctionComparator(F1, F3, &GlobalNumbers).compare();
  322. int Res4 = FunctionComparator(F2, F3, &GlobalNumbers).compare();
  323. bool Transitive = true;
  324. if (Res1 != 0 && Res1 == Res4) {
  325. // F1 > F2, F2 > F3 => F1 > F3
  326. Transitive = Res3 == Res1;
  327. } else if (Res3 != 0 && Res3 == -Res4) {
  328. // F1 > F3, F3 > F2 => F1 > F2
  329. Transitive = Res3 == Res1;
  330. } else if (Res4 != 0 && -Res3 == Res4) {
  331. // F2 > F3, F3 > F1 => F2 > F1
  332. Transitive = Res4 == -Res1;
  333. }
  334. if (!Transitive) {
  335. dbgs() << "MERGEFUNC-SANITY: Non-transitive; triple: "
  336. << TripleNumber << "\n";
  337. dbgs() << "Res1, Res3, Res4: " << Res1 << ", " << Res3 << ", "
  338. << Res4 << "\n";
  339. dbgs() << *F1 << '\n' << *F2 << '\n' << *F3 << '\n';
  340. Valid = false;
  341. }
  342. }
  343. }
  344. }
  345. dbgs() << "MERGEFUNC-SANITY: " << (Valid ? "Passed." : "Failed.") << "\n";
  346. return Valid;
  347. }
  348. return true;
  349. }
  350. #endif
  351. /// Check whether \p F is eligible for function merging.
  352. static bool isEligibleForMerging(Function &F) {
  353. return !F.isDeclaration() && !F.hasAvailableExternallyLinkage();
  354. }
  355. bool MergeFunctions::runOnModule(Module &M) {
  356. bool Changed = false;
  357. // All functions in the module, ordered by hash. Functions with a unique
  358. // hash value are easily eliminated.
  359. std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
  360. HashedFuncs;
  361. for (Function &Func : M) {
  362. if (isEligibleForMerging(Func)) {
  363. HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
  364. }
  365. }
  366. llvm::stable_sort(HashedFuncs, less_first());
  367. auto S = HashedFuncs.begin();
  368. for (auto I = HashedFuncs.begin(), IE = HashedFuncs.end(); I != IE; ++I) {
  369. // If the hash value matches the previous value or the next one, we must
  370. // consider merging it. Otherwise it is dropped and never considered again.
  371. if ((I != S && std::prev(I)->first == I->first) ||
  372. (std::next(I) != IE && std::next(I)->first == I->first) ) {
  373. Deferred.push_back(WeakTrackingVH(I->second));
  374. }
  375. }
  376. do {
  377. std::vector<WeakTrackingVH> Worklist;
  378. Deferred.swap(Worklist);
  379. LLVM_DEBUG(doSanityCheck(Worklist));
  380. LLVM_DEBUG(dbgs() << "size of module: " << M.size() << '\n');
  381. LLVM_DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');
  382. // Insert functions and merge them.
  383. for (WeakTrackingVH &I : Worklist) {
  384. if (!I)
  385. continue;
  386. Function *F = cast<Function>(I);
  387. if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage()) {
  388. Changed |= insert(F);
  389. }
  390. }
  391. LLVM_DEBUG(dbgs() << "size of FnTree: " << FnTree.size() << '\n');
  392. } while (!Deferred.empty());
  393. FnTree.clear();
  394. FNodesInTree.clear();
  395. GlobalNumbers.clear();
  396. return Changed;
  397. }
  398. // Replace direct callers of Old with New.
  399. void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
  400. Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
  401. for (Use &U : llvm::make_early_inc_range(Old->uses())) {
  402. CallBase *CB = dyn_cast<CallBase>(U.getUser());
  403. if (CB && CB->isCallee(&U)) {
  404. // Do not copy attributes from the called function to the call-site.
  405. // Function comparison ensures that the attributes are the same up to
  406. // type congruences in byval(), in which case we need to keep the byval
  407. // type of the call-site, not the callee function.
  408. remove(CB->getFunction());
  409. U.set(BitcastNew);
  410. }
  411. }
  412. }
  413. // Helper for writeThunk,
  414. // Selects proper bitcast operation,
  415. // but a bit simpler then CastInst::getCastOpcode.
  416. static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) {
  417. Type *SrcTy = V->getType();
  418. if (SrcTy->isStructTy()) {
  419. assert(DestTy->isStructTy());
  420. assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements());
  421. Value *Result = UndefValue::get(DestTy);
  422. for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) {
  423. Value *Element = createCast(
  424. Builder, Builder.CreateExtractValue(V, makeArrayRef(I)),
  425. DestTy->getStructElementType(I));
  426. Result =
  427. Builder.CreateInsertValue(Result, Element, makeArrayRef(I));
  428. }
  429. return Result;
  430. }
  431. assert(!DestTy->isStructTy());
  432. if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
  433. return Builder.CreateIntToPtr(V, DestTy);
  434. else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
  435. return Builder.CreatePtrToInt(V, DestTy);
  436. else
  437. return Builder.CreateBitCast(V, DestTy);
  438. }
  439. // Erase the instructions in PDIUnrelatedWL as they are unrelated to the
  440. // parameter debug info, from the entry block.
  441. void MergeFunctions::eraseInstsUnrelatedToPDI(
  442. std::vector<Instruction *> &PDIUnrelatedWL) {
  443. LLVM_DEBUG(
  444. dbgs() << " Erasing instructions (in reverse order of appearance in "
  445. "entry block) unrelated to parameter debug info from entry "
  446. "block: {\n");
  447. while (!PDIUnrelatedWL.empty()) {
  448. Instruction *I = PDIUnrelatedWL.back();
  449. LLVM_DEBUG(dbgs() << " Deleting Instruction: ");
  450. LLVM_DEBUG(I->print(dbgs()));
  451. LLVM_DEBUG(dbgs() << "\n");
  452. I->eraseFromParent();
  453. PDIUnrelatedWL.pop_back();
  454. }
  455. LLVM_DEBUG(dbgs() << " } // Done erasing instructions unrelated to parameter "
  456. "debug info from entry block. \n");
  457. }
  458. // Reduce G to its entry block.
  459. void MergeFunctions::eraseTail(Function *G) {
  460. std::vector<BasicBlock *> WorklistBB;
  461. for (BasicBlock &BB : drop_begin(*G)) {
  462. BB.dropAllReferences();
  463. WorklistBB.push_back(&BB);
  464. }
  465. while (!WorklistBB.empty()) {
  466. BasicBlock *BB = WorklistBB.back();
  467. BB->eraseFromParent();
  468. WorklistBB.pop_back();
  469. }
  470. }
  471. // We are interested in the following instructions from the entry block as being
  472. // related to parameter debug info:
  473. // - @llvm.dbg.declare
  474. // - stores from the incoming parameters to locations on the stack-frame
  475. // - allocas that create these locations on the stack-frame
  476. // - @llvm.dbg.value
  477. // - the entry block's terminator
  478. // The rest are unrelated to debug info for the parameters; fill up
  479. // PDIUnrelatedWL with such instructions.
  480. void MergeFunctions::filterInstsUnrelatedToPDI(
  481. BasicBlock *GEntryBlock, std::vector<Instruction *> &PDIUnrelatedWL) {
  482. std::set<Instruction *> PDIRelated;
  483. for (BasicBlock::iterator BI = GEntryBlock->begin(), BIE = GEntryBlock->end();
  484. BI != BIE; ++BI) {
  485. if (auto *DVI = dyn_cast<DbgValueInst>(&*BI)) {
  486. LLVM_DEBUG(dbgs() << " Deciding: ");
  487. LLVM_DEBUG(BI->print(dbgs()));
  488. LLVM_DEBUG(dbgs() << "\n");
  489. DILocalVariable *DILocVar = DVI->getVariable();
  490. if (DILocVar->isParameter()) {
  491. LLVM_DEBUG(dbgs() << " Include (parameter): ");
  492. LLVM_DEBUG(BI->print(dbgs()));
  493. LLVM_DEBUG(dbgs() << "\n");
  494. PDIRelated.insert(&*BI);
  495. } else {
  496. LLVM_DEBUG(dbgs() << " Delete (!parameter): ");
  497. LLVM_DEBUG(BI->print(dbgs()));
  498. LLVM_DEBUG(dbgs() << "\n");
  499. }
  500. } else if (auto *DDI = dyn_cast<DbgDeclareInst>(&*BI)) {
  501. LLVM_DEBUG(dbgs() << " Deciding: ");
  502. LLVM_DEBUG(BI->print(dbgs()));
  503. LLVM_DEBUG(dbgs() << "\n");
  504. DILocalVariable *DILocVar = DDI->getVariable();
  505. if (DILocVar->isParameter()) {
  506. LLVM_DEBUG(dbgs() << " Parameter: ");
  507. LLVM_DEBUG(DILocVar->print(dbgs()));
  508. AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress());
  509. if (AI) {
  510. LLVM_DEBUG(dbgs() << " Processing alloca users: ");
  511. LLVM_DEBUG(dbgs() << "\n");
  512. for (User *U : AI->users()) {
  513. if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
  514. if (Value *Arg = SI->getValueOperand()) {
  515. if (isa<Argument>(Arg)) {
  516. LLVM_DEBUG(dbgs() << " Include: ");
  517. LLVM_DEBUG(AI->print(dbgs()));
  518. LLVM_DEBUG(dbgs() << "\n");
  519. PDIRelated.insert(AI);
  520. LLVM_DEBUG(dbgs() << " Include (parameter): ");
  521. LLVM_DEBUG(SI->print(dbgs()));
  522. LLVM_DEBUG(dbgs() << "\n");
  523. PDIRelated.insert(SI);
  524. LLVM_DEBUG(dbgs() << " Include: ");
  525. LLVM_DEBUG(BI->print(dbgs()));
  526. LLVM_DEBUG(dbgs() << "\n");
  527. PDIRelated.insert(&*BI);
  528. } else {
  529. LLVM_DEBUG(dbgs() << " Delete (!parameter): ");
  530. LLVM_DEBUG(SI->print(dbgs()));
  531. LLVM_DEBUG(dbgs() << "\n");
  532. }
  533. }
  534. } else {
  535. LLVM_DEBUG(dbgs() << " Defer: ");
  536. LLVM_DEBUG(U->print(dbgs()));
  537. LLVM_DEBUG(dbgs() << "\n");
  538. }
  539. }
  540. } else {
  541. LLVM_DEBUG(dbgs() << " Delete (alloca NULL): ");
  542. LLVM_DEBUG(BI->print(dbgs()));
  543. LLVM_DEBUG(dbgs() << "\n");
  544. }
  545. } else {
  546. LLVM_DEBUG(dbgs() << " Delete (!parameter): ");
  547. LLVM_DEBUG(BI->print(dbgs()));
  548. LLVM_DEBUG(dbgs() << "\n");
  549. }
  550. } else if (BI->isTerminator() && &*BI == GEntryBlock->getTerminator()) {
  551. LLVM_DEBUG(dbgs() << " Will Include Terminator: ");
  552. LLVM_DEBUG(BI->print(dbgs()));
  553. LLVM_DEBUG(dbgs() << "\n");
  554. PDIRelated.insert(&*BI);
  555. } else {
  556. LLVM_DEBUG(dbgs() << " Defer: ");
  557. LLVM_DEBUG(BI->print(dbgs()));
  558. LLVM_DEBUG(dbgs() << "\n");
  559. }
  560. }
  561. LLVM_DEBUG(
  562. dbgs()
  563. << " Report parameter debug info related/related instructions: {\n");
  564. for (Instruction &I : *GEntryBlock) {
  565. if (PDIRelated.find(&I) == PDIRelated.end()) {
  566. LLVM_DEBUG(dbgs() << " !PDIRelated: ");
  567. LLVM_DEBUG(I.print(dbgs()));
  568. LLVM_DEBUG(dbgs() << "\n");
  569. PDIUnrelatedWL.push_back(&I);
  570. } else {
  571. LLVM_DEBUG(dbgs() << " PDIRelated: ");
  572. LLVM_DEBUG(I.print(dbgs()));
  573. LLVM_DEBUG(dbgs() << "\n");
  574. }
  575. }
  576. LLVM_DEBUG(dbgs() << " }\n");
  577. }
  578. /// Whether this function may be replaced by a forwarding thunk.
  579. static bool canCreateThunkFor(Function *F) {
  580. if (F->isVarArg())
  581. return false;
  582. // Don't merge tiny functions using a thunk, since it can just end up
  583. // making the function larger.
  584. if (F->size() == 1) {
  585. if (F->front().size() <= 2) {
  586. LLVM_DEBUG(dbgs() << "canCreateThunkFor: " << F->getName()
  587. << " is too small to bother creating a thunk for\n");
  588. return false;
  589. }
  590. }
  591. return true;
  592. }
  593. // Replace G with a simple tail call to bitcast(F). Also (unless
  594. // MergeFunctionsPDI holds) replace direct uses of G with bitcast(F),
  595. // delete G. Under MergeFunctionsPDI, we use G itself for creating
  596. // the thunk as we preserve the debug info (and associated instructions)
  597. // from G's entry block pertaining to G's incoming arguments which are
  598. // passed on as corresponding arguments in the call that G makes to F.
  599. // For better debugability, under MergeFunctionsPDI, we do not modify G's
  600. // call sites to point to F even when within the same translation unit.
  601. void MergeFunctions::writeThunk(Function *F, Function *G) {
  602. BasicBlock *GEntryBlock = nullptr;
  603. std::vector<Instruction *> PDIUnrelatedWL;
  604. BasicBlock *BB = nullptr;
  605. Function *NewG = nullptr;
  606. if (MergeFunctionsPDI) {
  607. LLVM_DEBUG(dbgs() << "writeThunk: (MergeFunctionsPDI) Do not create a new "
  608. "function as thunk; retain original: "
  609. << G->getName() << "()\n");
  610. GEntryBlock = &G->getEntryBlock();
  611. LLVM_DEBUG(
  612. dbgs() << "writeThunk: (MergeFunctionsPDI) filter parameter related "
  613. "debug info for "
  614. << G->getName() << "() {\n");
  615. filterInstsUnrelatedToPDI(GEntryBlock, PDIUnrelatedWL);
  616. GEntryBlock->getTerminator()->eraseFromParent();
  617. BB = GEntryBlock;
  618. } else {
  619. NewG = Function::Create(G->getFunctionType(), G->getLinkage(),
  620. G->getAddressSpace(), "", G->getParent());
  621. NewG->setComdat(G->getComdat());
  622. BB = BasicBlock::Create(F->getContext(), "", NewG);
  623. }
  624. IRBuilder<> Builder(BB);
  625. Function *H = MergeFunctionsPDI ? G : NewG;
  626. SmallVector<Value *, 16> Args;
  627. unsigned i = 0;
  628. FunctionType *FFTy = F->getFunctionType();
  629. for (Argument &AI : H->args()) {
  630. Args.push_back(createCast(Builder, &AI, FFTy->getParamType(i)));
  631. ++i;
  632. }
  633. CallInst *CI = Builder.CreateCall(F, Args);
  634. ReturnInst *RI = nullptr;
  635. bool isSwiftTailCall = F->getCallingConv() == CallingConv::SwiftTail &&
  636. G->getCallingConv() == CallingConv::SwiftTail;
  637. CI->setTailCallKind(isSwiftTailCall ? llvm::CallInst::TCK_MustTail
  638. : llvm::CallInst::TCK_Tail);
  639. CI->setCallingConv(F->getCallingConv());
  640. CI->setAttributes(F->getAttributes());
  641. if (H->getReturnType()->isVoidTy()) {
  642. RI = Builder.CreateRetVoid();
  643. } else {
  644. RI = Builder.CreateRet(createCast(Builder, CI, H->getReturnType()));
  645. }
  646. if (MergeFunctionsPDI) {
  647. DISubprogram *DIS = G->getSubprogram();
  648. if (DIS) {
  649. DebugLoc CIDbgLoc =
  650. DILocation::get(DIS->getContext(), DIS->getScopeLine(), 0, DIS);
  651. DebugLoc RIDbgLoc =
  652. DILocation::get(DIS->getContext(), DIS->getScopeLine(), 0, DIS);
  653. CI->setDebugLoc(CIDbgLoc);
  654. RI->setDebugLoc(RIDbgLoc);
  655. } else {
  656. LLVM_DEBUG(
  657. dbgs() << "writeThunk: (MergeFunctionsPDI) No DISubprogram for "
  658. << G->getName() << "()\n");
  659. }
  660. eraseTail(G);
  661. eraseInstsUnrelatedToPDI(PDIUnrelatedWL);
  662. LLVM_DEBUG(
  663. dbgs() << "} // End of parameter related debug info filtering for: "
  664. << G->getName() << "()\n");
  665. } else {
  666. NewG->copyAttributesFrom(G);
  667. NewG->takeName(G);
  668. removeUsers(G);
  669. G->replaceAllUsesWith(NewG);
  670. G->eraseFromParent();
  671. }
  672. LLVM_DEBUG(dbgs() << "writeThunk: " << H->getName() << '\n');
  673. ++NumThunksWritten;
  674. }
  675. // Whether this function may be replaced by an alias
  676. static bool canCreateAliasFor(Function *F) {
  677. if (!MergeFunctionsAliases || !F->hasGlobalUnnamedAddr())
  678. return false;
  679. // We should only see linkages supported by aliases here
  680. assert(F->hasLocalLinkage() || F->hasExternalLinkage()
  681. || F->hasWeakLinkage() || F->hasLinkOnceLinkage());
  682. return true;
  683. }
  684. // Replace G with an alias to F (deleting function G)
  685. void MergeFunctions::writeAlias(Function *F, Function *G) {
  686. Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
  687. PointerType *PtrType = G->getType();
  688. auto *GA = GlobalAlias::create(G->getValueType(), PtrType->getAddressSpace(),
  689. G->getLinkage(), "", BitcastF, G->getParent());
  690. F->setAlignment(MaybeAlign(std::max(F->getAlignment(), G->getAlignment())));
  691. GA->takeName(G);
  692. GA->setVisibility(G->getVisibility());
  693. GA->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  694. removeUsers(G);
  695. G->replaceAllUsesWith(GA);
  696. G->eraseFromParent();
  697. LLVM_DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n');
  698. ++NumAliasesWritten;
  699. }
  700. // Replace G with an alias to F if possible, or a thunk to F if
  701. // profitable. Returns false if neither is the case.
  702. bool MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
  703. if (canCreateAliasFor(G)) {
  704. writeAlias(F, G);
  705. return true;
  706. }
  707. if (canCreateThunkFor(F)) {
  708. writeThunk(F, G);
  709. return true;
  710. }
  711. return false;
  712. }
  713. // Merge two equivalent functions. Upon completion, Function G is deleted.
  714. void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
  715. if (F->isInterposable()) {
  716. assert(G->isInterposable());
  717. // Both writeThunkOrAlias() calls below must succeed, either because we can
  718. // create aliases for G and NewF, or because a thunk for F is profitable.
  719. // F here has the same signature as NewF below, so that's what we check.
  720. if (!canCreateThunkFor(F) &&
  721. (!canCreateAliasFor(F) || !canCreateAliasFor(G)))
  722. return;
  723. // Make them both thunks to the same internal function.
  724. Function *NewF = Function::Create(F->getFunctionType(), F->getLinkage(),
  725. F->getAddressSpace(), "", F->getParent());
  726. NewF->copyAttributesFrom(F);
  727. NewF->takeName(F);
  728. removeUsers(F);
  729. F->replaceAllUsesWith(NewF);
  730. MaybeAlign MaxAlignment(std::max(G->getAlignment(), NewF->getAlignment()));
  731. writeThunkOrAlias(F, G);
  732. writeThunkOrAlias(F, NewF);
  733. F->setAlignment(MaxAlignment);
  734. F->setLinkage(GlobalValue::PrivateLinkage);
  735. ++NumDoubleWeak;
  736. ++NumFunctionsMerged;
  737. } else {
  738. // For better debugability, under MergeFunctionsPDI, we do not modify G's
  739. // call sites to point to F even when within the same translation unit.
  740. if (!G->isInterposable() && !MergeFunctionsPDI) {
  741. if (G->hasGlobalUnnamedAddr()) {
  742. // G might have been a key in our GlobalNumberState, and it's illegal
  743. // to replace a key in ValueMap<GlobalValue *> with a non-global.
  744. GlobalNumbers.erase(G);
  745. // If G's address is not significant, replace it entirely.
  746. Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
  747. removeUsers(G);
  748. G->replaceAllUsesWith(BitcastF);
  749. } else {
  750. // Redirect direct callers of G to F. (See note on MergeFunctionsPDI
  751. // above).
  752. replaceDirectCallers(G, F);
  753. }
  754. }
  755. // If G was internal then we may have replaced all uses of G with F. If so,
  756. // stop here and delete G. There's no need for a thunk. (See note on
  757. // MergeFunctionsPDI above).
  758. if (G->isDiscardableIfUnused() && G->use_empty() && !MergeFunctionsPDI) {
  759. G->eraseFromParent();
  760. ++NumFunctionsMerged;
  761. return;
  762. }
  763. if (writeThunkOrAlias(F, G)) {
  764. ++NumFunctionsMerged;
  765. }
  766. }
  767. }
  768. /// Replace function F by function G.
  769. void MergeFunctions::replaceFunctionInTree(const FunctionNode &FN,
  770. Function *G) {
  771. Function *F = FN.getFunc();
  772. assert(FunctionComparator(F, G, &GlobalNumbers).compare() == 0 &&
  773. "The two functions must be equal");
  774. auto I = FNodesInTree.find(F);
  775. assert(I != FNodesInTree.end() && "F should be in FNodesInTree");
  776. assert(FNodesInTree.count(G) == 0 && "FNodesInTree should not contain G");
  777. FnTreeType::iterator IterToFNInFnTree = I->second;
  778. assert(&(*IterToFNInFnTree) == &FN && "F should map to FN in FNodesInTree.");
  779. // Remove F -> FN and insert G -> FN
  780. FNodesInTree.erase(I);
  781. FNodesInTree.insert({G, IterToFNInFnTree});
  782. // Replace F with G in FN, which is stored inside the FnTree.
  783. FN.replaceBy(G);
  784. }
  785. // Ordering for functions that are equal under FunctionComparator
  786. static bool isFuncOrderCorrect(const Function *F, const Function *G) {
  787. if (F->isInterposable() != G->isInterposable()) {
  788. // Strong before weak, because the weak function may call the strong
  789. // one, but not the other way around.
  790. return !F->isInterposable();
  791. }
  792. if (F->hasLocalLinkage() != G->hasLocalLinkage()) {
  793. // External before local, because we definitely have to keep the external
  794. // function, but may be able to drop the local one.
  795. return !F->hasLocalLinkage();
  796. }
  797. // Impose a total order (by name) on the replacement of functions. This is
  798. // important when operating on more than one module independently to prevent
  799. // cycles of thunks calling each other when the modules are linked together.
  800. return F->getName() <= G->getName();
  801. }
  802. // Insert a ComparableFunction into the FnTree, or merge it away if equal to one
  803. // that was already inserted.
  804. bool MergeFunctions::insert(Function *NewFunction) {
  805. std::pair<FnTreeType::iterator, bool> Result =
  806. FnTree.insert(FunctionNode(NewFunction));
  807. if (Result.second) {
  808. assert(FNodesInTree.count(NewFunction) == 0);
  809. FNodesInTree.insert({NewFunction, Result.first});
  810. LLVM_DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName()
  811. << '\n');
  812. return false;
  813. }
  814. const FunctionNode &OldF = *Result.first;
  815. if (!isFuncOrderCorrect(OldF.getFunc(), NewFunction)) {
  816. // Swap the two functions.
  817. Function *F = OldF.getFunc();
  818. replaceFunctionInTree(*Result.first, NewFunction);
  819. NewFunction = F;
  820. assert(OldF.getFunc() != F && "Must have swapped the functions.");
  821. }
  822. LLVM_DEBUG(dbgs() << " " << OldF.getFunc()->getName()
  823. << " == " << NewFunction->getName() << '\n');
  824. Function *DeleteF = NewFunction;
  825. mergeTwoFunctions(OldF.getFunc(), DeleteF);
  826. return true;
  827. }
  828. // Remove a function from FnTree. If it was already in FnTree, add
  829. // it to Deferred so that we'll look at it in the next round.
  830. void MergeFunctions::remove(Function *F) {
  831. auto I = FNodesInTree.find(F);
  832. if (I != FNodesInTree.end()) {
  833. LLVM_DEBUG(dbgs() << "Deferred " << F->getName() << ".\n");
  834. FnTree.erase(I->second);
  835. // I->second has been invalidated, remove it from the FNodesInTree map to
  836. // preserve the invariant.
  837. FNodesInTree.erase(I);
  838. Deferred.emplace_back(F);
  839. }
  840. }
  841. // For each instruction used by the value, remove() the function that contains
  842. // the instruction. This should happen right before a call to RAUW.
  843. void MergeFunctions::removeUsers(Value *V) {
  844. for (User *U : V->users())
  845. if (auto *I = dyn_cast<Instruction>(U))
  846. remove(I->getFunction());
  847. }