Internalize.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
  1. //===-- Internalize.cpp - Mark functions internal -------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass loops over all of the functions and variables in the input module.
  10. // If the function or variable does not need to be preserved according to the
  11. // client supplied callback, it is marked as internal.
  12. //
  13. // This transformation would not be legal in a regular compilation, but it gets
  14. // extra information from the linker about what is safe.
  15. //
// For example: internalizing a function with external linkage is only safe if
// we are told that it is only used from within this module.
  18. //
  19. //===----------------------------------------------------------------------===//
  20. #include "llvm/Transforms/IPO/Internalize.h"
  21. #include "llvm/ADT/SmallPtrSet.h"
  22. #include "llvm/ADT/Statistic.h"
  23. #include "llvm/ADT/StringSet.h"
  24. #include "llvm/ADT/Triple.h"
  25. #include "llvm/Analysis/CallGraph.h"
  26. #include "llvm/IR/Module.h"
  27. #include "llvm/InitializePasses.h"
  28. #include "llvm/Pass.h"
  29. #include "llvm/Support/CommandLine.h"
  30. #include "llvm/Support/Debug.h"
  31. #include "llvm/Support/LineIterator.h"
  32. #include "llvm/Support/MemoryBuffer.h"
  33. #include "llvm/Support/raw_ostream.h"
  34. #include "llvm/Transforms/IPO.h"
  35. #include "llvm/Transforms/Utils/GlobalStatus.h"
  36. #include "llvm/Transforms/Utils/ModuleUtils.h"
  37. using namespace llvm;
  38. #define DEBUG_TYPE "internalize"
  39. STATISTIC(NumAliases, "Number of aliases internalized");
  40. STATISTIC(NumFunctions, "Number of functions internalized");
  41. STATISTIC(NumGlobals, "Number of global vars internalized");
// APIFile - A file which contains a list of symbols that should not be marked
// internal.
  44. static cl::opt<std::string>
  45. APIFile("internalize-public-api-file", cl::value_desc("filename"),
  46. cl::desc("A file containing list of symbol names to preserve"));
  47. // APIList - A list of symbols that should not be marked internal.
  48. static cl::list<std::string>
  49. APIList("internalize-public-api-list", cl::value_desc("list"),
  50. cl::desc("A list of symbol names to preserve"), cl::CommaSeparated);
  51. namespace {
  52. // Helper to load an API list to preserve from file and expose it as a functor
  53. // for internalization.
  54. class PreserveAPIList {
  55. public:
  56. PreserveAPIList() {
  57. if (!APIFile.empty())
  58. LoadFile(APIFile);
  59. ExternalNames.insert(APIList.begin(), APIList.end());
  60. }
  61. bool operator()(const GlobalValue &GV) {
  62. return ExternalNames.count(GV.getName());
  63. }
  64. private:
  65. // Contains the set of symbols loaded from file
  66. StringSet<> ExternalNames;
  67. void LoadFile(StringRef Filename) {
  68. // Load the APIFile...
  69. ErrorOr<std::unique_ptr<MemoryBuffer>> Buf =
  70. MemoryBuffer::getFile(Filename);
  71. if (!Buf) {
  72. errs() << "WARNING: Internalize couldn't load file '" << Filename
  73. << "'! Continuing as if it's empty.\n";
  74. return; // Just continue as if the file were empty
  75. }
  76. for (line_iterator I(*Buf->get(), true), E; I != E; ++I)
  77. ExternalNames.insert(*I);
  78. }
  79. };
  80. } // end anonymous namespace
  81. bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) {
  82. // Function must be defined here
  83. if (GV.isDeclaration())
  84. return true;
  85. // Available externally is really just a "declaration with a body".
  86. if (GV.hasAvailableExternallyLinkage())
  87. return true;
  88. // Assume that dllexported symbols are referenced elsewhere
  89. if (GV.hasDLLExportStorageClass())
  90. return true;
  91. // As the name suggests, externally initialized variables need preserving as
  92. // they would be initialized elsewhere externally.
  93. if (const auto *G = dyn_cast<GlobalVariable>(&GV))
  94. if (G->isExternallyInitialized())
  95. return true;
  96. // Already local, has nothing to do.
  97. if (GV.hasLocalLinkage())
  98. return false;
  99. // Check some special cases
  100. if (AlwaysPreserved.count(GV.getName()))
  101. return true;
  102. return MustPreserveGV(GV);
  103. }
  104. bool InternalizePass::maybeInternalize(
  105. GlobalValue &GV, DenseMap<const Comdat *, ComdatInfo> &ComdatMap) {
  106. SmallString<0> ComdatName;
  107. if (Comdat *C = GV.getComdat()) {
  108. // For GlobalAlias, C is the aliasee object's comdat which may have been
  109. // redirected. So ComdatMap may not contain C.
  110. if (ComdatMap.lookup(C).External)
  111. return false;
  112. if (auto *GO = dyn_cast<GlobalObject>(&GV)) {
  113. // If a comdat with one member is not externally visible, we can drop it.
  114. // Otherwise, the comdat can be used to establish dependencies among the
  115. // group of sections. Thus we have to keep the comdat but switch it to
  116. // nodeduplicate.
  117. // Note: nodeduplicate is not necessary for COFF. wasm doesn't support
  118. // nodeduplicate.
  119. ComdatInfo &Info = ComdatMap.find(C)->second;
  120. if (Info.Size == 1)
  121. GO->setComdat(nullptr);
  122. else if (!IsWasm)
  123. C->setSelectionKind(Comdat::NoDeduplicate);
  124. }
  125. if (GV.hasLocalLinkage())
  126. return false;
  127. } else {
  128. if (GV.hasLocalLinkage())
  129. return false;
  130. if (shouldPreserveGV(GV))
  131. return false;
  132. }
  133. GV.setVisibility(GlobalValue::DefaultVisibility);
  134. GV.setLinkage(GlobalValue::InternalLinkage);
  135. return true;
  136. }
  137. // If GV is part of a comdat and is externally visible, update the comdat size
  138. // and keep track of its comdat so that we don't internalize any of its members.
  139. void InternalizePass::checkComdat(
  140. GlobalValue &GV, DenseMap<const Comdat *, ComdatInfo> &ComdatMap) {
  141. Comdat *C = GV.getComdat();
  142. if (!C)
  143. return;
  144. ComdatInfo &Info = ComdatMap.try_emplace(C).first->second;
  145. ++Info.Size;
  146. if (shouldPreserveGV(GV))
  147. Info.External = true;
  148. }
  149. bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) {
  150. bool Changed = false;
  151. CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
  152. SmallVector<GlobalValue *, 4> Used;
  153. collectUsedGlobalVariables(M, Used, false);
  154. // Collect comdat size and visiblity information for the module.
  155. DenseMap<const Comdat *, ComdatInfo> ComdatMap;
  156. if (!M.getComdatSymbolTable().empty()) {
  157. for (Function &F : M)
  158. checkComdat(F, ComdatMap);
  159. for (GlobalVariable &GV : M.globals())
  160. checkComdat(GV, ComdatMap);
  161. for (GlobalAlias &GA : M.aliases())
  162. checkComdat(GA, ComdatMap);
  163. }
  164. // We must assume that globals in llvm.used have a reference that not even
  165. // the linker can see, so we don't internalize them.
  166. // For llvm.compiler.used the situation is a bit fuzzy. The assembler and
  167. // linker can drop those symbols. If this pass is running as part of LTO,
  168. // one might think that it could just drop llvm.compiler.used. The problem
  169. // is that even in LTO llvm doesn't see every reference. For example,
  170. // we don't see references from function local inline assembly. To be
  171. // conservative, we internalize symbols in llvm.compiler.used, but we
  172. // keep llvm.compiler.used so that the symbol is not deleted by llvm.
  173. for (GlobalValue *V : Used) {
  174. AlwaysPreserved.insert(V->getName());
  175. }
  176. // Never internalize the llvm.used symbol. It is used to implement
  177. // attribute((used)).
  178. // FIXME: Shouldn't this just filter on llvm.metadata section??
  179. AlwaysPreserved.insert("llvm.used");
  180. AlwaysPreserved.insert("llvm.compiler.used");
  181. // Never internalize anchors used by the machine module info, else the info
  182. // won't find them. (see MachineModuleInfo.)
  183. AlwaysPreserved.insert("llvm.global_ctors");
  184. AlwaysPreserved.insert("llvm.global_dtors");
  185. AlwaysPreserved.insert("llvm.global.annotations");
  186. // Never internalize symbols code-gen inserts.
  187. // FIXME: We should probably add this (and the __stack_chk_guard) via some
  188. // type of call-back in CodeGen.
  189. AlwaysPreserved.insert("__stack_chk_fail");
  190. if (Triple(M.getTargetTriple()).isOSAIX())
  191. AlwaysPreserved.insert("__ssp_canary_word");
  192. else
  193. AlwaysPreserved.insert("__stack_chk_guard");
  194. // Mark all functions not in the api as internal.
  195. IsWasm = Triple(M.getTargetTriple()).isOSBinFormatWasm();
  196. for (Function &I : M) {
  197. if (!maybeInternalize(I, ComdatMap))
  198. continue;
  199. Changed = true;
  200. if (ExternalNode)
  201. // Remove a callgraph edge from the external node to this function.
  202. ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
  203. ++NumFunctions;
  204. LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
  205. }
  206. // Mark all global variables with initializers that are not in the api as
  207. // internal as well.
  208. for (auto &GV : M.globals()) {
  209. if (!maybeInternalize(GV, ComdatMap))
  210. continue;
  211. Changed = true;
  212. ++NumGlobals;
  213. LLVM_DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n");
  214. }
  215. // Mark all aliases that are not in the api as internal as well.
  216. for (auto &GA : M.aliases()) {
  217. if (!maybeInternalize(GA, ComdatMap))
  218. continue;
  219. Changed = true;
  220. ++NumAliases;
  221. LLVM_DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n");
  222. }
  223. return Changed;
  224. }
  225. InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {}
  226. PreservedAnalyses InternalizePass::run(Module &M, ModuleAnalysisManager &AM) {
  227. if (!internalizeModule(M, AM.getCachedResult<CallGraphAnalysis>(M)))
  228. return PreservedAnalyses::all();
  229. PreservedAnalyses PA;
  230. PA.preserve<CallGraphAnalysis>();
  231. return PA;
  232. }
  233. namespace {
  234. class InternalizeLegacyPass : public ModulePass {
  235. // Client supplied callback to control wheter a symbol must be preserved.
  236. std::function<bool(const GlobalValue &)> MustPreserveGV;
  237. public:
  238. static char ID; // Pass identification, replacement for typeid
  239. InternalizeLegacyPass() : ModulePass(ID), MustPreserveGV(PreserveAPIList()) {}
  240. InternalizeLegacyPass(std::function<bool(const GlobalValue &)> MustPreserveGV)
  241. : ModulePass(ID), MustPreserveGV(std::move(MustPreserveGV)) {
  242. initializeInternalizeLegacyPassPass(*PassRegistry::getPassRegistry());
  243. }
  244. bool runOnModule(Module &M) override {
  245. if (skipModule(M))
  246. return false;
  247. CallGraphWrapperPass *CGPass =
  248. getAnalysisIfAvailable<CallGraphWrapperPass>();
  249. CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
  250. return internalizeModule(M, MustPreserveGV, CG);
  251. }
  252. void getAnalysisUsage(AnalysisUsage &AU) const override {
  253. AU.setPreservesCFG();
  254. AU.addPreserved<CallGraphWrapperPass>();
  255. }
  256. };
  257. }
  258. char InternalizeLegacyPass::ID = 0;
  259. INITIALIZE_PASS(InternalizeLegacyPass, "internalize",
  260. "Internalize Global Symbols", false, false)
  261. ModulePass *llvm::createInternalizePass() {
  262. return new InternalizeLegacyPass();
  263. }
  264. ModulePass *llvm::createInternalizePass(
  265. std::function<bool(const GlobalValue &)> MustPreserveGV) {
  266. return new InternalizeLegacyPass(std::move(MustPreserveGV));
  267. }