Internalize.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  1. //===-- Internalize.cpp - Mark functions internal -------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass loops over all of the functions and variables in the input module.
  10. // If the function or variable does not need to be preserved according to the
  11. // client supplied callback, it is marked as internal.
  12. //
  13. // This transformation would not be legal in a regular compilation, but it gets
  14. // extra information from the linker about what is safe.
  15. //
  16. // For example: Internalizing a function with external linkage. Only if we are
  17. // told it is only used from within this module, it is safe to do it.
  18. //
  19. //===----------------------------------------------------------------------===//
  20. #include "llvm/Transforms/IPO/Internalize.h"
  21. #include "llvm/ADT/Statistic.h"
  22. #include "llvm/ADT/StringSet.h"
  23. #include "llvm/ADT/Triple.h"
  24. #include "llvm/Analysis/CallGraph.h"
  25. #include "llvm/IR/Module.h"
  26. #include "llvm/InitializePasses.h"
  27. #include "llvm/Pass.h"
  28. #include "llvm/Support/CommandLine.h"
  29. #include "llvm/Support/Debug.h"
  30. #include "llvm/Support/GlobPattern.h"
  31. #include "llvm/Support/LineIterator.h"
  32. #include "llvm/Support/MemoryBuffer.h"
  33. #include "llvm/Support/raw_ostream.h"
  34. #include "llvm/Transforms/IPO.h"
  35. using namespace llvm;
  36. #define DEBUG_TYPE "internalize"
  37. STATISTIC(NumAliases, "Number of aliases internalized");
  38. STATISTIC(NumFunctions, "Number of functions internalized");
  39. STATISTIC(NumGlobals, "Number of global vars internalized");
  40. // APIFile - A file which contains a list of symbol glob patterns that should
  41. // not be marked external.
  42. static cl::opt<std::string>
  43. APIFile("internalize-public-api-file", cl::value_desc("filename"),
  44. cl::desc("A file containing list of symbol names to preserve"));
  45. // APIList - A list of symbol glob patterns that should not be marked internal.
  46. static cl::list<std::string>
  47. APIList("internalize-public-api-list", cl::value_desc("list"),
  48. cl::desc("A list of symbol names to preserve"), cl::CommaSeparated);
  49. namespace {
  50. // Helper to load an API list to preserve from file and expose it as a functor
  51. // for internalization.
  52. class PreserveAPIList {
  53. public:
  54. PreserveAPIList() {
  55. if (!APIFile.empty())
  56. LoadFile(APIFile);
  57. for (StringRef Pattern : APIList)
  58. addGlob(Pattern);
  59. }
  60. bool operator()(const GlobalValue &GV) {
  61. return llvm::any_of(
  62. ExternalNames, [&](GlobPattern &GP) { return GP.match(GV.getName()); });
  63. }
  64. private:
  65. // Contains the set of symbols loaded from file
  66. SmallVector<GlobPattern> ExternalNames;
  67. void addGlob(StringRef Pattern) {
  68. auto GlobOrErr = GlobPattern::create(Pattern);
  69. if (!GlobOrErr) {
  70. errs() << "WARNING: when loading pattern: '"
  71. << toString(GlobOrErr.takeError()) << "' ignoring";
  72. return;
  73. }
  74. ExternalNames.emplace_back(std::move(*GlobOrErr));
  75. }
  76. void LoadFile(StringRef Filename) {
  77. // Load the APIFile...
  78. ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
  79. MemoryBuffer::getFile(Filename);
  80. if (!BufOrErr) {
  81. errs() << "WARNING: Internalize couldn't load file '" << Filename
  82. << "'! Continuing as if it's empty.\n";
  83. return; // Just continue as if the file were empty
  84. }
  85. Buf = std::move(*BufOrErr);
  86. for (line_iterator I(*Buf, true), E; I != E; ++I)
  87. addGlob(*I);
  88. }
  89. std::shared_ptr<MemoryBuffer> Buf;
  90. };
  91. } // end anonymous namespace
  92. bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) {
  93. // Function must be defined here
  94. if (GV.isDeclaration())
  95. return true;
  96. // Available externally is really just a "declaration with a body".
  97. if (GV.hasAvailableExternallyLinkage())
  98. return true;
  99. // Assume that dllexported symbols are referenced elsewhere
  100. if (GV.hasDLLExportStorageClass())
  101. return true;
  102. // As the name suggests, externally initialized variables need preserving as
  103. // they would be initialized elsewhere externally.
  104. if (const auto *G = dyn_cast<GlobalVariable>(&GV))
  105. if (G->isExternallyInitialized())
  106. return true;
  107. // Already local, has nothing to do.
  108. if (GV.hasLocalLinkage())
  109. return false;
  110. // Check some special cases
  111. if (AlwaysPreserved.count(GV.getName()))
  112. return true;
  113. return MustPreserveGV(GV);
  114. }
  115. bool InternalizePass::maybeInternalize(
  116. GlobalValue &GV, DenseMap<const Comdat *, ComdatInfo> &ComdatMap) {
  117. SmallString<0> ComdatName;
  118. if (Comdat *C = GV.getComdat()) {
  119. // For GlobalAlias, C is the aliasee object's comdat which may have been
  120. // redirected. So ComdatMap may not contain C.
  121. if (ComdatMap.lookup(C).External)
  122. return false;
  123. if (auto *GO = dyn_cast<GlobalObject>(&GV)) {
  124. // If a comdat with one member is not externally visible, we can drop it.
  125. // Otherwise, the comdat can be used to establish dependencies among the
  126. // group of sections. Thus we have to keep the comdat but switch it to
  127. // nodeduplicate.
  128. // Note: nodeduplicate is not necessary for COFF. wasm doesn't support
  129. // nodeduplicate.
  130. ComdatInfo &Info = ComdatMap.find(C)->second;
  131. if (Info.Size == 1)
  132. GO->setComdat(nullptr);
  133. else if (!IsWasm)
  134. C->setSelectionKind(Comdat::NoDeduplicate);
  135. }
  136. if (GV.hasLocalLinkage())
  137. return false;
  138. } else {
  139. if (GV.hasLocalLinkage())
  140. return false;
  141. if (shouldPreserveGV(GV))
  142. return false;
  143. }
  144. GV.setVisibility(GlobalValue::DefaultVisibility);
  145. GV.setLinkage(GlobalValue::InternalLinkage);
  146. return true;
  147. }
  148. // If GV is part of a comdat and is externally visible, update the comdat size
  149. // and keep track of its comdat so that we don't internalize any of its members.
  150. void InternalizePass::checkComdat(
  151. GlobalValue &GV, DenseMap<const Comdat *, ComdatInfo> &ComdatMap) {
  152. Comdat *C = GV.getComdat();
  153. if (!C)
  154. return;
  155. ComdatInfo &Info = ComdatMap.try_emplace(C).first->second;
  156. ++Info.Size;
  157. if (shouldPreserveGV(GV))
  158. Info.External = true;
  159. }
  160. bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) {
  161. bool Changed = false;
  162. CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
  163. SmallVector<GlobalValue *, 4> Used;
  164. collectUsedGlobalVariables(M, Used, false);
  165. // Collect comdat size and visiblity information for the module.
  166. DenseMap<const Comdat *, ComdatInfo> ComdatMap;
  167. if (!M.getComdatSymbolTable().empty()) {
  168. for (Function &F : M)
  169. checkComdat(F, ComdatMap);
  170. for (GlobalVariable &GV : M.globals())
  171. checkComdat(GV, ComdatMap);
  172. for (GlobalAlias &GA : M.aliases())
  173. checkComdat(GA, ComdatMap);
  174. }
  175. // We must assume that globals in llvm.used have a reference that not even
  176. // the linker can see, so we don't internalize them.
  177. // For llvm.compiler.used the situation is a bit fuzzy. The assembler and
  178. // linker can drop those symbols. If this pass is running as part of LTO,
  179. // one might think that it could just drop llvm.compiler.used. The problem
  180. // is that even in LTO llvm doesn't see every reference. For example,
  181. // we don't see references from function local inline assembly. To be
  182. // conservative, we internalize symbols in llvm.compiler.used, but we
  183. // keep llvm.compiler.used so that the symbol is not deleted by llvm.
  184. for (GlobalValue *V : Used) {
  185. AlwaysPreserved.insert(V->getName());
  186. }
  187. // Never internalize the llvm.used symbol. It is used to implement
  188. // attribute((used)).
  189. // FIXME: Shouldn't this just filter on llvm.metadata section??
  190. AlwaysPreserved.insert("llvm.used");
  191. AlwaysPreserved.insert("llvm.compiler.used");
  192. // Never internalize anchors used by the machine module info, else the info
  193. // won't find them. (see MachineModuleInfo.)
  194. AlwaysPreserved.insert("llvm.global_ctors");
  195. AlwaysPreserved.insert("llvm.global_dtors");
  196. AlwaysPreserved.insert("llvm.global.annotations");
  197. // Never internalize symbols code-gen inserts.
  198. // FIXME: We should probably add this (and the __stack_chk_guard) via some
  199. // type of call-back in CodeGen.
  200. AlwaysPreserved.insert("__stack_chk_fail");
  201. if (Triple(M.getTargetTriple()).isOSAIX())
  202. AlwaysPreserved.insert("__ssp_canary_word");
  203. else
  204. AlwaysPreserved.insert("__stack_chk_guard");
  205. // Mark all functions not in the api as internal.
  206. IsWasm = Triple(M.getTargetTriple()).isOSBinFormatWasm();
  207. for (Function &I : M) {
  208. if (!maybeInternalize(I, ComdatMap))
  209. continue;
  210. Changed = true;
  211. if (ExternalNode)
  212. // Remove a callgraph edge from the external node to this function.
  213. ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
  214. ++NumFunctions;
  215. LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
  216. }
  217. // Mark all global variables with initializers that are not in the api as
  218. // internal as well.
  219. for (auto &GV : M.globals()) {
  220. if (!maybeInternalize(GV, ComdatMap))
  221. continue;
  222. Changed = true;
  223. ++NumGlobals;
  224. LLVM_DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n");
  225. }
  226. // Mark all aliases that are not in the api as internal as well.
  227. for (auto &GA : M.aliases()) {
  228. if (!maybeInternalize(GA, ComdatMap))
  229. continue;
  230. Changed = true;
  231. ++NumAliases;
  232. LLVM_DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n");
  233. }
  234. return Changed;
  235. }
  236. InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {}
  237. PreservedAnalyses InternalizePass::run(Module &M, ModuleAnalysisManager &AM) {
  238. if (!internalizeModule(M, AM.getCachedResult<CallGraphAnalysis>(M)))
  239. return PreservedAnalyses::all();
  240. PreservedAnalyses PA;
  241. PA.preserve<CallGraphAnalysis>();
  242. return PA;
  243. }
  244. namespace {
  245. class InternalizeLegacyPass : public ModulePass {
  246. // Client supplied callback to control wheter a symbol must be preserved.
  247. std::function<bool(const GlobalValue &)> MustPreserveGV;
  248. public:
  249. static char ID; // Pass identification, replacement for typeid
  250. InternalizeLegacyPass() : ModulePass(ID), MustPreserveGV(PreserveAPIList()) {}
  251. InternalizeLegacyPass(std::function<bool(const GlobalValue &)> MustPreserveGV)
  252. : ModulePass(ID), MustPreserveGV(std::move(MustPreserveGV)) {
  253. initializeInternalizeLegacyPassPass(*PassRegistry::getPassRegistry());
  254. }
  255. bool runOnModule(Module &M) override {
  256. if (skipModule(M))
  257. return false;
  258. CallGraphWrapperPass *CGPass =
  259. getAnalysisIfAvailable<CallGraphWrapperPass>();
  260. CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
  261. return internalizeModule(M, MustPreserveGV, CG);
  262. }
  263. void getAnalysisUsage(AnalysisUsage &AU) const override {
  264. AU.setPreservesCFG();
  265. AU.addPreserved<CallGraphWrapperPass>();
  266. }
  267. };
  268. }
  269. char InternalizeLegacyPass::ID = 0;
  270. INITIALIZE_PASS(InternalizeLegacyPass, "internalize",
  271. "Internalize Global Symbols", false, false)
  272. ModulePass *llvm::createInternalizePass() {
  273. return new InternalizeLegacyPass();
  274. }
  275. ModulePass *llvm::createInternalizePass(
  276. std::function<bool(const GlobalValue &)> MustPreserveGV) {
  277. return new InternalizeLegacyPass(std::move(MustPreserveGV));
  278. }