ModuleSummaryAnalysis.cpp 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970
  1. //===- ModuleSummaryAnalysis.cpp - Module summary index builder -----------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass builds a ModuleSummaryIndex object for the module, to be written
  10. // to bitcode or LLVM assembly.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/Analysis/ModuleSummaryAnalysis.h"
  14. #include "llvm/ADT/ArrayRef.h"
  15. #include "llvm/ADT/DenseSet.h"
  16. #include "llvm/ADT/MapVector.h"
  17. #include "llvm/ADT/STLExtras.h"
  18. #include "llvm/ADT/SetVector.h"
  19. #include "llvm/ADT/SmallPtrSet.h"
  20. #include "llvm/ADT/SmallVector.h"
  21. #include "llvm/ADT/StringRef.h"
  22. #include "llvm/Analysis/BlockFrequencyInfo.h"
  23. #include "llvm/Analysis/BranchProbabilityInfo.h"
  24. #include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
  25. #include "llvm/Analysis/LoopInfo.h"
  26. #include "llvm/Analysis/ProfileSummaryInfo.h"
  27. #include "llvm/Analysis/StackSafetyAnalysis.h"
  28. #include "llvm/Analysis/TypeMetadataUtils.h"
  29. #include "llvm/IR/Attributes.h"
  30. #include "llvm/IR/BasicBlock.h"
  31. #include "llvm/IR/Constant.h"
  32. #include "llvm/IR/Constants.h"
  33. #include "llvm/IR/Dominators.h"
  34. #include "llvm/IR/Function.h"
  35. #include "llvm/IR/GlobalAlias.h"
  36. #include "llvm/IR/GlobalValue.h"
  37. #include "llvm/IR/GlobalVariable.h"
  38. #include "llvm/IR/Instructions.h"
  39. #include "llvm/IR/IntrinsicInst.h"
  40. #include "llvm/IR/Intrinsics.h"
  41. #include "llvm/IR/Metadata.h"
  42. #include "llvm/IR/Module.h"
  43. #include "llvm/IR/ModuleSummaryIndex.h"
  44. #include "llvm/IR/Use.h"
  45. #include "llvm/IR/User.h"
  46. #include "llvm/InitializePasses.h"
  47. #include "llvm/Object/ModuleSymbolTable.h"
  48. #include "llvm/Object/SymbolicFile.h"
  49. #include "llvm/Pass.h"
  50. #include "llvm/Support/Casting.h"
  51. #include "llvm/Support/CommandLine.h"
  52. #include "llvm/Support/FileSystem.h"
  53. #include <algorithm>
  54. #include <cassert>
  55. #include <cstdint>
  56. #include <vector>
  57. using namespace llvm;
  58. #define DEBUG_TYPE "module-summary-analysis"
// Option to force edges cold which will block importing when the
// -import-cold-multiplier is set to 0. Useful for debugging.
// Non-static: the actual storage is exposed via cl::location below so the
// flag value can be read directly (see uses in computeFunctionSummary).
FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold =
    FunctionSummary::FSHT_None;
cl::opt<FunctionSummary::ForceSummaryHotnessType, true> FSEC(
    "force-summary-edges-cold", cl::Hidden, cl::location(ForceSummaryEdgesCold),
    cl::desc("Force all edges in the function summary to cold"),
    cl::values(clEnumValN(FunctionSummary::FSHT_None, "none", "None."),
               clEnumValN(FunctionSummary::FSHT_AllNonCritical,
                          "all-non-critical", "All non-critical edges."),
               clEnumValN(FunctionSummary::FSHT_All, "all", "All edges.")));

// Debugging aid: when non-empty, a dot graph of the computed summary index
// is written to this file.
cl::opt<std::string> ModuleSummaryDotFile(
    "module-summary-dot-file", cl::init(""), cl::Hidden,
    cl::value_desc("filename"),
    cl::desc("File to emit dot graph of new summary into."));
  74. // Walk through the operands of a given User via worklist iteration and populate
  75. // the set of GlobalValue references encountered. Invoked either on an
  76. // Instruction or a GlobalVariable (which walks its initializer).
  77. // Return true if any of the operands contains blockaddress. This is important
  78. // to know when computing summary for global var, because if global variable
  79. // references basic block address we can't import it separately from function
  80. // containing that basic block. For simplicity we currently don't import such
  81. // global vars at all. When importing function we aren't interested if any
  82. // instruction in it takes an address of any basic block, because instruction
  83. // can only take an address of basic block located in the same function.
  84. static bool findRefEdges(ModuleSummaryIndex &Index, const User *CurUser,
  85. SetVector<ValueInfo> &RefEdges,
  86. SmallPtrSet<const User *, 8> &Visited) {
  87. bool HasBlockAddress = false;
  88. SmallVector<const User *, 32> Worklist;
  89. if (Visited.insert(CurUser).second)
  90. Worklist.push_back(CurUser);
  91. while (!Worklist.empty()) {
  92. const User *U = Worklist.pop_back_val();
  93. const auto *CB = dyn_cast<CallBase>(U);
  94. for (const auto &OI : U->operands()) {
  95. const User *Operand = dyn_cast<User>(OI);
  96. if (!Operand)
  97. continue;
  98. if (isa<BlockAddress>(Operand)) {
  99. HasBlockAddress = true;
  100. continue;
  101. }
  102. if (auto *GV = dyn_cast<GlobalValue>(Operand)) {
  103. // We have a reference to a global value. This should be added to
  104. // the reference set unless it is a callee. Callees are handled
  105. // specially by WriteFunction and are added to a separate list.
  106. if (!(CB && CB->isCallee(&OI)))
  107. RefEdges.insert(Index.getOrInsertValueInfo(GV));
  108. continue;
  109. }
  110. if (Visited.insert(Operand).second)
  111. Worklist.push_back(Operand);
  112. }
  113. }
  114. return HasBlockAddress;
  115. }
  116. static CalleeInfo::HotnessType getHotness(uint64_t ProfileCount,
  117. ProfileSummaryInfo *PSI) {
  118. if (!PSI)
  119. return CalleeInfo::HotnessType::Unknown;
  120. if (PSI->isHotCount(ProfileCount))
  121. return CalleeInfo::HotnessType::Hot;
  122. if (PSI->isColdCount(ProfileCount))
  123. return CalleeInfo::HotnessType::Cold;
  124. return CalleeInfo::HotnessType::None;
  125. }
  126. static bool isNonRenamableLocal(const GlobalValue &GV) {
  127. return GV.hasSection() && GV.hasLocalLinkage();
  128. }
  129. /// Determine whether this call has all constant integer arguments (excluding
  130. /// "this") and summarize it to VCalls or ConstVCalls as appropriate.
  131. static void addVCallToSet(DevirtCallSite Call, GlobalValue::GUID Guid,
  132. SetVector<FunctionSummary::VFuncId> &VCalls,
  133. SetVector<FunctionSummary::ConstVCall> &ConstVCalls) {
  134. std::vector<uint64_t> Args;
  135. // Start from the second argument to skip the "this" pointer.
  136. for (auto &Arg : drop_begin(Call.CB.args())) {
  137. auto *CI = dyn_cast<ConstantInt>(Arg);
  138. if (!CI || CI->getBitWidth() > 64) {
  139. VCalls.insert({Guid, Call.Offset});
  140. return;
  141. }
  142. Args.push_back(CI->getZExtValue());
  143. }
  144. ConstVCalls.insert({{Guid, Call.Offset}, std::move(Args)});
  145. }
  146. /// If this intrinsic call requires that we add information to the function
  147. /// summary, do so via the non-constant reference arguments.
  148. static void addIntrinsicToSummary(
  149. const CallInst *CI, SetVector<GlobalValue::GUID> &TypeTests,
  150. SetVector<FunctionSummary::VFuncId> &TypeTestAssumeVCalls,
  151. SetVector<FunctionSummary::VFuncId> &TypeCheckedLoadVCalls,
  152. SetVector<FunctionSummary::ConstVCall> &TypeTestAssumeConstVCalls,
  153. SetVector<FunctionSummary::ConstVCall> &TypeCheckedLoadConstVCalls,
  154. DominatorTree &DT) {
  155. switch (CI->getCalledFunction()->getIntrinsicID()) {
  156. case Intrinsic::type_test: {
  157. auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
  158. auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
  159. if (!TypeId)
  160. break;
  161. GlobalValue::GUID Guid = GlobalValue::getGUID(TypeId->getString());
  162. // Produce a summary from type.test intrinsics. We only summarize type.test
  163. // intrinsics that are used other than by an llvm.assume intrinsic.
  164. // Intrinsics that are assumed are relevant only to the devirtualization
  165. // pass, not the type test lowering pass.
  166. bool HasNonAssumeUses = llvm::any_of(CI->uses(), [](const Use &CIU) {
  167. return !isa<AssumeInst>(CIU.getUser());
  168. });
  169. if (HasNonAssumeUses)
  170. TypeTests.insert(Guid);
  171. SmallVector<DevirtCallSite, 4> DevirtCalls;
  172. SmallVector<CallInst *, 4> Assumes;
  173. findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
  174. for (auto &Call : DevirtCalls)
  175. addVCallToSet(Call, Guid, TypeTestAssumeVCalls,
  176. TypeTestAssumeConstVCalls);
  177. break;
  178. }
  179. case Intrinsic::type_checked_load: {
  180. auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(2));
  181. auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
  182. if (!TypeId)
  183. break;
  184. GlobalValue::GUID Guid = GlobalValue::getGUID(TypeId->getString());
  185. SmallVector<DevirtCallSite, 4> DevirtCalls;
  186. SmallVector<Instruction *, 4> LoadedPtrs;
  187. SmallVector<Instruction *, 4> Preds;
  188. bool HasNonCallUses = false;
  189. findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
  190. HasNonCallUses, CI, DT);
  191. // Any non-call uses of the result of llvm.type.checked.load will
  192. // prevent us from optimizing away the llvm.type.test.
  193. if (HasNonCallUses)
  194. TypeTests.insert(Guid);
  195. for (auto &Call : DevirtCalls)
  196. addVCallToSet(Call, Guid, TypeCheckedLoadVCalls,
  197. TypeCheckedLoadConstVCalls);
  198. break;
  199. }
  200. default:
  201. break;
  202. }
  203. }
  204. static bool isNonVolatileLoad(const Instruction *I) {
  205. if (const auto *LI = dyn_cast<LoadInst>(I))
  206. return !LI->isVolatile();
  207. return false;
  208. }
  209. static bool isNonVolatileStore(const Instruction *I) {
  210. if (const auto *SI = dyn_cast<StoreInst>(I))
  211. return !SI->isVolatile();
  212. return false;
  213. }
  214. // Returns true if the function definition must be unreachable.
  215. //
  216. // Note if this helper function returns true, `F` is guaranteed
  217. // to be unreachable; if it returns false, `F` might still
  218. // be unreachable but not covered by this helper function.
  219. static bool mustBeUnreachableFunction(const Function &F) {
  220. // A function must be unreachable if its entry block ends with an
  221. // 'unreachable'.
  222. assert(!F.isDeclaration());
  223. return isa<UnreachableInst>(F.getEntryBlock().getTerminator());
  224. }
// Compute the per-function summary for \p F and add it to \p Index:
// call-graph edges (with profile-derived hotness), reference edges (split
// into read-only / write-only groups for ThinLTO), type-metadata summaries
// for whole-program devirtualization, and the function attribute flags.
static void computeFunctionSummary(
    ModuleSummaryIndex &Index, const Module &M, const Function &F,
    BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, DominatorTree &DT,
    bool HasLocalsInUsedOrAsm, DenseSet<GlobalValue::GUID> &CantBePromoted,
    bool IsThinLTO,
    std::function<const StackSafetyInfo *(const Function &F)> GetSSICallback) {
  // Summary not currently supported for anonymous functions, they should
  // have been named.
  assert(F.hasName());

  unsigned NumInsts = 0;
  // Map from callee ValueId to profile count. Used to accumulate profile
  // counts for all static calls to a given callee.
  MapVector<ValueInfo, CalleeInfo> CallGraphEdges;
  SetVector<ValueInfo> RefEdges, LoadRefEdges, StoreRefEdges;
  SetVector<GlobalValue::GUID> TypeTests;
  SetVector<FunctionSummary::VFuncId> TypeTestAssumeVCalls,
      TypeCheckedLoadVCalls;
  SetVector<FunctionSummary::ConstVCall> TypeTestAssumeConstVCalls,
      TypeCheckedLoadConstVCalls;
  ICallPromotionAnalysis ICallAnalysis;
  SmallPtrSet<const User *, 8> Visited;

  // Add personality function, prefix data and prologue data to function's ref
  // list.
  findRefEdges(Index, &F, RefEdges, Visited);
  std::vector<const Instruction *> NonVolatileLoads;
  std::vector<const Instruction *> NonVolatileStores;

  bool HasInlineAsmMaybeReferencingInternal = false;
  bool HasIndirBranchToBlockAddress = false;
  bool HasUnknownCall = false;
  bool MayThrow = false;
  for (const BasicBlock &BB : F) {
    // We don't allow inlining of function with indirect branch to blockaddress.
    // If the blockaddress escapes the function, e.g., via a global variable,
    // inlining may lead to an invalid cross-function reference. So we shouldn't
    // import such function either.
    if (BB.hasAddressTaken()) {
      // callbr uses of the blockaddress are fine; any other user means the
      // address may escape the function.
      for (User *U : BlockAddress::get(const_cast<BasicBlock *>(&BB))->users())
        if (!isa<CallBrInst>(*U)) {
          HasIndirBranchToBlockAddress = true;
          break;
        }
    }

    for (const Instruction &I : BB) {
      // Debug/pseudo instructions don't count toward size and carry no
      // references of interest here.
      if (I.isDebugOrPseudoInst())
        continue;
      ++NumInsts;

      // Regular LTO module doesn't participate in ThinLTO import,
      // so no reference from it can be read/writeonly, since this
      // would require importing variable as local copy
      if (IsThinLTO) {
        if (isNonVolatileLoad(&I)) {
          // Postpone processing of non-volatile load instructions
          // See comments below
          Visited.insert(&I);
          NonVolatileLoads.push_back(&I);
          continue;
        } else if (isNonVolatileStore(&I)) {
          Visited.insert(&I);
          NonVolatileStores.push_back(&I);
          // All references from second operand of store (destination address)
          // can be considered write-only if they're not referenced by any
          // non-store instruction. References from first operand of store
          // (stored value) can't be treated either as read- or as write-only
          // so we add them to RefEdges as we do with all other instructions
          // except non-volatile load.
          Value *Stored = I.getOperand(0);
          if (auto *GV = dyn_cast<GlobalValue>(Stored))
            // findRefEdges will try to examine GV operands, so instead
            // of calling it we should add GV to RefEdges directly.
            RefEdges.insert(Index.getOrInsertValueInfo(GV));
          else if (auto *U = dyn_cast<User>(Stored))
            findRefEdges(Index, U, RefEdges, Visited);
          continue;
        }
      }
      findRefEdges(Index, &I, RefEdges, Visited);
      const auto *CB = dyn_cast<CallBase>(&I);
      if (!CB) {
        // Track whether any non-call instruction may raise an exception;
        // feeds the MayThrow function flag below.
        if (I.mayThrow())
          MayThrow = true;
        continue;
      }

      const auto *CI = dyn_cast<CallInst>(&I);
      // Since we don't know exactly which local values are referenced in inline
      // assembly, conservatively mark the function as possibly referencing
      // a local value from inline assembly to ensure we don't export a
      // reference (which would require renaming and promotion of the
      // referenced value).
      if (HasLocalsInUsedOrAsm && CI && CI->isInlineAsm())
        HasInlineAsmMaybeReferencingInternal = true;

      auto *CalledValue = CB->getCalledOperand();
      auto *CalledFunction = CB->getCalledFunction();
      if (CalledValue && !CalledFunction) {
        CalledValue = CalledValue->stripPointerCasts();
        // Stripping pointer casts can reveal a called function.
        CalledFunction = dyn_cast<Function>(CalledValue);
      }
      // Check if this is an alias to a function. If so, get the
      // called aliasee for the checks below.
      if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
        assert(!CalledFunction &&
               "Expected null called function in callsite for alias");
        CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
      }
      // Check if this is a direct call to a known function or a known
      // intrinsic, or an indirect call with profile data.
      if (CalledFunction) {
        if (CI && CalledFunction->isIntrinsic()) {
          addIntrinsicToSummary(
              CI, TypeTests, TypeTestAssumeVCalls, TypeCheckedLoadVCalls,
              TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls, DT);
          continue;
        }
        // We should have named any anonymous globals
        assert(CalledFunction->hasName());
        auto ScaledCount = PSI->getProfileCount(*CB, BFI);
        auto Hotness = ScaledCount ? getHotness(ScaledCount.getValue(), PSI)
                                   : CalleeInfo::HotnessType::Unknown;
        if (ForceSummaryEdgesCold != FunctionSummary::FSHT_None)
          Hotness = CalleeInfo::HotnessType::Cold;

        // Use the original CalledValue, in case it was an alias. We want
        // to record the call edge to the alias in that case. Eventually
        // an alias summary will be created to associate the alias and
        // aliasee.
        auto &ValueInfo = CallGraphEdges[Index.getOrInsertValueInfo(
            cast<GlobalValue>(CalledValue))];
        ValueInfo.updateHotness(Hotness);
        // Add the relative block frequency to CalleeInfo if there is no profile
        // information.
        if (BFI != nullptr && Hotness == CalleeInfo::HotnessType::Unknown) {
          uint64_t BBFreq = BFI->getBlockFreq(&BB).getFrequency();
          uint64_t EntryFreq = BFI->getEntryFreq();
          ValueInfo.updateRelBlockFreq(BBFreq, EntryFreq);
        }
      } else {
        HasUnknownCall = true;
        // Skip inline assembly calls.
        if (CI && CI->isInlineAsm())
          continue;
        // Skip direct calls.
        if (!CalledValue || isa<Constant>(CalledValue))
          continue;

        // Check if the instruction has a callees metadata. If so, add callees
        // to CallGraphEdges to reflect the references from the metadata, and
        // to enable importing for subsequent indirect call promotion and
        // inlining.
        if (auto *MD = I.getMetadata(LLVMContext::MD_callees)) {
          for (auto &Op : MD->operands()) {
            Function *Callee = mdconst::extract_or_null<Function>(Op);
            if (Callee)
              CallGraphEdges[Index.getOrInsertValueInfo(Callee)];
          }
        }

        // For indirect calls with value-profile data, record an edge to each
        // promotion candidate so those targets can be imported.
        uint32_t NumVals, NumCandidates;
        uint64_t TotalCount;
        auto CandidateProfileData =
            ICallAnalysis.getPromotionCandidatesForInstruction(
                &I, NumVals, TotalCount, NumCandidates);
        for (auto &Candidate : CandidateProfileData)
          CallGraphEdges[Index.getOrInsertValueInfo(Candidate.Value)]
              .updateHotness(getHotness(Candidate.Count, PSI));
      }
    }
  }
  Index.addBlockCount(F.size());

  std::vector<ValueInfo> Refs;
  if (IsThinLTO) {
    auto AddRefEdges = [&](const std::vector<const Instruction *> &Instrs,
                           SetVector<ValueInfo> &Edges,
                           SmallPtrSet<const User *, 8> &Cache) {
      for (const auto *I : Instrs) {
        Cache.erase(I);
        findRefEdges(Index, I, Edges, Cache);
      }
    };

    // By now we processed all instructions in a function, except
    // non-volatile loads and non-volatile value stores. Let's find
    // ref edges for both of instruction sets
    AddRefEdges(NonVolatileLoads, LoadRefEdges, Visited);
    // We can add some values to the Visited set when processing load
    // instructions which are also used by stores in NonVolatileStores.
    // For example this can happen if we have following code:
    //
    // store %Derived* @foo, %Derived** bitcast (%Base** @bar to %Derived**)
    // %42 = load %Derived*, %Derived** bitcast (%Base** @bar to %Derived**)
    //
    // After processing loads we'll add bitcast to the Visited set, and if
    // we use the same set while processing stores, we'll never see store
    // to @bar and @bar will be mistakenly treated as readonly.
    SmallPtrSet<const llvm::User *, 8> StoreCache;
    AddRefEdges(NonVolatileStores, StoreRefEdges, StoreCache);

    // If both load and store instruction reference the same variable
    // we won't be able to optimize it. Add all such reference edges
    // to RefEdges set.
    for (auto &VI : StoreRefEdges)
      if (LoadRefEdges.remove(VI))
        RefEdges.insert(VI);

    unsigned RefCnt = RefEdges.size();
    // All new reference edges inserted in two loops below are either
    // read or write only. They will be grouped in the end of RefEdges
    // vector, so we can use a single integer value to identify them.
    for (auto &VI : LoadRefEdges)
      RefEdges.insert(VI);

    unsigned FirstWORef = RefEdges.size();
    for (auto &VI : StoreRefEdges)
      RefEdges.insert(VI);

    // Layout of Refs: [plain refs | read-only refs | write-only refs].
    Refs = RefEdges.takeVector();
    for (; RefCnt < FirstWORef; ++RefCnt)
      Refs[RefCnt].setReadOnly();

    for (; RefCnt < Refs.size(); ++RefCnt)
      Refs[RefCnt].setWriteOnly();
  } else {
    Refs = RefEdges.takeVector();
  }

  // Explicit add hot edges to enforce importing for designated GUIDs for
  // sample PGO, to enable the same inlines as the profiled optimized binary.
  for (auto &I : F.getImportGUIDs())
    CallGraphEdges[Index.getOrInsertValueInfo(I)].updateHotness(
        ForceSummaryEdgesCold == FunctionSummary::FSHT_All
            ? CalleeInfo::HotnessType::Cold
            : CalleeInfo::HotnessType::Critical);

  bool NonRenamableLocal = isNonRenamableLocal(F);
  bool NotEligibleForImport = NonRenamableLocal ||
                              HasInlineAsmMaybeReferencingInternal ||
                              HasIndirBranchToBlockAddress;
  GlobalValueSummary::GVFlags Flags(
      F.getLinkage(), F.getVisibility(), NotEligibleForImport,
      /* Live = */ false, F.isDSOLocal(),
      F.hasLinkOnceODRLinkage() && F.hasGlobalUnnamedAddr());
  FunctionSummary::FFlags FunFlags{
      F.hasFnAttribute(Attribute::ReadNone),
      F.hasFnAttribute(Attribute::ReadOnly),
      F.hasFnAttribute(Attribute::NoRecurse), F.returnDoesNotAlias(),
      // FIXME: refactor this to use the same code that inliner is using.
      // Don't try to import functions with noinline attribute.
      F.getAttributes().hasFnAttr(Attribute::NoInline),
      F.hasFnAttribute(Attribute::AlwaysInline),
      F.hasFnAttribute(Attribute::NoUnwind), MayThrow, HasUnknownCall,
      mustBeUnreachableFunction(F)};
  std::vector<FunctionSummary::ParamAccess> ParamAccesses;
  if (auto *SSI = GetSSICallback(F))
    ParamAccesses = SSI->getParamAccesses(Index);
  auto FuncSummary = std::make_unique<FunctionSummary>(
      Flags, NumInsts, FunFlags, /*EntryCount=*/0, std::move(Refs),
      CallGraphEdges.takeVector(), TypeTests.takeVector(),
      TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(),
      TypeTestAssumeConstVCalls.takeVector(),
      TypeCheckedLoadConstVCalls.takeVector(), std::move(ParamAccesses));
  if (NonRenamableLocal)
    CantBePromoted.insert(F.getGUID());
  Index.addGlobalValueSummary(F, std::move(FuncSummary));
}
  476. /// Find function pointers referenced within the given vtable initializer
  477. /// (or subset of an initializer) \p I. The starting offset of \p I within
  478. /// the vtable initializer is \p StartingOffset. Any discovered function
  479. /// pointers are added to \p VTableFuncs along with their cumulative offset
  480. /// within the initializer.
  481. static void findFuncPointers(const Constant *I, uint64_t StartingOffset,
  482. const Module &M, ModuleSummaryIndex &Index,
  483. VTableFuncList &VTableFuncs) {
  484. // First check if this is a function pointer.
  485. if (I->getType()->isPointerTy()) {
  486. auto Fn = dyn_cast<Function>(I->stripPointerCasts());
  487. // We can disregard __cxa_pure_virtual as a possible call target, as
  488. // calls to pure virtuals are UB.
  489. if (Fn && Fn->getName() != "__cxa_pure_virtual")
  490. VTableFuncs.push_back({Index.getOrInsertValueInfo(Fn), StartingOffset});
  491. return;
  492. }
  493. // Walk through the elements in the constant struct or array and recursively
  494. // look for virtual function pointers.
  495. const DataLayout &DL = M.getDataLayout();
  496. if (auto *C = dyn_cast<ConstantStruct>(I)) {
  497. StructType *STy = dyn_cast<StructType>(C->getType());
  498. assert(STy);
  499. const StructLayout *SL = DL.getStructLayout(C->getType());
  500. for (auto EI : llvm::enumerate(STy->elements())) {
  501. auto Offset = SL->getElementOffset(EI.index());
  502. unsigned Op = SL->getElementContainingOffset(Offset);
  503. findFuncPointers(cast<Constant>(I->getOperand(Op)),
  504. StartingOffset + Offset, M, Index, VTableFuncs);
  505. }
  506. } else if (auto *C = dyn_cast<ConstantArray>(I)) {
  507. ArrayType *ATy = C->getType();
  508. Type *EltTy = ATy->getElementType();
  509. uint64_t EltSize = DL.getTypeAllocSize(EltTy);
  510. for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
  511. findFuncPointers(cast<Constant>(I->getOperand(i)),
  512. StartingOffset + i * EltSize, M, Index, VTableFuncs);
  513. }
  514. }
  515. }
  516. // Identify the function pointers referenced by vtable definition \p V.
  517. static void computeVTableFuncs(ModuleSummaryIndex &Index,
  518. const GlobalVariable &V, const Module &M,
  519. VTableFuncList &VTableFuncs) {
  520. if (!V.isConstant())
  521. return;
  522. findFuncPointers(V.getInitializer(), /*StartingOffset=*/0, M, Index,
  523. VTableFuncs);
  524. #ifndef NDEBUG
  525. // Validate that the VTableFuncs list is ordered by offset.
  526. uint64_t PrevOffset = 0;
  527. for (auto &P : VTableFuncs) {
  528. // The findVFuncPointers traversal should have encountered the
  529. // functions in offset order. We need to use ">=" since PrevOffset
  530. // starts at 0.
  531. assert(P.VTableOffset >= PrevOffset);
  532. PrevOffset = P.VTableOffset;
  533. }
  534. #endif
  535. }
  536. /// Record vtable definition \p V for each type metadata it references.
  537. static void
  538. recordTypeIdCompatibleVtableReferences(ModuleSummaryIndex &Index,
  539. const GlobalVariable &V,
  540. SmallVectorImpl<MDNode *> &Types) {
  541. for (MDNode *Type : Types) {
  542. auto TypeID = Type->getOperand(1).get();
  543. uint64_t Offset =
  544. cast<ConstantInt>(
  545. cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
  546. ->getZExtValue();
  547. if (auto *TypeId = dyn_cast<MDString>(TypeID))
  548. Index.getOrInsertTypeIdCompatibleVtableSummary(TypeId->getString())
  549. .push_back({Offset, Index.getOrInsertValueInfo(&V)});
  550. }
  551. }
  552. static void computeVariableSummary(ModuleSummaryIndex &Index,
  553. const GlobalVariable &V,
  554. DenseSet<GlobalValue::GUID> &CantBePromoted,
  555. const Module &M,
  556. SmallVectorImpl<MDNode *> &Types) {
  557. SetVector<ValueInfo> RefEdges;
  558. SmallPtrSet<const User *, 8> Visited;
  559. bool HasBlockAddress = findRefEdges(Index, &V, RefEdges, Visited);
  560. bool NonRenamableLocal = isNonRenamableLocal(V);
  561. GlobalValueSummary::GVFlags Flags(
  562. V.getLinkage(), V.getVisibility(), NonRenamableLocal,
  563. /* Live = */ false, V.isDSOLocal(),
  564. V.hasLinkOnceODRLinkage() && V.hasGlobalUnnamedAddr());
  565. VTableFuncList VTableFuncs;
  566. // If splitting is not enabled, then we compute the summary information
  567. // necessary for index-based whole program devirtualization.
  568. if (!Index.enableSplitLTOUnit()) {
  569. Types.clear();
  570. V.getMetadata(LLVMContext::MD_type, Types);
  571. if (!Types.empty()) {
  572. // Identify the function pointers referenced by this vtable definition.
  573. computeVTableFuncs(Index, V, M, VTableFuncs);
  574. // Record this vtable definition for each type metadata it references.
  575. recordTypeIdCompatibleVtableReferences(Index, V, Types);
  576. }
  577. }
  578. // Don't mark variables we won't be able to internalize as read/write-only.
  579. bool CanBeInternalized =
  580. !V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() &&
  581. !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass();
  582. bool Constant = V.isConstant();
  583. GlobalVarSummary::GVarFlags VarFlags(CanBeInternalized,
  584. Constant ? false : CanBeInternalized,
  585. Constant, V.getVCallVisibility());
  586. auto GVarSummary = std::make_unique<GlobalVarSummary>(Flags, VarFlags,
  587. RefEdges.takeVector());
  588. if (NonRenamableLocal)
  589. CantBePromoted.insert(V.getGUID());
  590. if (HasBlockAddress)
  591. GVarSummary->setNotEligibleToImport();
  592. if (!VTableFuncs.empty())
  593. GVarSummary->setVTableFuncs(VTableFuncs);
  594. Index.addGlobalValueSummary(V, std::move(GVarSummary));
  595. }
  596. static void
  597. computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
  598. DenseSet<GlobalValue::GUID> &CantBePromoted) {
  599. bool NonRenamableLocal = isNonRenamableLocal(A);
  600. GlobalValueSummary::GVFlags Flags(
  601. A.getLinkage(), A.getVisibility(), NonRenamableLocal,
  602. /* Live = */ false, A.isDSOLocal(),
  603. A.hasLinkOnceODRLinkage() && A.hasGlobalUnnamedAddr());
  604. auto AS = std::make_unique<AliasSummary>(Flags);
  605. auto *Aliasee = A.getAliaseeObject();
  606. auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID());
  607. assert(AliaseeVI && "Alias expects aliasee summary to be available");
  608. assert(AliaseeVI.getSummaryList().size() == 1 &&
  609. "Expected a single entry per aliasee in per-module index");
  610. AS->setAliasee(AliaseeVI, AliaseeVI.getSummaryList()[0].get());
  611. if (NonRenamableLocal)
  612. CantBePromoted.insert(A.getGUID());
  613. Index.addGlobalValueSummary(A, std::move(AS));
  614. }
  615. // Set LiveRoot flag on entries matching the given value name.
  616. static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name) {
  617. if (ValueInfo VI = Index.getValueInfo(GlobalValue::getGUID(Name)))
  618. for (auto &Summary : VI.getSummaryList())
  619. Summary->setLive(true);
  620. }
/// Build the per-module summary index for \p M.
///
/// Creates a summary for every function, global variable and alias defined in
/// the module, records values that cannot be promoted/renamed (locals on
/// llvm.used/llvm.compiler.used and locals defined by module-level asm),
/// marks LLVM-produced globals the linker cannot see as live roots, and
/// finally post-processes each summary's import eligibility based on whether
/// it references any non-promotable value.
///
/// \param GetBFICallback optional; when null, BFI is computed locally for
///        functions that have profile data.
/// \param PSI must be non-null (asserted below).
/// \param GetSSICallback provides stack-safety info for parameter-access
///        summaries; may return null per function.
ModuleSummaryIndex llvm::buildModuleSummaryIndex(
    const Module &M,
    std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback,
    ProfileSummaryInfo *PSI,
    std::function<const StackSafetyInfo *(const Function &F)> GetSSICallback) {
  assert(PSI);
  bool EnableSplitLTOUnit = false;
  if (auto *MD = mdconst::extract_or_null<ConstantInt>(
          M.getModuleFlag("EnableSplitLTOUnit")))
    EnableSplitLTOUnit = MD->getZExtValue();
  ModuleSummaryIndex Index(/*HaveGVs=*/true, EnableSplitLTOUnit);

  // Identify the local values in the llvm.used and llvm.compiler.used sets,
  // which should not be exported as they would then require renaming and
  // promotion, but we may have opaque uses e.g. in inline asm. We collect them
  // here because we use this information to mark functions containing inline
  // assembly calls as not importable.
  SmallPtrSet<GlobalValue *, 4> LocalsUsed;
  SmallVector<GlobalValue *, 4> Used;
  // First collect those in the llvm.used set.
  collectUsedGlobalVariables(M, Used, /*CompilerUsed=*/false);
  // Next collect those in the llvm.compiler.used set.
  collectUsedGlobalVariables(M, Used, /*CompilerUsed=*/true);
  DenseSet<GlobalValue::GUID> CantBePromoted;
  for (auto *V : Used) {
    if (V->hasLocalLinkage()) {
      LocalsUsed.insert(V);
      CantBePromoted.insert(V->getGUID());
    }
  }

  bool HasLocalInlineAsmSymbol = false;
  if (!M.getModuleInlineAsm().empty()) {
    // Collect the local values defined by module level asm, and set up
    // summaries for these symbols so that they can be marked as NoRename,
    // to prevent export of any use of them in regular IR that would require
    // renaming within the module level asm. Note we don't need to create a
    // summary for weak or global defs, as they don't need to be flagged as
    // NoRename, and defs in module level asm can't be imported anyway.
    // Also, any values used but not defined within module level asm should
    // be listed on the llvm.used or llvm.compiler.used global and marked as
    // referenced from there.
    ModuleSymbolTable::CollectAsmSymbols(
        M, [&](StringRef Name, object::BasicSymbolRef::Flags Flags) {
          // Symbols not marked as Weak or Global are local definitions.
          if (Flags & (object::BasicSymbolRef::SF_Weak |
                       object::BasicSymbolRef::SF_Global))
            return;
          HasLocalInlineAsmSymbol = true;
          GlobalValue *GV = M.getNamedValue(Name);
          if (!GV)
            return;
          assert(GV->isDeclaration() && "Def in module asm already has definition");
          // Conservative flags: not eligible to import, assumed live.
          GlobalValueSummary::GVFlags GVFlags(
              GlobalValue::InternalLinkage, GlobalValue::DefaultVisibility,
              /* NotEligibleToImport = */ true,
              /* Live = */ true,
              /* Local */ GV->isDSOLocal(),
              GV->hasLinkOnceODRLinkage() && GV->hasGlobalUnnamedAddr());
          CantBePromoted.insert(GV->getGUID());
          // Create the appropriate summary type.
          if (Function *F = dyn_cast<Function>(GV)) {
            // Conservative function summary: zero instructions, empty edge
            // lists, may throw, unknown calls.
            std::unique_ptr<FunctionSummary> Summary =
                std::make_unique<FunctionSummary>(
                    GVFlags, /*InstCount=*/0,
                    FunctionSummary::FFlags{
                        F->hasFnAttribute(Attribute::ReadNone),
                        F->hasFnAttribute(Attribute::ReadOnly),
                        F->hasFnAttribute(Attribute::NoRecurse),
                        F->returnDoesNotAlias(),
                        /* NoInline = */ false,
                        F->hasFnAttribute(Attribute::AlwaysInline),
                        F->hasFnAttribute(Attribute::NoUnwind),
                        /* MayThrow */ true,
                        /* HasUnknownCall */ true,
                        /* MustBeUnreachable */ false},
                    /*EntryCount=*/0, ArrayRef<ValueInfo>{},
                    ArrayRef<FunctionSummary::EdgeTy>{},
                    ArrayRef<GlobalValue::GUID>{},
                    ArrayRef<FunctionSummary::VFuncId>{},
                    ArrayRef<FunctionSummary::VFuncId>{},
                    ArrayRef<FunctionSummary::ConstVCall>{},
                    ArrayRef<FunctionSummary::ConstVCall>{},
                    ArrayRef<FunctionSummary::ParamAccess>{});
            Index.addGlobalValueSummary(*GV, std::move(Summary));
          } else {
            std::unique_ptr<GlobalVarSummary> Summary =
                std::make_unique<GlobalVarSummary>(
                    GVFlags,
                    GlobalVarSummary::GVarFlags(
                        false, false, cast<GlobalVariable>(GV)->isConstant(),
                        GlobalObject::VCallVisibilityPublic),
                    ArrayRef<ValueInfo>{});
            Index.addGlobalValueSummary(*GV, std::move(Summary));
          }
        });
  }

  bool IsThinLTO = true;
  if (auto *MD =
          mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("ThinLTO")))
    IsThinLTO = MD->getZExtValue();

  // Compute summaries for all functions defined in module, and save in the
  // index.
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;

    DominatorTree DT(const_cast<Function &>(F));
    BlockFrequencyInfo *BFI = nullptr;
    std::unique_ptr<BlockFrequencyInfo> BFIPtr;
    if (GetBFICallback)
      BFI = GetBFICallback(F);
    else if (F.hasProfileData()) {
      // No callback supplied: compute BFI here for profiled functions only.
      LoopInfo LI{DT};
      BranchProbabilityInfo BPI{F, LI};
      BFIPtr = std::make_unique<BlockFrequencyInfo>(F, BPI, LI);
      BFI = BFIPtr.get();
    }

    computeFunctionSummary(Index, M, F, BFI, PSI, DT,
                           !LocalsUsed.empty() || HasLocalInlineAsmSymbol,
                           CantBePromoted, IsThinLTO, GetSSICallback);
  }

  // Compute summaries for all variables defined in module, and save in the
  // index.
  SmallVector<MDNode *, 2> Types;
  for (const GlobalVariable &G : M.globals()) {
    if (G.isDeclaration())
      continue;
    computeVariableSummary(Index, G, CantBePromoted, M, Types);
  }

  // Compute summaries for all aliases defined in module, and save in the
  // index.
  for (const GlobalAlias &A : M.aliases())
    computeAliasSummary(Index, A, CantBePromoted);

  // Values on llvm.used/llvm.compiler.used may have opaque uses (e.g. inline
  // asm), so their summaries must not be imported elsewhere.
  for (auto *V : LocalsUsed) {
    auto *Summary = Index.getGlobalValueSummary(*V);
    assert(Summary && "Missing summary for global value");
    Summary->setNotEligibleToImport();
  }

  // The linker doesn't know about these LLVM produced values, so we need
  // to flag them as live in the index to ensure index-based dead value
  // analysis treats them as live roots of the analysis.
  setLiveRoot(Index, "llvm.used");
  setLiveRoot(Index, "llvm.compiler.used");
  setLiveRoot(Index, "llvm.global_ctors");
  setLiveRoot(Index, "llvm.global_dtors");
  setLiveRoot(Index, "llvm.global.annotations");

  for (auto &GlobalList : Index) {
    // Ignore entries for references that are undefined in the current module.
    if (GlobalList.second.SummaryList.empty())
      continue;

    assert(GlobalList.second.SummaryList.size() == 1 &&
           "Expected module's index to have one summary per GUID");
    auto &Summary = GlobalList.second.SummaryList[0];
    if (!IsThinLTO) {
      // Regular LTO never imports, so nothing is eligible.
      Summary->setNotEligibleToImport();
      continue;
    }

    // A summary referencing a value that cannot be promoted must not be
    // imported: the reference could not be resolved in the importing module.
    bool AllRefsCanBeExternallyReferenced =
        llvm::all_of(Summary->refs(), [&](const ValueInfo &VI) {
          return !CantBePromoted.count(VI.getGUID());
        });
    if (!AllRefsCanBeExternallyReferenced) {
      Summary->setNotEligibleToImport();
      continue;
    }

    // Likewise for call edges to non-promotable callees.
    if (auto *FuncSummary = dyn_cast<FunctionSummary>(Summary.get())) {
      bool AllCallsCanBeExternallyReferenced = llvm::all_of(
          FuncSummary->calls(), [&](const FunctionSummary::EdgeTy &Edge) {
            return !CantBePromoted.count(Edge.first.getGUID());
          });
      if (!AllCallsCanBeExternallyReferenced)
        Summary->setNotEligibleToImport();
    }
  }

  // Optionally dump the finished index as a dot graph (debugging aid,
  // controlled by the -module-summary-dot-file option).
  if (!ModuleSummaryDotFile.empty()) {
    std::error_code EC;
    raw_fd_ostream OSDot(ModuleSummaryDotFile, EC, sys::fs::OpenFlags::OF_None);
    if (EC)
      report_fatal_error(Twine("Failed to open dot file ") +
                         ModuleSummaryDotFile + ": " + EC.message() + "\n");
    Index.exportToDot(OSDot, {});
  }

  return Index;
}
// Unique key identifying this analysis to the new pass manager.
AnalysisKey ModuleSummaryIndexAnalysis::Key;
  804. ModuleSummaryIndex
  805. ModuleSummaryIndexAnalysis::run(Module &M, ModuleAnalysisManager &AM) {
  806. ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
  807. auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  808. bool NeedSSI = needsParamAccessSummary(M);
  809. return buildModuleSummaryIndex(
  810. M,
  811. [&FAM](const Function &F) {
  812. return &FAM.getResult<BlockFrequencyAnalysis>(
  813. *const_cast<Function *>(&F));
  814. },
  815. &PSI,
  816. [&FAM, NeedSSI](const Function &F) -> const StackSafetyInfo * {
  817. return NeedSSI ? &FAM.getResult<StackSafetyAnalysis>(
  818. const_cast<Function &>(F))
  819. : nullptr;
  820. });
  821. }
char ModuleSummaryIndexWrapperPass::ID = 0;

// Register the legacy pass under "module-summary-analysis" along with the
// analyses it depends on.
INITIALIZE_PASS_BEGIN(ModuleSummaryIndexWrapperPass, "module-summary-analysis",
                      "Module Summary Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(StackSafetyInfoWrapperPass)
INITIALIZE_PASS_END(ModuleSummaryIndexWrapperPass, "module-summary-analysis",
                    "Module Summary Analysis", false, true)

// Factory used by the legacy pass manager to construct this pass.
ModulePass *llvm::createModuleSummaryIndexWrapperPass() {
  return new ModuleSummaryIndexWrapperPass();
}

ModuleSummaryIndexWrapperPass::ModuleSummaryIndexWrapperPass()
    : ModulePass(ID) {
  // Make sure the pass is registered before first use.
  initializeModuleSummaryIndexWrapperPassPass(*PassRegistry::getPassRegistry());
}
  837. bool ModuleSummaryIndexWrapperPass::runOnModule(Module &M) {
  838. auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  839. bool NeedSSI = needsParamAccessSummary(M);
  840. Index.emplace(buildModuleSummaryIndex(
  841. M,
  842. [this](const Function &F) {
  843. return &(this->getAnalysis<BlockFrequencyInfoWrapperPass>(
  844. *const_cast<Function *>(&F))
  845. .getBFI());
  846. },
  847. PSI,
  848. [&](const Function &F) -> const StackSafetyInfo * {
  849. return NeedSSI ? &getAnalysis<StackSafetyInfoWrapperPass>(
  850. const_cast<Function &>(F))
  851. .getResult()
  852. : nullptr;
  853. }));
  854. return false;
  855. }
// Drop the computed index at the end of the pass-manager run; it is only
// valid for the module it was built from. Returns false (IR unchanged).
bool ModuleSummaryIndexWrapperPass::doFinalization(Module &M) {
  Index.reset();
  return false;
}

void ModuleSummaryIndexWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  // Analysis-only pass: preserves everything and requires the analyses
  // consumed by buildModuleSummaryIndex.
  AU.setPreservesAll();
  AU.addRequired<BlockFrequencyInfoWrapperPass>();
  AU.addRequired<ProfileSummaryInfoWrapperPass>();
  AU.addRequired<StackSafetyInfoWrapperPass>();
}
char ImmutableModuleSummaryIndexWrapperPass::ID = 0;

// Immutable wrapper that exposes an externally-owned (possibly null)
// ModuleSummaryIndex to other passes; it computes nothing itself.
ImmutableModuleSummaryIndexWrapperPass::ImmutableModuleSummaryIndexWrapperPass(
    const ModuleSummaryIndex *Index)
    : ImmutablePass(ID), Index(Index) {
  initializeImmutableModuleSummaryIndexWrapperPassPass(
      *PassRegistry::getPassRegistry());
}

void ImmutableModuleSummaryIndexWrapperPass::getAnalysisUsage(
    AnalysisUsage &AU) const {
  AU.setPreservesAll();
}

// Factory used by the legacy pass manager to construct this pass.
ImmutablePass *llvm::createImmutableModuleSummaryIndexWrapperPass(
    const ModuleSummaryIndex *Index) {
  return new ImmutableModuleSummaryIndexWrapperPass(Index);
}

INITIALIZE_PASS(ImmutableModuleSummaryIndexWrapperPass, "module-summary-info",
                "Module summary info", false, true)