InstrProfiling.cpp

  1. //===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass lowers instrprof_* intrinsics emitted by a frontend for profiling.
  10. // It also builds the data structures and initialization code needed for
  11. // updating execution counts and emitting the profile at runtime.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
  15. #include "llvm/ADT/ArrayRef.h"
  16. #include "llvm/ADT/SmallVector.h"
  17. #include "llvm/ADT/StringRef.h"
  18. #include "llvm/ADT/Triple.h"
  19. #include "llvm/ADT/Twine.h"
  20. #include "llvm/Analysis/BlockFrequencyInfo.h"
  21. #include "llvm/Analysis/BranchProbabilityInfo.h"
  22. #include "llvm/Analysis/LoopInfo.h"
  23. #include "llvm/Analysis/TargetLibraryInfo.h"
  24. #include "llvm/IR/Attributes.h"
  25. #include "llvm/IR/BasicBlock.h"
  26. #include "llvm/IR/Constant.h"
  27. #include "llvm/IR/Constants.h"
  28. #include "llvm/IR/DIBuilder.h"
  29. #include "llvm/IR/DerivedTypes.h"
  30. #include "llvm/IR/DiagnosticInfo.h"
  31. #include "llvm/IR/Dominators.h"
  32. #include "llvm/IR/Function.h"
  33. #include "llvm/IR/GlobalValue.h"
  34. #include "llvm/IR/GlobalVariable.h"
  35. #include "llvm/IR/IRBuilder.h"
  36. #include "llvm/IR/Instruction.h"
  37. #include "llvm/IR/Instructions.h"
  38. #include "llvm/IR/IntrinsicInst.h"
  39. #include "llvm/IR/Module.h"
  40. #include "llvm/IR/Type.h"
  41. #include "llvm/InitializePasses.h"
  42. #include "llvm/Pass.h"
  43. #include "llvm/ProfileData/InstrProf.h"
  44. #include "llvm/ProfileData/InstrProfCorrelator.h"
  45. #include "llvm/Support/Casting.h"
  46. #include "llvm/Support/CommandLine.h"
  47. #include "llvm/Support/Error.h"
  48. #include "llvm/Support/ErrorHandling.h"
  49. #include "llvm/Transforms/Utils/ModuleUtils.h"
  50. #include "llvm/Transforms/Utils/SSAUpdater.h"
  51. #include <algorithm>
  52. #include <cassert>
  53. #include <cstdint>
  54. #include <string>
  55. using namespace llvm;
  56. #define DEBUG_TYPE "instrprof"
  57. namespace llvm {
  58. cl::opt<bool>
  59. DebugInfoCorrelate("debug-info-correlate",
  60. cl::desc("Use debug info to correlate profiles."),
  61. cl::init(false));
  62. } // namespace llvm
  63. namespace {
  64. cl::opt<bool> DoHashBasedCounterSplit(
  65. "hash-based-counter-split",
  66. cl::desc("Rename counter variable of a comdat function based on cfg hash"),
  67. cl::init(true));
  68. cl::opt<bool>
  69. RuntimeCounterRelocation("runtime-counter-relocation",
  70. cl::desc("Enable relocating counters at runtime."),
  71. cl::init(false));
  72. cl::opt<bool> ValueProfileStaticAlloc(
  73. "vp-static-alloc",
  74. cl::desc("Do static counter allocation for value profiler"),
  75. cl::init(true));
  76. cl::opt<double> NumCountersPerValueSite(
  77. "vp-counters-per-site",
  78. cl::desc("The average number of profile counters allocated "
  79. "per value profiling site."),
  80. // This is set to a very small value because in real programs, only
  81. // a very small percentage of value sites have non-zero targets, e.g., 1/30.
  82. // For those sites with non-zero profile, the average number of targets
  83. // is usually smaller than 2.
  84. cl::init(1.0));
  85. cl::opt<bool> AtomicCounterUpdateAll(
  86. "instrprof-atomic-counter-update-all",
  87. cl::desc("Make all profile counter updates atomic (for testing only)"),
  88. cl::init(false));
  89. cl::opt<bool> AtomicCounterUpdatePromoted(
  90. "atomic-counter-update-promoted",
  91. cl::desc("Do counter update using atomic fetch add "
  92. " for promoted counters only"),
  93. cl::init(false));
  94. cl::opt<bool> AtomicFirstCounter(
  95. "atomic-first-counter",
  96. cl::desc("Use atomic fetch add for first counter in a function (usually "
  97. "the entry counter)"),
  98. cl::init(false));
  99. // If the option is not specified, the default behavior about whether
  100. // counter promotion is done depends on how the instrumentation lowering
  101. // pipeline is set up, i.e., the default value of this option alone
  102. // does not mean the promotion will be done by default. Explicitly
  103. // setting this option overrides the default behavior.
  104. cl::opt<bool> DoCounterPromotion("do-counter-promotion",
  105. cl::desc("Do counter register promotion"),
  106. cl::init(false));
  107. cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
  108. "max-counter-promotions-per-loop", cl::init(20),
  109. cl::desc("Max number counter promotions per loop to avoid"
  110. " increasing register pressure too much"));
  111. // A debug option
  112. cl::opt<int>
  113. MaxNumOfPromotions("max-counter-promotions", cl::init(-1),
  114. cl::desc("Max number of allowed counter promotions"));
  115. cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
  116. "speculative-counter-promotion-max-exiting", cl::init(3),
  117. cl::desc("The max number of exiting blocks of a loop to allow "
  118. " speculative counter promotion"));
  119. cl::opt<bool> SpeculativeCounterPromotionToLoop(
  120. "speculative-counter-promotion-to-loop",
  121. cl::desc("When the option is false, if the target block is in a loop, "
  122. "the promotion will be disallowed unless the promoted counter "
  123. " update can be further/iteratively promoted into an acyclic "
  124. " region."));
  125. cl::opt<bool> IterativeCounterPromotion(
  126. "iterative-counter-promotion", cl::init(true),
  127. cl::desc("Allow counter promotion across the whole loop nest."));
  128. cl::opt<bool> SkipRetExitBlock(
  129. "skip-ret-exit-block", cl::init(true),
  130. cl::desc("Suppress counter promotion if exit blocks contain ret."));
  131. ///
  132. /// A helper class to promote one counter RMW operation in the loop
  133. /// into a register update.
  134. ///
  135. /// The RMW update for the counter will be sunk out of the loop after
  136. /// the transformation.
  137. ///
  138. class PGOCounterPromoterHelper : public LoadAndStorePromoter {
  139. public:
  140. PGOCounterPromoterHelper(
  141. Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,
  142. BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
  143. ArrayRef<Instruction *> InsertPts,
  144. DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
  145. LoopInfo &LI)
  146. : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
  147. InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) {
  148. assert(isa<LoadInst>(L));
  149. assert(isa<StoreInst>(S));
  150. SSA.AddAvailableValue(PH, Init);
  151. }
  152. void doExtraRewritesBeforeFinalDeletion() override {
  153. for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
  154. BasicBlock *ExitBlock = ExitBlocks[i];
  155. Instruction *InsertPos = InsertPts[i];
  156. // Get LiveIn value into the ExitBlock. If there are multiple
  157. // predecessors, the value is defined by a PHI node in this
  158. // block.
  159. Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
  160. Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
  161. Type *Ty = LiveInValue->getType();
  162. IRBuilder<> Builder(InsertPos);
  163. if (auto *AddrInst = dyn_cast_or_null<IntToPtrInst>(Addr)) {
  164. // If isRuntimeCounterRelocationEnabled() is true then the address of
  165. // the store instruction is computed with two instructions in
  166. // InstrProfiling::getCounterAddress(). We need to copy those
  167. // instructions to this block to compute Addr correctly.
  168. // %BiasAdd = add i64 ptrtoint <__profc_>, <__llvm_profile_counter_bias>
  169. // %Addr = inttoptr i64 %BiasAdd to i64*
  170. auto *OrigBiasInst = dyn_cast<BinaryOperator>(AddrInst->getOperand(0));
  171. assert(OrigBiasInst->getOpcode() == Instruction::BinaryOps::Add);
  172. Value *BiasInst = Builder.Insert(OrigBiasInst->clone());
  173. Addr = Builder.CreateIntToPtr(BiasInst, Ty->getPointerTo());
  174. }
  175. if (AtomicCounterUpdatePromoted)
  176. // Atomic updates can currently only be promoted across the current
  177. // loop, not the whole loop nest.
  178. Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
  179. MaybeAlign(),
  180. AtomicOrdering::SequentiallyConsistent);
  181. else {
  182. LoadInst *OldVal = Builder.CreateLoad(Ty, Addr, "pgocount.promoted");
  183. auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
  184. auto *NewStore = Builder.CreateStore(NewVal, Addr);
  185. // Now update the parent loop's candidate list:
  186. if (IterativeCounterPromotion) {
  187. auto *TargetLoop = LI.getLoopFor(ExitBlock);
  188. if (TargetLoop)
  189. LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore);
  190. }
  191. }
  192. }
  193. }
  194. private:
  195. Instruction *Store;
  196. ArrayRef<BasicBlock *> ExitBlocks;
  197. ArrayRef<Instruction *> InsertPts;
  198. DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
  199. LoopInfo &LI;
  200. };
  201. /// A helper class to do register promotion for all profile counter
  202. /// updates in a loop.
  203. ///
  204. class PGOCounterPromoter {
  205. public:
  206. PGOCounterPromoter(
  207. DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
  208. Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI)
  209. : LoopToCandidates(LoopToCands), L(CurLoop), LI(LI), BFI(BFI) {
  210. // Skip collection of ExitBlocks and InsertPts for loops that will not be
  211. // able to have counters promoted.
  212. SmallVector<BasicBlock *, 8> LoopExitBlocks;
  213. SmallPtrSet<BasicBlock *, 8> BlockSet;
  214. L.getExitBlocks(LoopExitBlocks);
  215. if (!isPromotionPossible(&L, LoopExitBlocks))
  216. return;
  217. for (BasicBlock *ExitBlock : LoopExitBlocks) {
  218. if (BlockSet.insert(ExitBlock).second) {
  219. ExitBlocks.push_back(ExitBlock);
  220. InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
  221. }
  222. }
  223. }
  224. bool run(int64_t *NumPromoted) {
  225. // Skip 'infinite' loops:
  226. if (ExitBlocks.size() == 0)
  227. return false;
  228. // Skip if any of the ExitBlocks contains a ret instruction.
  229. // This is to prevent dumping an incomplete profile -- if the
  230. // loop is a long-running loop and dump is called in the middle
  231. // of the loop, the resulting profile is incomplete.
  232. // FIXME: add other heuristics to detect long running loops.
  233. if (SkipRetExitBlock) {
  234. for (auto *BB : ExitBlocks)
  235. if (isa<ReturnInst>(BB->getTerminator()))
  236. return false;
  237. }
  238. unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L);
  239. if (MaxProm == 0)
  240. return false;
  241. unsigned Promoted = 0;
  242. for (auto &Cand : LoopToCandidates[&L]) {
  243. SmallVector<PHINode *, 4> NewPHIs;
  244. SSAUpdater SSA(&NewPHIs);
  245. Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
  246. // If BFI is set, we will use it to guide the promotions.
  247. if (BFI) {
  248. auto *BB = Cand.first->getParent();
  249. auto InstrCount = BFI->getBlockProfileCount(BB);
  250. if (!InstrCount)
  251. continue;
  252. auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader());
  253. // If the average loop trip count is not greater than 1.5, we skip
  254. // promotion.
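// (The average trip count is InstrCount / PreheaderCount; requiring it to
// exceed 1.5 is checked here as 2 * InstrCount > 3 * PreheaderCount to avoid
// integer division.)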
  255. if (PreheaderCount && (*PreheaderCount * 3) >= (*InstrCount * 2))
  256. continue;
  257. }
  258. PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
  259. L.getLoopPreheader(), ExitBlocks,
  260. InsertPts, LoopToCandidates, LI);
  261. Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
  262. Promoted++;
  263. if (Promoted >= MaxProm)
  264. break;
  265. (*NumPromoted)++;
  266. if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
  267. break;
  268. }
  269. LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
  270. << L.getLoopDepth() << ")\n");
  271. return Promoted != 0;
  272. }
  273. private:
  274. bool allowSpeculativeCounterPromotion(Loop *LP) {
  275. SmallVector<BasicBlock *, 8> ExitingBlocks;
  276. L.getExitingBlocks(ExitingBlocks);
  277. // Not considered speculative.
  278. if (ExitingBlocks.size() == 1)
  279. return true;
  280. if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
  281. return false;
  282. return true;
  283. }
  284. // Check whether the loop satisfies the basic conditions needed to perform
  285. // Counter Promotions.
  286. bool
  287. isPromotionPossible(Loop *LP,
  288. const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) {
  289. // We can't insert into a catchswitch.
  290. if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) {
  291. return isa<CatchSwitchInst>(Exit->getTerminator());
  292. }))
  293. return false;
  294. if (!LP->hasDedicatedExits())
  295. return false;
  296. BasicBlock *PH = LP->getLoopPreheader();
  297. if (!PH)
  298. return false;
  299. return true;
  300. }
  301. // Returns the max number of Counter Promotions for LP.
  302. unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
  303. SmallVector<BasicBlock *, 8> LoopExitBlocks;
  304. LP->getExitBlocks(LoopExitBlocks);
  305. if (!isPromotionPossible(LP, LoopExitBlocks))
  306. return 0;
  307. SmallVector<BasicBlock *, 8> ExitingBlocks;
  308. LP->getExitingBlocks(ExitingBlocks);
  309. // If BFI is set, we do more aggressive promotions based on BFI.
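// ((unsigned)-1 effectively removes the static cap; candidates are instead
// filtered by the BFI-based trip-count check in run().)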
  310. if (BFI)
  311. return (unsigned)-1;
  312. // Not considered speculative.
  313. if (ExitingBlocks.size() == 1)
  314. return MaxNumOfPromotionsPerLoop;
  315. if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
  316. return 0;
  317. // Whether the target block is in a loop does not matter:
  318. if (SpeculativeCounterPromotionToLoop)
  319. return MaxNumOfPromotionsPerLoop;
  320. // Now check the target block:
  321. unsigned MaxProm = MaxNumOfPromotionsPerLoop;
  322. for (auto *TargetBlock : LoopExitBlocks) {
  323. auto *TargetLoop = LI.getLoopFor(TargetBlock);
  324. if (!TargetLoop)
  325. continue;
  326. unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop);
  327. unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
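// Cap this loop's budget by the target loop's remaining headroom. The max()
// guards against unsigned underflow when the target loop already has more
// pending candidates than it can accept.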
  328. MaxProm =
  329. std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) -
  330. PendingCandsInTarget);
  331. }
  332. return MaxProm;
  333. }
  334. DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
  335. SmallVector<BasicBlock *, 8> ExitBlocks;
  336. SmallVector<Instruction *, 8> InsertPts;
  337. Loop &L;
  338. LoopInfo &LI;
  339. BlockFrequencyInfo *BFI;
  340. };
  341. enum class ValueProfilingCallType {
  342. // Individual values are tracked. Currently used for indirect call target
  343. // profiling.
  344. Default,
  345. // MemOp: the memop size value profiling.
  346. MemOp
  347. };
  348. } // end anonymous namespace
  349. PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) {
  350. FunctionAnalysisManager &FAM =
  351. AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  352. auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
  353. return FAM.getResult<TargetLibraryAnalysis>(F);
  354. };
  355. if (!run(M, GetTLI))
  356. return PreservedAnalyses::all();
  357. return PreservedAnalyses::none();
  358. }
  359. bool InstrProfiling::lowerIntrinsics(Function *F) {
  360. bool MadeChange = false;
  361. PromotionCandidates.clear();
  362. for (BasicBlock &BB : *F) {
  363. for (Instruction &Instr : llvm::make_early_inc_range(BB)) {
  364. if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(&Instr)) {
  365. lowerIncrement(IPIS);
  366. MadeChange = true;
  367. } else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(&Instr)) {
  368. lowerIncrement(IPI);
  369. MadeChange = true;
  370. } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(&Instr)) {
  371. lowerCover(IPC);
  372. MadeChange = true;
  373. } else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(&Instr)) {
  374. lowerValueProfileInst(IPVP);
  375. MadeChange = true;
  376. }
  377. }
  378. }
  379. if (!MadeChange)
  380. return false;
  381. promoteCounterLoadStores(F);
  382. return true;
  383. }
  384. bool InstrProfiling::isRuntimeCounterRelocationEnabled() const {
  385. // Mach-O doesn't support weak external references.
  386. if (TT.isOSBinFormatMachO())
  387. return false;
  388. if (RuntimeCounterRelocation.getNumOccurrences() > 0)
  389. return RuntimeCounterRelocation;
  390. // Fuchsia uses runtime counter relocation by default.
  391. return TT.isOSFuchsia();
  392. }
  393. bool InstrProfiling::isCounterPromotionEnabled() const {
  394. if (DoCounterPromotion.getNumOccurrences() > 0)
  395. return DoCounterPromotion;
  396. return Options.DoCounterPromotion;
  397. }
  398. void InstrProfiling::promoteCounterLoadStores(Function *F) {
  399. if (!isCounterPromotionEnabled())
  400. return;
  401. DominatorTree DT(*F);
  402. LoopInfo LI(DT);
  403. DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;
  404. std::unique_ptr<BlockFrequencyInfo> BFI;
  405. if (Options.UseBFIInPromotion) {
  406. std::unique_ptr<BranchProbabilityInfo> BPI;
  407. BPI.reset(new BranchProbabilityInfo(*F, LI, &GetTLI(*F)));
  408. BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI));
  409. }
  410. for (const auto &LoadStore : PromotionCandidates) {
  411. auto *CounterLoad = LoadStore.first;
  412. auto *CounterStore = LoadStore.second;
  413. BasicBlock *BB = CounterLoad->getParent();
  414. Loop *ParentLoop = LI.getLoopFor(BB);
  415. if (!ParentLoop)
  416. continue;
  417. LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore);
  418. }
  419. SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();
  420. // Do a post-order traversal of the loops so that counter updates can be
  421. // iteratively hoisted outside the loop nest.
  422. for (auto *Loop : llvm::reverse(Loops)) {
  423. PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get());
  424. Promoter.run(&TotalCountersPromoted);
  425. }
  426. }
  427. static bool needsRuntimeHookUnconditionally(const Triple &TT) {
  428. // On Fuchsia, we only need the runtime hook if any counters are present.
  429. if (TT.isOSFuchsia())
  430. return false;
  431. return true;
  432. }
  433. /// Check if the module contains uses of any profiling intrinsics.
  434. static bool containsProfilingIntrinsics(Module &M) {
  435. auto containsIntrinsic = [&](int ID) {
  436. if (auto *F = M.getFunction(Intrinsic::getName(ID)))
  437. return !F->use_empty();
  438. return false;
  439. };
  440. return containsIntrinsic(llvm::Intrinsic::instrprof_cover) ||
  441. containsIntrinsic(llvm::Intrinsic::instrprof_increment) ||
  442. containsIntrinsic(llvm::Intrinsic::instrprof_increment_step) ||
  443. containsIntrinsic(llvm::Intrinsic::instrprof_value_profile);
  444. }
  445. bool InstrProfiling::run(
  446. Module &M, std::function<const TargetLibraryInfo &(Function &F)> GetTLI) {
  447. this->M = &M;
  448. this->GetTLI = std::move(GetTLI);
  449. NamesVar = nullptr;
  450. NamesSize = 0;
  451. ProfileDataMap.clear();
  452. CompilerUsedVars.clear();
  453. UsedVars.clear();
  454. TT = Triple(M.getTargetTriple());
  455. bool MadeChange = false;
  456. bool NeedsRuntimeHook = needsRuntimeHookUnconditionally(TT);
  457. if (NeedsRuntimeHook)
  458. MadeChange = emitRuntimeHook();
  459. bool ContainsProfiling = containsProfilingIntrinsics(M);
  460. GlobalVariable *CoverageNamesVar =
  461. M.getNamedGlobal(getCoverageUnusedNamesVarName());
  462. // Improve compile time by avoiding linear scans when there is no work.
  463. if (!ContainsProfiling && !CoverageNamesVar)
  464. return MadeChange;
  465. // We did not know how many value sites there would be inside
  466. // the instrumented function. This counts the number of instrumented
  467. // target value sites to record it as a field in the profile data variable.
  468. for (Function &F : M) {
  469. InstrProfIncrementInst *FirstProfIncInst = nullptr;
  470. for (BasicBlock &BB : F)
  471. for (auto I = BB.begin(), E = BB.end(); I != E; I++)
  472. if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I))
  473. computeNumValueSiteCounts(Ind);
  474. else if (FirstProfIncInst == nullptr)
  475. FirstProfIncInst = dyn_cast<InstrProfIncrementInst>(I);
  476. // Value profiling intrinsic lowering requires per-function profile data
  477. // variable to be created first.
  478. if (FirstProfIncInst != nullptr)
  479. static_cast<void>(getOrCreateRegionCounters(FirstProfIncInst));
  480. }
  481. for (Function &F : M)
  482. MadeChange |= lowerIntrinsics(&F);
  483. if (CoverageNamesVar) {
  484. lowerCoverageData(CoverageNamesVar);
  485. MadeChange = true;
  486. }
  487. if (!MadeChange)
  488. return false;
  489. emitVNodes();
  490. emitNameData();
  491. // Emit runtime hook for the cases where the target does not unconditionally
  492. // require pulling in profile runtime, and coverage is enabled on code that is
  493. // not eliminated by the front-end, e.g. unused functions with internal
  494. // linkage.
  495. if (!NeedsRuntimeHook && ContainsProfiling)
  496. emitRuntimeHook();
  497. emitRegistration();
  498. emitUses();
  499. emitInitialization();
  500. return true;
  501. }
  502. static FunctionCallee getOrInsertValueProfilingCall(
  503. Module &M, const TargetLibraryInfo &TLI,
  504. ValueProfilingCallType CallType = ValueProfilingCallType::Default) {
  505. LLVMContext &Ctx = M.getContext();
  506. auto *ReturnTy = Type::getVoidTy(M.getContext());
  507. AttributeList AL;
  508. if (auto AK = TLI.getExtAttrForI32Param(false))
  509. AL = AL.addParamAttribute(M.getContext(), 2, AK);
  510. assert((CallType == ValueProfilingCallType::Default ||
  511. CallType == ValueProfilingCallType::MemOp) &&
  512. "Must be Default or MemOp");
  513. Type *ParamTypes[] = {
  514. #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
  515. #include "llvm/ProfileData/InstrProfData.inc"
  516. };
  517. auto *ValueProfilingCallTy =
  518. FunctionType::get(ReturnTy, ArrayRef(ParamTypes), false);
  519. StringRef FuncName = CallType == ValueProfilingCallType::Default
  520. ? getInstrProfValueProfFuncName()
  521. : getInstrProfValueProfMemOpFuncName();
  522. return M.getOrInsertFunction(FuncName, ValueProfilingCallTy, AL);
  523. }
  524. void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
  525. GlobalVariable *Name = Ind->getName();
  526. uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
  527. uint64_t Index = Ind->getIndex()->getZExtValue();
  528. auto &PD = ProfileDataMap[Name];
  529. PD.NumValueSites[ValueKind] =
  530. std::max(PD.NumValueSites[ValueKind], (uint32_t)(Index + 1));
  531. }
  532. void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
  533. // TODO: Value profiling heavily depends on the data section which is omitted
  534. // in lightweight mode. We need to move the value profile pointer to the
  535. // Counter struct to get this working.
  536. assert(
  537. !DebugInfoCorrelate &&
  538. "Value profiling is not yet supported with lightweight instrumentation");
  539. GlobalVariable *Name = Ind->getName();
  540. auto It = ProfileDataMap.find(Name);
  541. assert(It != ProfileDataMap.end() && It->second.DataVar &&
  542. "value profiling detected in function with no counter incerement");
  543. GlobalVariable *DataVar = It->second.DataVar;
  544. uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
  545. uint64_t Index = Ind->getIndex()->getZExtValue();
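// Value sites of all kinds share one flat index space per function, so
// offset the site index by the number of sites of all preceding value kinds.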
  546. for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
  547. Index += It->second.NumValueSites[Kind];
  548. IRBuilder<> Builder(Ind);
  549. bool IsMemOpSize = (Ind->getValueKind()->getZExtValue() ==
  550. llvm::InstrProfValueKind::IPVK_MemOPSize);
  551. CallInst *Call = nullptr;
  552. auto *TLI = &GetTLI(*Ind->getFunction());
  553. // To support value profiling calls within Windows exception handlers, funclet
  554. // information contained within operand bundles needs to be copied over to
  555. // the library call. This is required for the IR to be processed by the
  556. // WinEHPrepare pass.
  557. SmallVector<OperandBundleDef, 1> OpBundles;
  558. Ind->getOperandBundlesAsDefs(OpBundles);
  559. if (!IsMemOpSize) {
  560. Value *Args[3] = {Ind->getTargetValue(),
  561. Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
  562. Builder.getInt32(Index)};
  563. Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), Args,
  564. OpBundles);
  565. } else {
  566. Value *Args[3] = {Ind->getTargetValue(),
  567. Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
  568. Builder.getInt32(Index)};
  569. Call = Builder.CreateCall(
  570. getOrInsertValueProfilingCall(*M, *TLI, ValueProfilingCallType::MemOp),
  571. Args, OpBundles);
  572. }
  573. if (auto AK = TLI->getExtAttrForI32Param(false))
  574. Call->addParamAttr(2, AK);
  575. Ind->replaceAllUsesWith(Call);
  576. Ind->eraseFromParent();
  577. }
  578. Value *InstrProfiling::getCounterAddress(InstrProfInstBase *I) {
  579. auto *Counters = getOrCreateRegionCounters(I);
  580. IRBuilder<> Builder(I);
  581. auto *Addr = Builder.CreateConstInBoundsGEP2_32(
  582. Counters->getValueType(), Counters, 0, I->getIndex()->getZExtValue());
  583. if (!isRuntimeCounterRelocationEnabled())
  584. return Addr;
  585. Type *Int64Ty = Type::getInt64Ty(M->getContext());
  586. Function *Fn = I->getParent()->getParent();
  587. LoadInst *&BiasLI = FunctionToProfileBiasMap[Fn];
  588. if (!BiasLI) {
  589. IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front());
  590. auto *Bias = M->getGlobalVariable(getInstrProfCounterBiasVarName());
  591. if (!Bias) {
  592. // The compiler must define this variable when runtime counter relocation
  593. // is being used. The runtime has a weak external reference that is used
  594. // to check whether that's the case or not.
  595. Bias = new GlobalVariable(
  596. *M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
  597. Constant::getNullValue(Int64Ty), getInstrProfCounterBiasVarName());
  598. Bias->setVisibility(GlobalVariable::HiddenVisibility);
  599. // A definition that's weak (linkonce_odr) without being in a COMDAT
  600. // section wouldn't lead to link errors, but it would lead to a dead
  601. // data word from every TU but one. Putting it in COMDAT ensures there
  602. // will be exactly one data slot in the link.
  603. if (TT.supportsCOMDAT())
  604. Bias->setComdat(M->getOrInsertComdat(Bias->getName()));
  605. }
  606. BiasLI = EntryBuilder.CreateLoad(Int64Ty, Bias);
  607. }
  608. auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), BiasLI);
  609. return Builder.CreateIntToPtr(Add, Addr->getType());
  610. }
  611. void InstrProfiling::lowerCover(InstrProfCoverInst *CoverInstruction) {
  612. auto *Addr = getCounterAddress(CoverInstruction);
  613. IRBuilder<> Builder(CoverInstruction);
  614. // We store zero to represent that this block is covered.
  615. Builder.CreateStore(Builder.getInt8(0), Addr);
  616. CoverInstruction->eraseFromParent();
  617. }
  618. void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
  619. auto *Addr = getCounterAddress(Inc);
  620. IRBuilder<> Builder(Inc);
  621. if (Options.Atomic || AtomicCounterUpdateAll ||
  622. (Inc->getIndex()->isZeroValue() && AtomicFirstCounter)) {
  623. Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
  624. MaybeAlign(), AtomicOrdering::Monotonic);
  625. } else {
  626. Value *IncStep = Inc->getStep();
  627. Value *Load = Builder.CreateLoad(IncStep->getType(), Addr, "pgocount");
  628. auto *Count = Builder.CreateAdd(Load, Inc->getStep());
  629. auto *Store = Builder.CreateStore(Count, Addr);
  630. if (isCounterPromotionEnabled())
  631. PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
  632. }
  633. Inc->eraseFromParent();
  634. }
  635. void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
  636. ConstantArray *Names =
  637. cast<ConstantArray>(CoverageNamesVar->getInitializer());
  638. for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
  639. Constant *NC = Names->getOperand(I);
  640. Value *V = NC->stripPointerCasts();
  641. assert(isa<GlobalVariable>(V) && "Missing reference to function name");
  642. GlobalVariable *Name = cast<GlobalVariable>(V);
  643. Name->setLinkage(GlobalValue::PrivateLinkage);
  644. ReferencedNames.push_back(Name);
  645. if (isa<ConstantExpr>(NC))
  646. NC->dropAllReferences();
  647. }
  648. CoverageNamesVar->eraseFromParent();
  649. }
  650. /// Get the name of a profiling variable for a particular function.
  651. static std::string getVarName(InstrProfInstBase *Inc, StringRef Prefix,
  652. bool &Renamed) {
  653. StringRef NamePrefix = getInstrProfNameVarPrefix();
  654. StringRef Name = Inc->getName()->getName().substr(NamePrefix.size());
  655. Function *F = Inc->getParent()->getParent();
  656. Module *M = F->getParent();
  657. if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) ||
  658. !canRenameComdatFunc(*F)) {
  659. Renamed = false;
  660. return (Prefix + Name).str();
  661. }
  662. Renamed = true;
  663. uint64_t FuncHash = Inc->getHash()->getZExtValue();
  664. SmallVector<char, 24> HashPostfix;
  665. if (Name.endswith((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix)))
  666. return (Prefix + Name).str();
  667. return (Prefix + Name + "." + Twine(FuncHash)).str();
  668. }
  669. static uint64_t getIntModuleFlagOrZero(const Module &M, StringRef Flag) {
  670. auto *MD = dyn_cast_or_null<ConstantAsMetadata>(M.getModuleFlag(Flag));
  671. if (!MD)
  672. return 0;
  673. // If the flag is a ConstantAsMetadata, it should be an integer representable
  674. // in 64-bits.
  675. return cast<ConstantInt>(MD->getValue())->getZExtValue();
  676. }
  677. static bool enablesValueProfiling(const Module &M) {
  678. return isIRPGOFlagSet(&M) ||
  679. getIntModuleFlagOrZero(M, "EnableValueProfiling") != 0;
  680. }
  681. // Conservatively returns true if data variables may be referenced by code.
  682. static bool profDataReferencedByCode(const Module &M) {
  683. return enablesValueProfiling(M);
  684. }
  685. static inline bool shouldRecordFunctionAddr(Function *F) {
  686. // Only record function addresses if IR PGO is enabled or if clang value
  687. // profiling is enabled. Recording function addresses greatly increases object
  688. // file size, because it prevents the inliner from deleting functions that
  689. // have been inlined everywhere.
  690. if (!profDataReferencedByCode(*F->getParent()))
  691. return false;
  692. // Check the linkage
  693. bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage();
  694. if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
  695. !HasAvailableExternallyLinkage)
  696. return true;
  697. // A function marked 'alwaysinline' with available_externally linkage can't
  698. // have its address taken. Doing so would create an undefined external ref to
  699. // the function, which would fail to link.
  700. if (HasAvailableExternallyLinkage &&
  701. F->hasFnAttribute(Attribute::AlwaysInline))
  702. return false;
  703. // Prohibit function address recording if the function is both internal and
  704. // COMDAT. This avoids the profile data variable referencing internal symbols
  705. // in COMDAT.
  706. if (F->hasLocalLinkage() && F->hasComdat())
  707. return false;
  708. // Check uses of this function for other than direct calls or invokes to it.
  709. // Inline virtual functions have linkonce_odr linkage. When a key method
  710. // exists, the vtable will only be emitted in the TU where the key method
  711. // is defined. In a TU where the vtable is not available, the function won't
  712. // be 'addresstaken'. If its address is not recorded here, the profile data
  713. // with a missing address may be picked by the linker, leading to missing
  714. // indirect call target info.
  715. return F->hasAddressTaken() || F->hasLinkOnceLinkage();
  716. }
  717. static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
  718. // Don't do this for Darwin. compiler-rt uses linker magic.
  719. if (TT.isOSDarwin())
  720. return false;
  721. // Use linker script magic to get data/cnts/name start/end.
  722. if (TT.isOSAIX() || TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() ||
  723. TT.isOSSolaris() || TT.isOSFuchsia() || TT.isPS() || TT.isOSWindows())
  724. return false;
  725. return true;
  726. }
  727. GlobalVariable *
  728. InstrProfiling::createRegionCounters(InstrProfInstBase *Inc, StringRef Name,
  729. GlobalValue::LinkageTypes Linkage) {
  730. uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
  731. auto &Ctx = M->getContext();
  732. GlobalVariable *GV;
  733. if (isa<InstrProfCoverInst>(Inc)) {
  734. auto *CounterTy = Type::getInt8Ty(Ctx);
  735. auto *CounterArrTy = ArrayType::get(CounterTy, NumCounters);
  736. // TODO: `Constant::getAllOnesValue()` does not yet accept an array type.
  737. std::vector<Constant *> InitialValues(NumCounters,
  738. Constant::getAllOnesValue(CounterTy));
  739. GV = new GlobalVariable(*M, CounterArrTy, false, Linkage,
  740. ConstantArray::get(CounterArrTy, InitialValues),
  741. Name);
  742. GV->setAlignment(Align(1));
  743. } else {
  744. auto *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
  745. GV = new GlobalVariable(*M, CounterTy, false, Linkage,
  746. Constant::getNullValue(CounterTy), Name);
  747. GV->setAlignment(Align(8));
  748. }
  749. return GV;
  750. }
  751. GlobalVariable *
  752. InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) {
  753. GlobalVariable *NamePtr = Inc->getName();
  754. auto &PD = ProfileDataMap[NamePtr];
  755. if (PD.RegionCounters)
  756. return PD.RegionCounters;
  757. // Match the linkage and visibility of the name global.
  758. Function *Fn = Inc->getParent()->getParent();
  759. GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
  760. GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
  761. // Use internal rather than private linkage so the counter variable shows up
  762. // in the symbol table when using debug info for correlation.
  763. if (DebugInfoCorrelate && TT.isOSBinFormatMachO() &&
  764. Linkage == GlobalValue::PrivateLinkage)
  765. Linkage = GlobalValue::InternalLinkage;
  766. // Due to a limitation of the binder as of 2021/09/28, duplicate weak
  767. // symbols in the same csect won't be discarded. When there are duplicate weak
  768. // symbols, we can NOT guarantee that the relocations get resolved to the
  769. // intended weak symbol, so we cannot ensure the correctness of the relative
  770. // CounterPtr and have to use private linkage for counter and data symbols.
  771. if (TT.isOSBinFormatXCOFF()) {
  772. Linkage = GlobalValue::PrivateLinkage;
  773. Visibility = GlobalValue::DefaultVisibility;
  774. }
  775. // Move the name variable to the right section. Place them in a COMDAT group
  776. // if the associated function is a COMDAT. This will make sure that only one
  777. // copy of counters of the COMDAT function will be emitted after linking. Keep
  778. // in mind that this pass may run before the inliner, so we need to create a
  779. // new comdat group for the counters and profiling data. If we use the comdat
  780. // of the parent function, that will result in relocations against discarded
  781. // sections.
  782. //
  783. // If the data variable is referenced by code, counters and data have to be
  784. // in different comdats for COFF because the Visual C++ linker will report
  785. // duplicate symbol errors if there are multiple external symbols with the
  786. // same name marked IMAGE_COMDAT_SELECT_ASSOCIATIVE.
  787. //
  788. // For ELF, when not using COMDAT, put counters, data and values into a
  789. // nodeduplicate COMDAT which is lowered to a zero-flag section group. This
  790. // allows -z start-stop-gc to discard the entire group when the function is
  791. // discarded.
  792. bool DataReferencedByCode = profDataReferencedByCode(*M);
  793. bool NeedComdat = needsComdatForCounter(*Fn, *M);
  794. bool Renamed;
  795. std::string CntsVarName =
  796. getVarName(Inc, getInstrProfCountersVarPrefix(), Renamed);
  797. std::string DataVarName =
  798. getVarName(Inc, getInstrProfDataVarPrefix(), Renamed);
  799. auto MaybeSetComdat = [&](GlobalVariable *GV) {
  800. bool UseComdat = (NeedComdat || TT.isOSBinFormatELF());
  801. if (UseComdat) {
  802. StringRef GroupName = TT.isOSBinFormatCOFF() && DataReferencedByCode
  803. ? GV->getName()
  804. : CntsVarName;
  805. Comdat *C = M->getOrInsertComdat(GroupName);
  806. if (!NeedComdat)
  807. C->setSelectionKind(Comdat::NoDeduplicate);
  808. GV->setComdat(C);
  809. // COFF doesn't allow the comdat group leader to have private linkage, so
  810. // upgrade private linkage to internal linkage to produce a symbol table
  811. // entry.
  812. if (TT.isOSBinFormatCOFF() && GV->hasPrivateLinkage())
  813. GV->setLinkage(GlobalValue::InternalLinkage);
  814. }
  815. };
  816. uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
  817. LLVMContext &Ctx = M->getContext();
  818. auto *CounterPtr = createRegionCounters(Inc, CntsVarName, Linkage);
  819. CounterPtr->setVisibility(Visibility);
  820. CounterPtr->setSection(
  821. getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat()));
  822. CounterPtr->setLinkage(Linkage);
  823. MaybeSetComdat(CounterPtr);
  824. PD.RegionCounters = CounterPtr;
  825. if (DebugInfoCorrelate) {
  826. if (auto *SP = Fn->getSubprogram()) {
  827. DIBuilder DB(*M, true, SP->getUnit());
  828. Metadata *FunctionNameAnnotation[] = {
  829. MDString::get(Ctx, InstrProfCorrelator::FunctionNameAttributeName),
  830. MDString::get(Ctx, getPGOFuncNameVarInitializer(NamePtr)),
  831. };
  832. Metadata *CFGHashAnnotation[] = {
  833. MDString::get(Ctx, InstrProfCorrelator::CFGHashAttributeName),
  834. ConstantAsMetadata::get(Inc->getHash()),
  835. };
  836. Metadata *NumCountersAnnotation[] = {
  837. MDString::get(Ctx, InstrProfCorrelator::NumCountersAttributeName),
  838. ConstantAsMetadata::get(Inc->getNumCounters()),
  839. };
  840. auto Annotations = DB.getOrCreateArray({
  841. MDNode::get(Ctx, FunctionNameAnnotation),
  842. MDNode::get(Ctx, CFGHashAnnotation),
  843. MDNode::get(Ctx, NumCountersAnnotation),
  844. });
  845. auto *DICounter = DB.createGlobalVariableExpression(
  846. SP, CounterPtr->getName(), /*LinkageName=*/StringRef(), SP->getFile(),
  847. /*LineNo=*/0, DB.createUnspecifiedType("Profile Data Type"),
  848. CounterPtr->hasLocalLinkage(), /*IsDefined=*/true, /*Expr=*/nullptr,
  849. /*Decl=*/nullptr, /*TemplateParams=*/nullptr, /*AlignInBits=*/0,
  850. Annotations);
  851. CounterPtr->addDebugInfo(DICounter);
  852. DB.finalize();
  853. } else {
  854. std::string Msg = ("Missing debug info for function " + Fn->getName() +
  855. "; required for profile correlation.")
  856. .str();
  857. Ctx.diagnose(
  858. DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
  859. }
  860. }
  861. auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
  862. // Allocate statically the array of pointers to value profile nodes for
  863. // the current function.
  864. Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
  865. uint64_t NS = 0;
  866. for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
  867. NS += PD.NumValueSites[Kind];
  868. if (NS > 0 && ValueProfileStaticAlloc &&
  869. !needsRuntimeRegistrationOfSectionRange(TT)) {
  870. ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
  871. auto *ValuesVar = new GlobalVariable(
  872. *M, ValuesTy, false, Linkage, Constant::getNullValue(ValuesTy),
  873. getVarName(Inc, getInstrProfValuesVarPrefix(), Renamed));
  874. ValuesVar->setVisibility(Visibility);
  875. ValuesVar->setSection(
  876. getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
  877. ValuesVar->setAlignment(Align(8));
  878. MaybeSetComdat(ValuesVar);
  879. ValuesPtrExpr =
  880. ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
  881. }
  882. if (DebugInfoCorrelate) {
  883. // Mark the counter variable as used so that it isn't optimized out.
  884. CompilerUsedVars.push_back(PD.RegionCounters);
  885. return PD.RegionCounters;
  886. }
  887. // Create data variable.
  888. auto *IntPtrTy = M->getDataLayout().getIntPtrType(M->getContext());
  889. auto *Int16Ty = Type::getInt16Ty(Ctx);
  890. auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
  891. Type *DataTypes[] = {
  892. #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
  893. #include "llvm/ProfileData/InstrProfData.inc"
  894. };
  895. auto *DataTy = StructType::get(Ctx, ArrayRef(DataTypes));
  896. Constant *FunctionAddr = shouldRecordFunctionAddr(Fn)
  897. ? ConstantExpr::getBitCast(Fn, Int8PtrTy)
  898. : ConstantPointerNull::get(Int8PtrTy);
  899. Constant *Int16ArrayVals[IPVK_Last + 1];
  900. for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
  901. Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
  902. // If the data variable is not referenced by code (if we don't emit
  903. // @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
  904. // data variable live under linker GC, the data variable can be private. This
  905. // optimization applies to ELF.
  906. //
  907. // On COFF, a comdat leader cannot be local so we require DataReferencedByCode
  908. // to be false.
  909. //
  910. // If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees
  911. // that other copies must have the same CFG and cannot have value profiling.
  912. // If no hash suffix, other profd copies may be referenced by code.
  913. if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
  914. (TT.isOSBinFormatELF() ||
  915. (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
  916. Linkage = GlobalValue::PrivateLinkage;
  917. Visibility = GlobalValue::DefaultVisibility;
  918. }
  919. auto *Data =
  920. new GlobalVariable(*M, DataTy, false, Linkage, nullptr, DataVarName);
  921. // Reference the counter variable with a label difference (link-time
  922. // constant).
  923. auto *RelativeCounterPtr =
  924. ConstantExpr::getSub(ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy),
  925. ConstantExpr::getPtrToInt(Data, IntPtrTy));
  926. Constant *DataVals[] = {
  927. #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
  928. #include "llvm/ProfileData/InstrProfData.inc"
  929. };
  930. Data->setInitializer(ConstantStruct::get(DataTy, DataVals));
  931. Data->setVisibility(Visibility);
  932. Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat()));
  933. Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));
  934. MaybeSetComdat(Data);
  935. PD.DataVar = Data;
  936. // Mark the data variable as used so that it isn't stripped out.
  937. CompilerUsedVars.push_back(Data);
  938. // Now that the linkage set by the FE has been passed to the data and counter
  939. // variables, reset Name variable's linkage and visibility to private so that
  940. // it can be removed later by the compiler.
  941. NamePtr->setLinkage(GlobalValue::PrivateLinkage);
  942. // Collect the referenced names to be used by emitNameData.
  943. ReferencedNames.push_back(NamePtr);
  944. return PD.RegionCounters;
  945. }
  946. void InstrProfiling::emitVNodes() {
  947. if (!ValueProfileStaticAlloc)
  948. return;
  949. // For now only support this on platforms that do
  950. // not require runtime registration to discover
  951. // named section start/end.
  952. if (needsRuntimeRegistrationOfSectionRange(TT))
  953. return;
  954. size_t TotalNS = 0;
  955. for (auto &PD : ProfileDataMap) {
  956. for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
  957. TotalNS += PD.second.NumValueSites[Kind];
  958. }
  959. if (!TotalNS)
  960. return;
  961. uint64_t NumCounters = TotalNS * NumCountersPerValueSite;
  962. // Heuristic for small programs with very few total value sites.
  963. // The default value of vp-counters-per-site is chosen based on
  964. // the observation that large apps usually have a low percentage
  965. // of value sites that actually have any profile data, and thus
  966. // the average number of counters per site is low. For small
  967. // apps with very few sites, this may not be true. Bump up the
  968. // number of counters in this case.
  969. #define INSTR_PROF_MIN_VAL_COUNTS 10
  970. if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS)
  971. NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, (int)NumCounters * 2);
  972. auto &Ctx = M->getContext();
  973. Type *VNodeTypes[] = {
  974. #define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
  975. #include "llvm/ProfileData/InstrProfData.inc"
  976. };
  977. auto *VNodeTy = StructType::get(Ctx, ArrayRef(VNodeTypes));
  978. ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters);
  979. auto *VNodesVar = new GlobalVariable(
  980. *M, VNodesTy, false, GlobalValue::PrivateLinkage,
  981. Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName());
  982. VNodesVar->setSection(
  983. getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
  984. // VNodesVar is used by runtime but not referenced via relocation by other
  985. // sections. Conservatively make it linker retained.
  986. UsedVars.push_back(VNodesVar);
  987. }
  988. void InstrProfiling::emitNameData() {
  989. std::string UncompressedData;
  990. if (ReferencedNames.empty())
  991. return;
  992. std::string CompressedNameStr;
  993. if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr,
  994. DoInstrProfNameCompression)) {
  995. report_fatal_error(Twine(toString(std::move(E))), false);
  996. }
  997. auto &Ctx = M->getContext();
  998. auto *NamesVal =
  999. ConstantDataArray::getString(Ctx, StringRef(CompressedNameStr), false);
  1000. NamesVar = new GlobalVariable(*M, NamesVal->getType(), true,
  1001. GlobalValue::PrivateLinkage, NamesVal,
  1002. getInstrProfNamesVarName());
  1003. NamesSize = CompressedNameStr.size();
  1004. NamesVar->setSection(
  1005. getInstrProfSectionName(IPSK_name, TT.getObjectFormat()));
  1006. // On COFF, it's important to reduce the alignment down to 1 to prevent the
  1007. // linker from inserting padding before the start of the names section or
  1008. // between names entries.
  1009. NamesVar->setAlignment(Align(1));
  1010. // NamesVar is used by runtime but not referenced via relocation by other
  1011. // sections. Conservatively make it linker retained.
  1012. UsedVars.push_back(NamesVar);
  1013. for (auto *NamePtr : ReferencedNames)
  1014. NamePtr->eraseFromParent();
  1015. }
  1016. void InstrProfiling::emitRegistration() {
  1017. if (!needsRuntimeRegistrationOfSectionRange(TT))
  1018. return;
  1019. // Construct the function.
  1020. auto *VoidTy = Type::getVoidTy(M->getContext());
  1021. auto *VoidPtrTy = Type::getInt8PtrTy(M->getContext());
  1022. auto *Int64Ty = Type::getInt64Ty(M->getContext());
  1023. auto *RegisterFTy = FunctionType::get(VoidTy, false);
  1024. auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage,
  1025. getInstrProfRegFuncsName(), M);
  1026. RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  1027. if (Options.NoRedZone)
  1028. RegisterF->addFnAttr(Attribute::NoRedZone);
  1029. auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false);
  1030. auto *RuntimeRegisterF =
  1031. Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage,
  1032. getInstrProfRegFuncName(), M);
  1033. IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
  1034. for (Value *Data : CompilerUsedVars)
  1035. if (!isa<Function>(Data))
  1036. IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
  1037. for (Value *Data : UsedVars)
  1038. if (Data != NamesVar && !isa<Function>(Data))
  1039. IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
  1040. if (NamesVar) {
  1041. Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
  1042. auto *NamesRegisterTy =
  1043. FunctionType::get(VoidTy, ArrayRef(ParamTypes), false);
  1044. auto *NamesRegisterF =
  1045. Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage,
  1046. getInstrProfNamesRegFuncName(), M);
  1047. IRB.CreateCall(NamesRegisterF, {IRB.CreateBitCast(NamesVar, VoidPtrTy),
  1048. IRB.getInt64(NamesSize)});
  1049. }
  1050. IRB.CreateRetVoid();
  1051. }
  1052. bool InstrProfiling::emitRuntimeHook() {
  1053. // We expect the linker to be invoked with the -u<hook_var> flag on Linux
  1054. // and AIX, in which case there is no need to emit the external variable.
  1055. if (TT.isOSLinux() || TT.isOSAIX())
  1056. return false;
  1057. // If the module has provided its own runtime, we don't need to do anything.
  1058. if (M->getGlobalVariable(getInstrProfRuntimeHookVarName()))
  1059. return false;
  1060. // Declare an external variable that will pull in the runtime initialization.
  1061. auto *Int32Ty = Type::getInt32Ty(M->getContext());
  1062. auto *Var =
  1063. new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage,
  1064. nullptr, getInstrProfRuntimeHookVarName());
  1065. Var->setVisibility(GlobalValue::HiddenVisibility);
  1066. if (TT.isOSBinFormatELF() && !TT.isPS()) {
  1067. // Mark the user variable as used so that it isn't stripped out.
  1068. CompilerUsedVars.push_back(Var);
  1069. } else {
  1070. // Make a function that uses it.
  1071. auto *User = Function::Create(FunctionType::get(Int32Ty, false),
  1072. GlobalValue::LinkOnceODRLinkage,
  1073. getInstrProfRuntimeHookVarUseFuncName(), M);
  1074. User->addFnAttr(Attribute::NoInline);
  1075. if (Options.NoRedZone)
  1076. User->addFnAttr(Attribute::NoRedZone);
  1077. User->setVisibility(GlobalValue::HiddenVisibility);
  1078. if (TT.supportsCOMDAT())
  1079. User->setComdat(M->getOrInsertComdat(User->getName()));
  1080. IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
  1081. auto *Load = IRB.CreateLoad(Int32Ty, Var);
  1082. IRB.CreateRet(Load);
  1083. // Mark the function as used so that it isn't stripped out.
  1084. CompilerUsedVars.push_back(User);
  1085. }
  1086. return true;
  1087. }
  1088. void InstrProfiling::emitUses() {
  1089. // The metadata sections are parallel arrays. Optimizers (e.g.
  1090. // GlobalOpt/ConstantMerge) may not discard associated sections as a unit, so
  1091. // we conservatively retain all unconditionally in the compiler.
  1092. //
  1093. // On ELF and Mach-O, the linker can guarantee the associated sections will be
  1094. // retained or discarded as a unit, so llvm.compiler.used is sufficient.
  1095. // Similarly on COFF, if prof data is not referenced by code we use one comdat
  1096. // and ensure this GC property as well. Otherwise, we have to conservatively
  1097. // make all of the sections retained by the linker.
  1098. if (TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() ||
  1099. (TT.isOSBinFormatCOFF() && !profDataReferencedByCode(*M)))
  1100. appendToCompilerUsed(*M, CompilerUsedVars);
  1101. else
  1102. appendToUsed(*M, CompilerUsedVars);
  1103. // We do not add proper references from used metadata sections to NamesVar and
  1104. // VNodesVar, so we have to be conservative and place them in llvm.used
  1105. // regardless of the target.
  1106. appendToUsed(*M, UsedVars);
  1107. }
  1108. void InstrProfiling::emitInitialization() {
  1109. // Create the ProfileFileName variable. Don't do this for the
  1110. // context-sensitive instrumentation lowering: this lowering is after
  1111. // LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should
  1112. // have already created the variable before LTO/ThinLTO linking.
  1113. if (!IsCS)
  1114. createProfileFileNameVar(*M, Options.InstrProfileOutput);
  1115. Function *RegisterF = M->getFunction(getInstrProfRegFuncsName());
  1116. if (!RegisterF)
  1117. return;
  1118. // Create the initialization function.
  1119. auto *VoidTy = Type::getVoidTy(M->getContext());
  1120. auto *F = Function::Create(FunctionType::get(VoidTy, false),
  1121. GlobalValue::InternalLinkage,
  1122. getInstrProfInitFuncName(), M);
  1123. F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  1124. F->addFnAttr(Attribute::NoInline);
  1125. if (Options.NoRedZone)
  1126. F->addFnAttr(Attribute::NoRedZone);
  1127. // Add the basic block and the necessary calls.
  1128. IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F));
  1129. IRB.CreateCall(RegisterF, {});
  1130. IRB.CreateRetVoid();
  1131. appendToGlobalCtors(*M, F, 0);
  1132. }