PGOInstrumentation.cpp 81 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220
  1. //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements PGO instrumentation using a minimum spanning tree based
  10. // on the following paper:
  11. // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
  12. // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
  13. // Issue 3, pp 313-322
  // The idea of the algorithm is based on the fact that for each node (except
  // for the entry and exit), the sum of incoming edge counts equals the sum of
  // outgoing edge counts. The count of an edge on the spanning tree can be
  // derived from those edges not on the spanning tree. Knuth proves this method
  // instruments the minimum number of edges.
  19. //
  20. // The minimal spanning tree here is actually a maximum weight tree -- on-tree
  21. // edges have higher frequencies (more likely to execute). The idea is to
  22. // instrument those less frequently executed edges to reduce the runtime
  23. // overhead of instrumented binaries.
  24. //
  25. // This file contains two passes:
  26. // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
  27. // count profile, and generates the instrumentation for indirect call
  28. // profiling.
  29. // (2) Pass PGOInstrumentationUse which reads the edge count profile and
  30. // annotates the branch weights. It also reads the indirect call value
  31. // profiling records and annotate the indirect call instructions.
  32. //
  // To get precise counter information, these two passes need to be invoked at
  // the same compilation point (so they see the same IR). For pass
  // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
  // pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and
  // the profile is opened at module level and passed to each PGOUseFunc
  // instance. The shared code for PGOInstrumentationGen and
  // PGOInstrumentationUse is put in class FuncPGOInstrumentation.
  40. //
  // Class PGOEdge represents a CFG edge and some auxiliary information. Class
  // BBInfo contains auxiliary information for each BB. These two classes are
  // used in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are
  // derived from PGOEdge and BBInfo, respectively. They contain extra data
  // structures used in populating profile counters.
  // The MST implementation is in class CFGMST (CFGMST.h).
  47. //
  48. //===----------------------------------------------------------------------===//
  49. #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
  50. #include "CFGMST.h"
  51. #include "ValueProfileCollector.h"
  52. #include "llvm/ADT/APInt.h"
  53. #include "llvm/ADT/ArrayRef.h"
  54. #include "llvm/ADT/MapVector.h"
  55. #include "llvm/ADT/STLExtras.h"
  56. #include "llvm/ADT/SmallVector.h"
  57. #include "llvm/ADT/Statistic.h"
  58. #include "llvm/ADT/StringRef.h"
  59. #include "llvm/ADT/Triple.h"
  60. #include "llvm/ADT/Twine.h"
  61. #include "llvm/ADT/iterator.h"
  62. #include "llvm/ADT/iterator_range.h"
  63. #include "llvm/Analysis/BlockFrequencyInfo.h"
  64. #include "llvm/Analysis/BranchProbabilityInfo.h"
  65. #include "llvm/Analysis/CFG.h"
  66. #include "llvm/Analysis/EHPersonalities.h"
  67. #include "llvm/Analysis/LoopInfo.h"
  68. #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  69. #include "llvm/Analysis/ProfileSummaryInfo.h"
  70. #include "llvm/IR/Attributes.h"
  71. #include "llvm/IR/BasicBlock.h"
  72. #include "llvm/IR/CFG.h"
  73. #include "llvm/IR/Comdat.h"
  74. #include "llvm/IR/Constant.h"
  75. #include "llvm/IR/Constants.h"
  76. #include "llvm/IR/DiagnosticInfo.h"
  77. #include "llvm/IR/Dominators.h"
  78. #include "llvm/IR/Function.h"
  79. #include "llvm/IR/GlobalAlias.h"
  80. #include "llvm/IR/GlobalValue.h"
  81. #include "llvm/IR/GlobalVariable.h"
  82. #include "llvm/IR/IRBuilder.h"
  83. #include "llvm/IR/InstVisitor.h"
  84. #include "llvm/IR/InstrTypes.h"
  85. #include "llvm/IR/Instruction.h"
  86. #include "llvm/IR/Instructions.h"
  87. #include "llvm/IR/IntrinsicInst.h"
  88. #include "llvm/IR/Intrinsics.h"
  89. #include "llvm/IR/LLVMContext.h"
  90. #include "llvm/IR/MDBuilder.h"
  91. #include "llvm/IR/Module.h"
  92. #include "llvm/IR/PassManager.h"
  93. #include "llvm/IR/ProfileSummary.h"
  94. #include "llvm/IR/Type.h"
  95. #include "llvm/IR/Value.h"
  96. #include "llvm/InitializePasses.h"
  97. #include "llvm/Pass.h"
  98. #include "llvm/ProfileData/InstrProf.h"
  99. #include "llvm/ProfileData/InstrProfReader.h"
  100. #include "llvm/Support/BranchProbability.h"
  101. #include "llvm/Support/CRC.h"
  102. #include "llvm/Support/Casting.h"
  103. #include "llvm/Support/CommandLine.h"
  104. #include "llvm/Support/DOTGraphTraits.h"
  105. #include "llvm/Support/Debug.h"
  106. #include "llvm/Support/Error.h"
  107. #include "llvm/Support/ErrorHandling.h"
  108. #include "llvm/Support/GraphWriter.h"
  109. #include "llvm/Support/raw_ostream.h"
  110. #include "llvm/Transforms/Instrumentation.h"
  111. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  112. #include "llvm/Transforms/Utils/ModuleUtils.h"
  113. #include <algorithm>
  114. #include <cassert>
  115. #include <cstdint>
  116. #include <memory>
  117. #include <numeric>
  118. #include <string>
  119. #include <unordered_map>
  120. #include <utility>
  121. #include <vector>
  122. using namespace llvm;
  123. using ProfileCount = Function::ProfileCount;
  124. using VPCandidateInfo = ValueProfileCollector::CandidateInfo;
  125. #define DEBUG_TYPE "pgo-instrumentation"
  126. STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
  127. STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
  128. STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
  129. STATISTIC(NumOfPGOEdge, "Number of edges.");
  130. STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
  131. STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
  132. STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
  133. STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
  134. STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
  135. STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
  136. STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
  137. STATISTIC(NumOfCSPGOSelectInsts,
  138. "Number of select instruction instrumented in CSPGO.");
  139. STATISTIC(NumOfCSPGOMemIntrinsics,
  140. "Number of mem intrinsics instrumented in CSPGO.");
  141. STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
  142. STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
  143. STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
  144. STATISTIC(NumOfCSPGOFunc,
  145. "Number of functions having valid profile counts in CSPGO.");
  146. STATISTIC(NumOfCSPGOMismatch,
  147. "Number of functions having mismatch profile in CSPGO.");
  148. STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
  149. // Command line option to specify the file to read profile from. This is
  150. // mainly used for testing.
  151. static cl::opt<std::string>
  152. PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
  153. cl::value_desc("filename"),
  154. cl::desc("Specify the path of profile data file. This is"
  155. "mainly for test purpose."));
  156. static cl::opt<std::string> PGOTestProfileRemappingFile(
  157. "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
  158. cl::value_desc("filename"),
  159. cl::desc("Specify the path of profile remapping file. This is mainly for "
  160. "test purpose."));
  161. // Command line option to disable value profiling. The default is false:
  162. // i.e. value profiling is enabled by default. This is for debug purpose.
  163. static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
  164. cl::Hidden,
  165. cl::desc("Disable Value Profiling"));
  166. // Command line option to set the maximum number of VP annotations to write to
  167. // the metadata for a single indirect call callsite.
  168. static cl::opt<unsigned> MaxNumAnnotations(
  169. "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore,
  170. cl::desc("Max number of annotations for a single indirect "
  171. "call callsite"));
  172. // Command line option to set the maximum number of value annotations
  173. // to write to the metadata for a single memop intrinsic.
  174. static cl::opt<unsigned> MaxNumMemOPAnnotations(
  175. "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore,
  176. cl::desc("Max number of preicise value annotations for a single memop"
  177. "intrinsic"));
  178. // Command line option to control appending FunctionHash to the name of a COMDAT
  179. // function. This is to avoid the hash mismatch caused by the preinliner.
  180. static cl::opt<bool> DoComdatRenaming(
  181. "do-comdat-renaming", cl::init(false), cl::Hidden,
  182. cl::desc("Append function hash to the name of COMDAT function to avoid "
  183. "function hash mismatch due to the preinliner"));
  184. // Command line option to enable/disable the warning about missing profile
  185. // information.
  186. static cl::opt<bool>
  187. PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden,
  188. cl::desc("Use this option to turn on/off "
  189. "warnings about missing profile data for "
  190. "functions."));
  191. namespace llvm {
  192. // Command line option to enable/disable the warning about a hash mismatch in
  193. // the profile data.
  194. cl::opt<bool>
  195. NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
  196. cl::desc("Use this option to turn off/on "
  197. "warnings about profile cfg mismatch."));
  198. } // namespace llvm
  199. // Command line option to enable/disable the warning about a hash mismatch in
  200. // the profile data for Comdat functions, which often turns out to be false
  201. // positive due to the pre-instrumentation inline.
  202. static cl::opt<bool>
  203. NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true),
  204. cl::Hidden,
  205. cl::desc("The option is used to turn on/off "
  206. "warnings about hash mismatch for comdat "
  207. "functions."));
  208. // Command line option to enable/disable select instruction instrumentation.
  209. static cl::opt<bool>
  210. PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
  211. cl::desc("Use this option to turn on/off SELECT "
  212. "instruction instrumentation. "));
  213. // Command line option to turn on CFG dot or text dump of raw profile counts
  214. static cl::opt<PGOViewCountsType> PGOViewRawCounts(
  215. "pgo-view-raw-counts", cl::Hidden,
  216. cl::desc("A boolean option to show CFG dag or text "
  217. "with raw profile counts from "
  218. "profile data. See also option "
  219. "-pgo-view-counts. To limit graph "
  220. "display to only one function, use "
  221. "filtering option -view-bfi-func-name."),
  222. cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
  223. clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
  224. clEnumValN(PGOVCT_Text, "text", "show in text.")));
  225. // Command line option to enable/disable memop intrinsic call.size profiling.
  226. static cl::opt<bool>
  227. PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
  228. cl::desc("Use this option to turn on/off "
  229. "memory intrinsic size profiling."));
  230. // Emit branch probability as optimization remarks.
  231. static cl::opt<bool>
  232. EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
  233. cl::desc("When this option is on, the annotated "
  234. "branch probability will be emitted as "
  235. "optimization remarks: -{Rpass|"
  236. "pass-remarks}=pgo-instrumentation"));
  237. static cl::opt<bool> PGOInstrumentEntry(
  238. "pgo-instrument-entry", cl::init(false), cl::Hidden,
  239. cl::desc("Force to instrument function entry basicblock."));
  240. static cl::opt<bool> PGOFunctionEntryCoverage(
  241. "pgo-function-entry-coverage", cl::init(false), cl::Hidden, cl::ZeroOrMore,
  242. cl::desc(
  243. "Use this option to enable function entry coverage instrumentation."));
  244. static cl::opt<bool>
  245. PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
  246. cl::desc("Fix function entry count in profile use."));
  247. static cl::opt<bool> PGOVerifyHotBFI(
  248. "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
  249. cl::desc("Print out the non-match BFI count if a hot raw profile count "
  250. "becomes non-hot, or a cold raw profile count becomes hot. "
  251. "The print is enabled under -Rpass-analysis=pgo, or "
  252. "internal option -pass-remakrs-analysis=pgo."));
  253. static cl::opt<bool> PGOVerifyBFI(
  254. "pgo-verify-bfi", cl::init(false), cl::Hidden,
  255. cl::desc("Print out mismatched BFI counts after setting profile metadata "
  256. "The print is enabled under -Rpass-analysis=pgo, or "
  257. "internal option -pass-remakrs-analysis=pgo."));
  258. static cl::opt<unsigned> PGOVerifyBFIRatio(
  259. "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
  260. cl::desc("Set the threshold for pgo-verify-bfi: only print out "
  261. "mismatched BFI if the difference percentage is greater than "
  262. "this value (in percentage)."));
  263. static cl::opt<unsigned> PGOVerifyBFICutoff(
  264. "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
  265. cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
  266. "profile count value is below."));
  267. namespace llvm {
  268. // Command line option to turn on CFG dot dump after profile annotation.
  269. // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
  270. extern cl::opt<PGOViewCountsType> PGOViewCounts;
  271. // Command line option to specify the name of the function for CFG dump
  272. // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
  273. extern cl::opt<std::string> ViewBlockFreqFuncName;
  274. extern cl::opt<bool> DebugInfoCorrelate;
  275. } // namespace llvm
  276. static cl::opt<bool>
  277. PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden,
  278. cl::desc("Use the old CFG function hashing"));
  279. // Return a string describing the branch condition that can be
  280. // used in static branch probability heuristics:
  281. static std::string getBranchCondString(Instruction *TI) {
  282. BranchInst *BI = dyn_cast<BranchInst>(TI);
  283. if (!BI || !BI->isConditional())
  284. return std::string();
  285. Value *Cond = BI->getCondition();
  286. ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
  287. if (!CI)
  288. return std::string();
  289. std::string result;
  290. raw_string_ostream OS(result);
  291. OS << CmpInst::getPredicateName(CI->getPredicate()) << "_";
  292. CI->getOperand(0)->getType()->print(OS, true);
  293. Value *RHS = CI->getOperand(1);
  294. ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
  295. if (CV) {
  296. if (CV->isZero())
  297. OS << "_Zero";
  298. else if (CV->isOne())
  299. OS << "_One";
  300. else if (CV->isMinusOne())
  301. OS << "_MinusOne";
  302. else
  303. OS << "_Const";
  304. }
  305. OS.flush();
  306. return result;
  307. }
  308. static const char *ValueProfKindDescr[] = {
  309. #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
  310. #include "llvm/ProfileData/InstrProfData.inc"
  311. };
  312. // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
  313. // aware this is an ir_level profile so it can set the version flag.
  314. static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) {
  315. const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
  316. Type *IntTy64 = Type::getInt64Ty(M.getContext());
  317. uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
  318. if (IsCS)
  319. ProfileVersion |= VARIANT_MASK_CSIR_PROF;
  320. if (PGOInstrumentEntry)
  321. ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
  322. if (DebugInfoCorrelate)
  323. ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
  324. if (PGOFunctionEntryCoverage)
  325. ProfileVersion |=
  326. VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
  327. auto IRLevelVersionVariable = new GlobalVariable(
  328. M, IntTy64, true, GlobalValue::WeakAnyLinkage,
  329. Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
  330. IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility);
  331. Triple TT(M.getTargetTriple());
  332. if (TT.supportsCOMDAT()) {
  333. IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
  334. IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
  335. }
  336. return IRLevelVersionVariable;
  337. }
  338. namespace {
  /// Mode in which the select instruction visitor operates; the visitor plays
  /// one of three roles depending on it.
  enum VisitMode {
    /// Simply count the number of select instructions.
    VM_counting,
    /// Insert code counting the number of times the TrueValue of a select is
    /// taken.
    VM_instrument,
    /// Read the profile data and annotate the select instruction with
    /// metadata.
    VM_annotate
  };

  class PGOUseFunc;
  346. /// Instruction Visitor class to visit select instructions.
  347. struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
  348. Function &F;
  349. unsigned NSIs = 0; // Number of select instructions instrumented.
  350. VisitMode Mode = VM_counting; // Visiting mode.
  351. unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
  352. unsigned TotalNumCtrs = 0; // Total number of counters
  353. GlobalVariable *FuncNameVar = nullptr;
  354. uint64_t FuncHash = 0;
  355. PGOUseFunc *UseFunc = nullptr;
  356. SelectInstVisitor(Function &Func) : F(Func) {}
  357. void countSelects(Function &Func) {
  358. NSIs = 0;
  359. Mode = VM_counting;
  360. visit(Func);
  361. }
  362. // Visit the IR stream and instrument all select instructions. \p
  363. // Ind is a pointer to the counter index variable; \p TotalNC
  364. // is the total number of counters; \p FNV is the pointer to the
  365. // PGO function name var; \p FHash is the function hash.
  366. void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC,
  367. GlobalVariable *FNV, uint64_t FHash) {
  368. Mode = VM_instrument;
  369. CurCtrIdx = Ind;
  370. TotalNumCtrs = TotalNC;
  371. FuncHash = FHash;
  372. FuncNameVar = FNV;
  373. visit(Func);
  374. }
  375. // Visit the IR stream and annotate all select instructions.
  376. void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) {
  377. Mode = VM_annotate;
  378. UseFunc = UF;
  379. CurCtrIdx = Ind;
  380. visit(Func);
  381. }
  382. void instrumentOneSelectInst(SelectInst &SI);
  383. void annotateOneSelectInst(SelectInst &SI);
  384. // Visit \p SI instruction and perform tasks according to visit mode.
  385. void visitSelectInst(SelectInst &SI);
  386. // Return the number of select instructions. This needs be called after
  387. // countSelects().
  388. unsigned getNumOfSelectInsts() const { return NSIs; }
  389. };
  390. class PGOInstrumentationGenLegacyPass : public ModulePass {
  391. public:
  392. static char ID;
  393. PGOInstrumentationGenLegacyPass(bool IsCS = false)
  394. : ModulePass(ID), IsCS(IsCS) {
  395. initializePGOInstrumentationGenLegacyPassPass(
  396. *PassRegistry::getPassRegistry());
  397. }
  398. StringRef getPassName() const override { return "PGOInstrumentationGenPass"; }
  399. private:
  400. // Is this is context-sensitive instrumentation.
  401. bool IsCS;
  402. bool runOnModule(Module &M) override;
  403. void getAnalysisUsage(AnalysisUsage &AU) const override {
  404. AU.addRequired<BlockFrequencyInfoWrapperPass>();
  405. AU.addRequired<TargetLibraryInfoWrapperPass>();
  406. }
  407. };
  408. class PGOInstrumentationUseLegacyPass : public ModulePass {
  409. public:
  410. static char ID;
  411. // Provide the profile filename as the parameter.
  412. PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false)
  413. : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) {
  414. if (!PGOTestProfileFile.empty())
  415. ProfileFileName = PGOTestProfileFile;
  416. initializePGOInstrumentationUseLegacyPassPass(
  417. *PassRegistry::getPassRegistry());
  418. }
  419. StringRef getPassName() const override { return "PGOInstrumentationUsePass"; }
  420. private:
  421. std::string ProfileFileName;
  422. // Is this is context-sensitive instrumentation use.
  423. bool IsCS;
  424. bool runOnModule(Module &M) override;
  425. void getAnalysisUsage(AnalysisUsage &AU) const override {
  426. AU.addRequired<ProfileSummaryInfoWrapperPass>();
  427. AU.addRequired<BlockFrequencyInfoWrapperPass>();
  428. AU.addRequired<TargetLibraryInfoWrapperPass>();
  429. }
  430. };
  431. class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass {
  432. public:
  433. static char ID;
  434. StringRef getPassName() const override {
  435. return "PGOInstrumentationGenCreateVarPass";
  436. }
  437. PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "")
  438. : ModulePass(ID), InstrProfileOutput(CSInstrName) {
  439. initializePGOInstrumentationGenCreateVarLegacyPassPass(
  440. *PassRegistry::getPassRegistry());
  441. }
  442. private:
  443. bool runOnModule(Module &M) override {
  444. createProfileFileNameVar(M, InstrProfileOutput);
  445. // The variable in a comdat may be discarded by LTO. Ensure the
  446. // declaration will be retained.
  447. appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true));
  448. return false;
  449. }
  450. std::string InstrProfileOutput;
  451. };
  452. } // end anonymous namespace
  453. char PGOInstrumentationGenLegacyPass::ID = 0;
  454. INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
  455. "PGO instrumentation.", false, false)
  456. INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
  457. INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
  458. INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
  459. INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
  460. "PGO instrumentation.", false, false)
  461. ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) {
  462. return new PGOInstrumentationGenLegacyPass(IsCS);
  463. }
  464. char PGOInstrumentationUseLegacyPass::ID = 0;
  465. INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
  466. "Read PGO instrumentation profile.", false, false)
  467. INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
  468. INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
  469. INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
  470. INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
  471. "Read PGO instrumentation profile.", false, false)
  472. ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename,
  473. bool IsCS) {
  474. return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS);
  475. }
  476. char PGOInstrumentationGenCreateVarLegacyPass::ID = 0;
  477. INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass,
  478. "pgo-instr-gen-create-var",
  479. "Create PGO instrumentation version variable for CSPGO.", false,
  480. false)
  481. ModulePass *
  482. llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) {
  483. return new PGOInstrumentationGenCreateVarLegacyPass(std::string(CSInstrName));
  484. }
  485. namespace {
  486. /// An MST based instrumentation for PGO
  487. ///
  488. /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO
  489. /// in the function level.
  490. struct PGOEdge {
  491. // This class implements the CFG edges. Note the CFG can be a multi-graph.
  492. // So there might be multiple edges with same SrcBB and DestBB.
  493. const BasicBlock *SrcBB;
  494. const BasicBlock *DestBB;
  495. uint64_t Weight;
  496. bool InMST = false;
  497. bool Removed = false;
  498. bool IsCritical = false;
  499. PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
  500. : SrcBB(Src), DestBB(Dest), Weight(W) {}
  501. // Return the information string of an edge.
  502. std::string infoString() const {
  503. return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
  504. (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str();
  505. }
  506. };
  507. // This class stores the auxiliary information for each BB.
  508. struct BBInfo {
  509. BBInfo *Group;
  510. uint32_t Index;
  511. uint32_t Rank = 0;
  512. BBInfo(unsigned IX) : Group(this), Index(IX) {}
  513. // Return the information string of this object.
  514. std::string infoString() const {
  515. return (Twine("Index=") + Twine(Index)).str();
  516. }
  517. // Empty function -- only applicable to UseBBInfo.
  518. void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
  519. // Empty function -- only applicable to UseBBInfo.
  520. void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
  521. };
  522. // This class implements the CFG edges. Note the CFG can be a multi-graph.
  523. template <class Edge, class BBInfo> class FuncPGOInstrumentation {
  524. private:
  525. Function &F;
  526. // Is this is context-sensitive instrumentation.
  527. bool IsCS;
  528. // A map that stores the Comdat group in function F.
  529. std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
  530. ValueProfileCollector VPC;
  531. void computeCFGHash();
  532. void renameComdatFunction();
  533. public:
  534. std::vector<std::vector<VPCandidateInfo>> ValueSites;
  535. SelectInstVisitor SIVisitor;
  536. std::string FuncName;
  537. GlobalVariable *FuncNameVar;
  538. // CFG hash value for this function.
  539. uint64_t FunctionHash = 0;
  540. // The Minimum Spanning Tree of function CFG.
  541. CFGMST<Edge, BBInfo> MST;
  542. // Collect all the BBs that will be instrumented, and store them in
  543. // InstrumentBBs.
  544. void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
  545. // Give an edge, find the BB that will be instrumented.
  546. // Return nullptr if there is no BB to be instrumented.
  547. BasicBlock *getInstrBB(Edge *E);
  548. // Return the auxiliary BB information.
  549. BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
  550. // Return the auxiliary BB information if available.
  551. BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
  552. // Dump edges and BB information.
  553. void dumpInfo(std::string Str = "") const {
  554. MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " +
  555. Twine(FunctionHash) + "\t" + Str);
  556. }
  557. FuncPGOInstrumentation(
  558. Function &Func, TargetLibraryInfo &TLI,
  559. std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
  560. bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
  561. BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
  562. bool InstrumentFuncEntry = true)
  563. : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
  564. ValueSites(IPVK_Last + 1), SIVisitor(Func),
  565. MST(F, InstrumentFuncEntry, BPI, BFI) {
  566. // This should be done before CFG hash computation.
  567. SIVisitor.countSelects(Func);
  568. ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
  569. if (!IsCS) {
  570. NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
  571. NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
  572. NumOfPGOBB += MST.BBInfos.size();
  573. ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
  574. } else {
  575. NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
  576. NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
  577. NumOfCSPGOBB += MST.BBInfos.size();
  578. }
  579. FuncName = getPGOFuncName(F);
  580. computeCFGHash();
  581. if (!ComdatMembers.empty())
  582. renameComdatFunction();
  583. LLVM_DEBUG(dumpInfo("after CFGMST"));
  584. for (auto &E : MST.AllEdges) {
  585. if (E->Removed)
  586. continue;
  587. IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
  588. if (!E->InMST)
  589. IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
  590. }
  591. if (CreateGlobalVar)
  592. FuncNameVar = createPGOFuncNameVar(F, FuncName);
  593. }
  594. };
  595. } // end anonymous namespace
  596. // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
  597. // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
  598. // of selects, indirect calls, mem ops and edges.
  599. template <class Edge, class BBInfo>
  600. void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
  601. std::vector<uint8_t> Indexes;
  602. JamCRC JC;
  603. for (auto &BB : F) {
  604. const Instruction *TI = BB.getTerminator();
  605. for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
  606. BasicBlock *Succ = TI->getSuccessor(I);
  607. auto BI = findBBInfo(Succ);
  608. if (BI == nullptr)
  609. continue;
  610. uint32_t Index = BI->Index;
  611. for (int J = 0; J < 4; J++)
  612. Indexes.push_back((uint8_t)(Index >> (J * 8)));
  613. }
  614. }
  615. JC.update(Indexes);
  616. JamCRC JCH;
  617. if (PGOOldCFGHashing) {
  618. // Hash format for context sensitive profile. Reserve 4 bits for other
  619. // information.
  620. FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
  621. (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
  622. //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
  623. (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
  624. } else {
  625. // The higher 32 bits.
  626. auto updateJCH = [&JCH](uint64_t Num) {
  627. uint8_t Data[8];
  628. support::endian::write64le(Data, Num);
  629. JCH.update(Data);
  630. };
  631. updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
  632. updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
  633. updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
  634. updateJCH((uint64_t)MST.AllEdges.size());
  635. // Hash format for context sensitive profile. Reserve 4 bits for other
  636. // information.
  637. FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
  638. }
  639. // Reserve bit 60-63 for other information purpose.
  640. FunctionHash &= 0x0FFFFFFFFFFFFFFF;
  641. if (IsCS)
  642. NamedInstrProfRecord::setCSFlagInHash(FunctionHash);
  643. LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
  644. << " CRC = " << JC.getCRC()
  645. << ", Selects = " << SIVisitor.getNumOfSelectInsts()
  646. << ", Edges = " << MST.AllEdges.size() << ", ICSites = "
  647. << ValueSites[IPVK_IndirectCallTarget].size());
  648. if (!PGOOldCFGHashing) {
  649. LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
  650. << ", High32 CRC = " << JCH.getCRC());
  651. }
  652. LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";);
  653. }
  654. // Check if we can safely rename this Comdat function.
  655. static bool canRenameComdat(
  656. Function &F,
  657. std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
  658. if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
  659. return false;
  660. // FIXME: Current only handle those Comdat groups that only containing one
  661. // function.
  662. // (1) For a Comdat group containing multiple functions, we need to have a
  663. // unique postfix based on the hashes for each function. There is a
  664. // non-trivial code refactoring to do this efficiently.
  665. // (2) Variables can not be renamed, so we can not rename Comdat function in a
  666. // group including global vars.
  667. Comdat *C = F.getComdat();
  668. for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
  669. assert(!isa<GlobalAlias>(CM.second));
  670. Function *FM = dyn_cast<Function>(CM.second);
  671. if (FM != &F)
  672. return false;
  673. }
  674. return true;
  675. }
  676. // Append the CFGHash to the Comdat function name.
  677. template <class Edge, class BBInfo>
  678. void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
  679. if (!canRenameComdat(F, ComdatMembers))
  680. return;
  681. std::string OrigName = F.getName().str();
  682. std::string NewFuncName =
  683. Twine(F.getName() + "." + Twine(FunctionHash)).str();
  684. F.setName(Twine(NewFuncName));
  685. GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F);
  686. FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
  687. Comdat *NewComdat;
  688. Module *M = F.getParent();
  689. // For AvailableExternallyLinkage functions, change the linkage to
  690. // LinkOnceODR and put them into comdat. This is because after renaming, there
  691. // is no backup external copy available for the function.
  692. if (!F.hasComdat()) {
  693. assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage);
  694. NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
  695. F.setLinkage(GlobalValue::LinkOnceODRLinkage);
  696. F.setComdat(NewComdat);
  697. return;
  698. }
  699. // This function belongs to a single function Comdat group.
  700. Comdat *OrigComdat = F.getComdat();
  701. std::string NewComdatName =
  702. Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
  703. NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
  704. NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
  705. for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
  706. // Must be a function.
  707. cast<Function>(CM.second)->setComdat(NewComdat);
  708. }
  709. }
  710. // Collect all the BBs that will be instruments and return them in
  711. // InstrumentBBs and setup InEdges/OutEdge for UseBBInfo.
  712. template <class Edge, class BBInfo>
  713. void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
  714. std::vector<BasicBlock *> &InstrumentBBs) {
  715. // Use a worklist as we will update the vector during the iteration.
  716. std::vector<Edge *> EdgeList;
  717. EdgeList.reserve(MST.AllEdges.size());
  718. for (auto &E : MST.AllEdges)
  719. EdgeList.push_back(E.get());
  720. for (auto &E : EdgeList) {
  721. BasicBlock *InstrBB = getInstrBB(E);
  722. if (InstrBB)
  723. InstrumentBBs.push_back(InstrBB);
  724. }
  725. // Set up InEdges/OutEdges for all BBs.
  726. for (auto &E : MST.AllEdges) {
  727. if (E->Removed)
  728. continue;
  729. const BasicBlock *SrcBB = E->SrcBB;
  730. const BasicBlock *DestBB = E->DestBB;
  731. BBInfo &SrcInfo = getBBInfo(SrcBB);
  732. BBInfo &DestInfo = getBBInfo(DestBB);
  733. SrcInfo.addOutEdge(E.get());
  734. DestInfo.addInEdge(E.get());
  735. }
  736. }
  737. // Given a CFG E to be instrumented, find which BB to place the instrumented
  738. // code. The function will split the critical edge if necessary.
  739. template <class Edge, class BBInfo>
  740. BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
  741. if (E->InMST || E->Removed)
  742. return nullptr;
  743. BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
  744. BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
  745. // For a fake edge, instrument the real BB.
  746. if (SrcBB == nullptr)
  747. return DestBB;
  748. if (DestBB == nullptr)
  749. return SrcBB;
  750. auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
  751. // There are basic blocks (such as catchswitch) cannot be instrumented.
  752. // If the returned first insertion point is the end of BB, skip this BB.
  753. if (BB->getFirstInsertionPt() == BB->end())
  754. return nullptr;
  755. return BB;
  756. };
  757. // Instrument the SrcBB if it has a single successor,
  758. // otherwise, the DestBB if this is not a critical edge.
  759. Instruction *TI = SrcBB->getTerminator();
  760. if (TI->getNumSuccessors() <= 1)
  761. return canInstrument(SrcBB);
  762. if (!E->IsCritical)
  763. return canInstrument(DestBB);
  764. // Some IndirectBr critical edges cannot be split by the previous
  765. // SplitIndirectBrCriticalEdges call. Bail out.
  766. unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
  767. BasicBlock *InstrBB =
  768. isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
  769. if (!InstrBB) {
  770. LLVM_DEBUG(
  771. dbgs() << "Fail to split critical edge: not instrument this edge.\n");
  772. return nullptr;
  773. }
  774. // For a critical edge, we have to split. Instrument the newly
  775. // created BB.
  776. IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
  777. LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
  778. << " --> " << getBBInfo(DestBB).Index << "\n");
  779. // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
  780. MST.addEdge(SrcBB, InstrBB, 0);
  781. // Second one: Add new edge of InstrBB->DestBB.
  782. Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
  783. NewEdge1.InMST = true;
  784. E->Removed = true;
  785. return canInstrument(InstrBB);
  786. }
  787. // When generating value profiling calls on Windows routines that make use of
  788. // handler funclets for exception processing an operand bundle needs to attached
  789. // to the called function. This routine will set \p OpBundles to contain the
  790. // funclet information, if any is needed, that should be placed on the generated
  791. // value profiling call for the value profile candidate call.
  792. static void
  793. populateEHOperandBundle(VPCandidateInfo &Cand,
  794. DenseMap<BasicBlock *, ColorVector> &BlockColors,
  795. SmallVectorImpl<OperandBundleDef> &OpBundles) {
  796. auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
  797. if (!OrigCall)
  798. return;
  799. if (!isa<IntrinsicInst>(OrigCall)) {
  800. // The instrumentation call should belong to the same funclet as a
  801. // non-intrinsic call, so just copy the operand bundle, if any exists.
  802. Optional<OperandBundleUse> ParentFunclet =
  803. OrigCall->getOperandBundle(LLVMContext::OB_funclet);
  804. if (ParentFunclet)
  805. OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
  806. } else {
  807. // Intrinsics or other instructions do not get funclet information from the
  808. // front-end. Need to use the BlockColors that was computed by the routine
  809. // colorEHFunclets to determine whether a funclet is needed.
  810. if (!BlockColors.empty()) {
  811. const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
  812. assert(CV.size() == 1 && "non-unique color for block!");
  813. Instruction *EHPad = CV.front()->getFirstNonPHI();
  814. if (EHPad->isEHPad())
  815. OpBundles.emplace_back("funclet", EHPad);
  816. }
  817. }
  818. }
  819. // Visit all edge and instrument the edges not in MST, and do value profiling.
  820. // Critical edges will be split.
  821. static void instrumentOneFunc(
  822. Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI,
  823. BlockFrequencyInfo *BFI,
  824. std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
  825. bool IsCS) {
  826. // Split indirectbr critical edges here before computing the MST rather than
  827. // later in getInstrBB() to avoid invalidating it.
  828. SplitIndirectBrCriticalEdges(F, BPI, BFI);
  829. FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(
  830. F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry);
  831. Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
  832. auto Name = ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy);
  833. auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
  834. FuncInfo.FunctionHash);
  835. if (PGOFunctionEntryCoverage) {
  836. assert(!IsCS &&
  837. "entry coverge does not support context-sensitive instrumentation");
  838. auto &EntryBB = F.getEntryBlock();
  839. IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
  840. // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
  841. // i32 <index>)
  842. Builder.CreateCall(
  843. Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
  844. {Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
  845. return;
  846. }
  847. std::vector<BasicBlock *> InstrumentBBs;
  848. FuncInfo.getInstrumentBBs(InstrumentBBs);
  849. unsigned NumCounters =
  850. InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
  851. uint32_t I = 0;
  852. for (auto *InstrBB : InstrumentBBs) {
  853. IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
  854. assert(Builder.GetInsertPoint() != InstrBB->end() &&
  855. "Cannot get the Instrumentation point");
  856. // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
  857. // i32 <index>)
  858. Builder.CreateCall(
  859. Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
  860. {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
  861. }
  862. // Now instrument select instructions:
  863. FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar,
  864. FuncInfo.FunctionHash);
  865. assert(I == NumCounters);
  866. if (DisableValueProfiling)
  867. return;
  868. NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
  869. // Intrinsic function calls do not have funclet operand bundles needed for
  870. // Windows exception handling attached to them. However, if value profiling is
  871. // inserted for one of these calls, then a funclet value will need to be set
  872. // on the instrumentation call based on the funclet coloring.
  873. DenseMap<BasicBlock *, ColorVector> BlockColors;
  874. if (F.hasPersonalityFn() &&
  875. isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
  876. BlockColors = colorEHFunclets(F);
  877. // For each VP Kind, walk the VP candidates and instrument each one.
  878. for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
  879. unsigned SiteIndex = 0;
  880. if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
  881. continue;
  882. for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
  883. LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
  884. << " site: CallSite Index = " << SiteIndex << "\n");
  885. IRBuilder<> Builder(Cand.InsertPt);
  886. assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
  887. "Cannot get the Instrumentation point");
  888. Value *ToProfile = nullptr;
  889. if (Cand.V->getType()->isIntegerTy())
  890. ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
  891. else if (Cand.V->getType()->isPointerTy())
  892. ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
  893. assert(ToProfile && "value profiling Value is of unexpected type");
  894. SmallVector<OperandBundleDef, 1> OpBundles;
  895. populateEHOperandBundle(Cand, BlockColors, OpBundles);
  896. Builder.CreateCall(
  897. Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
  898. {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
  899. Builder.getInt64(FuncInfo.FunctionHash), ToProfile,
  900. Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
  901. OpBundles);
  902. }
  903. } // IPVK_First <= Kind <= IPVK_Last
  904. }
  905. namespace {
  906. // This class represents a CFG edge in profile use compilation.
  907. struct PGOUseEdge : public PGOEdge {
  908. bool CountValid = false;
  909. uint64_t CountValue = 0;
  910. PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
  911. : PGOEdge(Src, Dest, W) {}
  912. // Set edge count value
  913. void setEdgeCount(uint64_t Value) {
  914. CountValue = Value;
  915. CountValid = true;
  916. }
  917. // Return the information string for this object.
  918. std::string infoString() const {
  919. if (!CountValid)
  920. return PGOEdge::infoString();
  921. return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue))
  922. .str();
  923. }
  924. };
  925. using DirectEdges = SmallVector<PGOUseEdge *, 2>;
  926. // This class stores the auxiliary information for each BB.
  927. struct UseBBInfo : public BBInfo {
  928. uint64_t CountValue = 0;
  929. bool CountValid;
  930. int32_t UnknownCountInEdge = 0;
  931. int32_t UnknownCountOutEdge = 0;
  932. DirectEdges InEdges;
  933. DirectEdges OutEdges;
  934. UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {}
  935. UseBBInfo(unsigned IX, uint64_t C)
  936. : BBInfo(IX), CountValue(C), CountValid(true) {}
  937. // Set the profile count value for this BB.
  938. void setBBInfoCount(uint64_t Value) {
  939. CountValue = Value;
  940. CountValid = true;
  941. }
  942. // Return the information string of this object.
  943. std::string infoString() const {
  944. if (!CountValid)
  945. return BBInfo::infoString();
  946. return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str();
  947. }
  948. // Add an OutEdge and update the edge count.
  949. void addOutEdge(PGOUseEdge *E) {
  950. OutEdges.push_back(E);
  951. UnknownCountOutEdge++;
  952. }
  953. // Add an InEdge and update the edge count.
  954. void addInEdge(PGOUseEdge *E) {
  955. InEdges.push_back(E);
  956. UnknownCountInEdge++;
  957. }
  958. };
  959. } // end anonymous namespace
  960. // Sum up the count values for all the edges.
  961. static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) {
  962. uint64_t Total = 0;
  963. for (auto &E : Edges) {
  964. if (E->Removed)
  965. continue;
  966. Total += E->CountValue;
  967. }
  968. return Total;
  969. }
  970. namespace {
  971. class PGOUseFunc {
  972. public:
  973. PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
  974. std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
  975. BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
  976. ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry)
  977. : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
  978. FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
  979. InstrumentFuncEntry),
  980. FreqAttr(FFA_Normal), IsCS(IsCS) {}
  981. // Read counts for the instrumented BB from profile.
  982. bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
  983. bool &AllMinusOnes);
  984. // Populate the counts for all BBs.
  985. void populateCounters();
  986. // Set the branch weights based on the count values.
  987. void setBranchWeights();
  988. // Annotate the value profile call sites for all value kind.
  989. void annotateValueSites();
  990. // Annotate the value profile call sites for one value kind.
  991. void annotateValueSites(uint32_t Kind);
  992. // Annotate the irreducible loop header weights.
  993. void annotateIrrLoopHeaderWeights();
  994. // The hotness of the function from the profile count.
  995. enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
  996. // Return the function hotness from the profile.
  997. FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
  998. // Return the function hash.
  999. uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
  1000. // Return the profile record for this function;
  1001. InstrProfRecord &getProfileRecord() { return ProfileRecord; }
  1002. // Return the auxiliary BB information.
  1003. UseBBInfo &getBBInfo(const BasicBlock *BB) const {
  1004. return FuncInfo.getBBInfo(BB);
  1005. }
  1006. // Return the auxiliary BB information if available.
  1007. UseBBInfo *findBBInfo(const BasicBlock *BB) const {
  1008. return FuncInfo.findBBInfo(BB);
  1009. }
  1010. Function &getFunc() const { return F; }
  1011. void dumpInfo(std::string Str = "") const {
  1012. FuncInfo.dumpInfo(Str);
  1013. }
  1014. uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
  1015. private:
  1016. Function &F;
  1017. Module *M;
  1018. BlockFrequencyInfo *BFI;
  1019. ProfileSummaryInfo *PSI;
  1020. // This member stores the shared information with class PGOGenFunc.
  1021. FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
  1022. // The maximum count value in the profile. This is only used in PGO use
  1023. // compilation.
  1024. uint64_t ProgramMaxCount;
  1025. // Position of counter that remains to be read.
  1026. uint32_t CountPosition = 0;
  1027. // Total size of the profile count for this function.
  1028. uint32_t ProfileCountSize = 0;
  1029. // ProfileRecord for this function.
  1030. InstrProfRecord ProfileRecord;
  1031. // Function hotness info derived from profile.
  1032. FuncFreqAttr FreqAttr;
  1033. // Is to use the context sensitive profile.
  1034. bool IsCS;
  1035. // Find the Instrumented BB and set the value. Return false on error.
  1036. bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
  1037. // Set the edge counter value for the unknown edge -- there should be only
  1038. // one unknown edge.
  1039. void setEdgeCount(DirectEdges &Edges, uint64_t Value);
  1040. // Return FuncName string;
  1041. std::string getFuncName() const { return FuncInfo.FuncName; }
  1042. // Set the hot/cold inline hints based on the count values.
  1043. // FIXME: This function should be removed once the functionality in
  1044. // the inliner is implemented.
  1045. void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
  1046. if (PSI->isHotCount(EntryCount))
  1047. FreqAttr = FFA_Hot;
  1048. else if (PSI->isColdCount(MaxCount))
  1049. FreqAttr = FFA_Cold;
  1050. }
  1051. };
  1052. } // end anonymous namespace
  1053. // Visit all the edges and assign the count value for the instrumented
  1054. // edges and the BB. Return false on error.
  1055. bool PGOUseFunc::setInstrumentedCounts(
  1056. const std::vector<uint64_t> &CountFromProfile) {
  1057. std::vector<BasicBlock *> InstrumentBBs;
  1058. FuncInfo.getInstrumentBBs(InstrumentBBs);
  1059. unsigned NumCounters =
  1060. InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
  1061. // The number of counters here should match the number of counters
  1062. // in profile. Return if they mismatch.
  1063. if (NumCounters != CountFromProfile.size()) {
  1064. return false;
  1065. }
  1066. auto *FuncEntry = &*F.begin();
  1067. // Set the profile count to the Instrumented BBs.
  1068. uint32_t I = 0;
  1069. for (BasicBlock *InstrBB : InstrumentBBs) {
  1070. uint64_t CountValue = CountFromProfile[I++];
  1071. UseBBInfo &Info = getBBInfo(InstrBB);
  1072. // If we reach here, we know that we have some nonzero count
  1073. // values in this function. The entry count should not be 0.
  1074. // Fix it if necessary.
  1075. if (InstrBB == FuncEntry && CountValue == 0)
  1076. CountValue = 1;
  1077. Info.setBBInfoCount(CountValue);
  1078. }
  1079. ProfileCountSize = CountFromProfile.size();
  1080. CountPosition = I;
  1081. // Set the edge count and update the count of unknown edges for BBs.
  1082. auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
  1083. E->setEdgeCount(Value);
  1084. this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
  1085. this->getBBInfo(E->DestBB).UnknownCountInEdge--;
  1086. };
  1087. // Set the profile count the Instrumented edges. There are BBs that not in
  1088. // MST but not instrumented. Need to set the edge count value so that we can
  1089. // populate the profile counts later.
  1090. for (auto &E : FuncInfo.MST.AllEdges) {
  1091. if (E->Removed || E->InMST)
  1092. continue;
  1093. const BasicBlock *SrcBB = E->SrcBB;
  1094. UseBBInfo &SrcInfo = getBBInfo(SrcBB);
  1095. // If only one out-edge, the edge profile count should be the same as BB
  1096. // profile count.
  1097. if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1)
  1098. setEdgeCount(E.get(), SrcInfo.CountValue);
  1099. else {
  1100. const BasicBlock *DestBB = E->DestBB;
  1101. UseBBInfo &DestInfo = getBBInfo(DestBB);
  1102. // If only one in-edge, the edge profile count should be the same as BB
  1103. // profile count.
  1104. if (DestInfo.CountValid && DestInfo.InEdges.size() == 1)
  1105. setEdgeCount(E.get(), DestInfo.CountValue);
  1106. }
  1107. if (E->CountValid)
  1108. continue;
  1109. // E's count should have been set from profile. If not, this meenas E skips
  1110. // the instrumentation. We set the count to 0.
  1111. setEdgeCount(E.get(), 0);
  1112. }
  1113. return true;
  1114. }
  1115. // Set the count value for the unknown edge. There should be one and only one
  1116. // unknown edge in Edges vector.
  1117. void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
  1118. for (auto &E : Edges) {
  1119. if (E->CountValid)
  1120. continue;
  1121. E->setEdgeCount(Value);
  1122. getBBInfo(E->SrcBB).UnknownCountOutEdge--;
  1123. getBBInfo(E->DestBB).UnknownCountInEdge--;
  1124. return;
  1125. }
  1126. llvm_unreachable("Cannot find the unknown count edge");
  1127. }
  1128. // Emit function metadata indicating PGO profile mismatch.
  1129. static void annotateFunctionWithHashMismatch(Function &F,
  1130. LLVMContext &ctx) {
  1131. const char MetadataName[] = "instr_prof_hash_mismatch";
  1132. SmallVector<Metadata *, 2> Names;
  1133. // If this metadata already exists, ignore.
  1134. auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
  1135. if (Existing) {
  1136. MDTuple *Tuple = cast<MDTuple>(Existing);
  1137. for (auto &N : Tuple->operands()) {
  1138. if (cast<MDString>(N.get())->getString() == MetadataName)
  1139. return;
  1140. Names.push_back(N.get());
  1141. }
  1142. }
  1143. MDBuilder MDB(ctx);
  1144. Names.push_back(MDB.createString(MetadataName));
  1145. MDNode *MD = MDTuple::get(ctx, Names);
  1146. F.setMetadata(LLVMContext::MD_annotation, MD);
  1147. }
  1148. // Read the profile from ProfileFileName and assign the value to the
  1149. // instrumented BB and the edges. This function also updates ProgramMaxCount.
  1150. // Return true if the profile are successfully read, and false on errors.
  1151. bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
  1152. bool &AllMinusOnes) {
  1153. auto &Ctx = M->getContext();
  1154. Expected<InstrProfRecord> Result =
  1155. PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
  1156. if (Error E = Result.takeError()) {
  1157. handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
  1158. auto Err = IPE.get();
  1159. bool SkipWarning = false;
  1160. LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
  1161. << FuncInfo.FuncName << ": ");
  1162. if (Err == instrprof_error::unknown_function) {
  1163. IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
  1164. SkipWarning = !PGOWarnMissing;
  1165. LLVM_DEBUG(dbgs() << "unknown function");
  1166. } else if (Err == instrprof_error::hash_mismatch ||
  1167. Err == instrprof_error::malformed) {
  1168. IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
  1169. SkipWarning =
  1170. NoPGOWarnMismatch ||
  1171. (NoPGOWarnMismatchComdat &&
  1172. (F.hasComdat() ||
  1173. F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
  1174. LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
  1175. // Emit function metadata indicating PGO profile mismatch.
  1176. annotateFunctionWithHashMismatch(F, M->getContext());
  1177. }
  1178. LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
  1179. if (SkipWarning)
  1180. return;
  1181. std::string Msg = IPE.message() + std::string(" ") + F.getName().str() +
  1182. std::string(" Hash = ") +
  1183. std::to_string(FuncInfo.FunctionHash);
  1184. Ctx.diagnose(
  1185. DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
  1186. });
  1187. return false;
  1188. }
  1189. ProfileRecord = std::move(Result.get());
  1190. std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
  1191. IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
  1192. LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
  1193. AllMinusOnes = (CountFromProfile.size() > 0);
  1194. uint64_t ValueSum = 0;
  1195. for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
  1196. LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
  1197. ValueSum += CountFromProfile[I];
  1198. if (CountFromProfile[I] != (uint64_t)-1)
  1199. AllMinusOnes = false;
  1200. }
  1201. AllZeros = (ValueSum == 0);
  1202. LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
  1203. getBBInfo(nullptr).UnknownCountOutEdge = 2;
  1204. getBBInfo(nullptr).UnknownCountInEdge = 2;
  1205. if (!setInstrumentedCounts(CountFromProfile)) {
  1206. LLVM_DEBUG(
  1207. dbgs() << "Inconsistent number of counts, skipping this function");
  1208. Ctx.diagnose(DiagnosticInfoPGOProfile(
  1209. M->getName().data(),
  1210. Twine("Inconsistent number of counts in ") + F.getName().str()
  1211. + Twine(": the profile may be stale or there is a function name collision."),
  1212. DS_Warning));
  1213. return false;
  1214. }
  1215. ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
  1216. return true;
  1217. }
  1218. // Populate the counters from instrumented BBs to all BBs.
  1219. // In the end of this operation, all BBs should have a valid count value.
  1220. void PGOUseFunc::populateCounters() {
  1221. bool Changes = true;
  1222. unsigned NumPasses = 0;
  1223. while (Changes) {
  1224. NumPasses++;
  1225. Changes = false;
  1226. // For efficient traversal, it's better to start from the end as most
  1227. // of the instrumented edges are at the end.
  1228. for (auto &BB : reverse(F)) {
  1229. UseBBInfo *Count = findBBInfo(&BB);
  1230. if (Count == nullptr)
  1231. continue;
  1232. if (!Count->CountValid) {
  1233. if (Count->UnknownCountOutEdge == 0) {
  1234. Count->CountValue = sumEdgeCount(Count->OutEdges);
  1235. Count->CountValid = true;
  1236. Changes = true;
  1237. } else if (Count->UnknownCountInEdge == 0) {
  1238. Count->CountValue = sumEdgeCount(Count->InEdges);
  1239. Count->CountValid = true;
  1240. Changes = true;
  1241. }
  1242. }
  1243. if (Count->CountValid) {
  1244. if (Count->UnknownCountOutEdge == 1) {
  1245. uint64_t Total = 0;
  1246. uint64_t OutSum = sumEdgeCount(Count->OutEdges);
  1247. // If the one of the successor block can early terminate (no-return),
  1248. // we can end up with situation where out edge sum count is larger as
  1249. // the source BB's count is collected by a post-dominated block.
  1250. if (Count->CountValue > OutSum)
  1251. Total = Count->CountValue - OutSum;
  1252. setEdgeCount(Count->OutEdges, Total);
  1253. Changes = true;
  1254. }
  1255. if (Count->UnknownCountInEdge == 1) {
  1256. uint64_t Total = 0;
  1257. uint64_t InSum = sumEdgeCount(Count->InEdges);
  1258. if (Count->CountValue > InSum)
  1259. Total = Count->CountValue - InSum;
  1260. setEdgeCount(Count->InEdges, Total);
  1261. Changes = true;
  1262. }
  1263. }
  1264. }
  1265. }
  1266. LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
  1267. #ifndef NDEBUG
  1268. // Assert every BB has a valid counter.
  1269. for (auto &BB : F) {
  1270. auto BI = findBBInfo(&BB);
  1271. if (BI == nullptr)
  1272. continue;
  1273. assert(BI->CountValid && "BB count is not valid");
  1274. }
  1275. #endif
  1276. uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
  1277. uint64_t FuncMaxCount = FuncEntryCount;
  1278. for (auto &BB : F) {
  1279. auto BI = findBBInfo(&BB);
  1280. if (BI == nullptr)
  1281. continue;
  1282. FuncMaxCount = std::max(FuncMaxCount, BI->CountValue);
  1283. }
  1284. // Fix the obviously inconsistent entry count.
  1285. if (FuncMaxCount > 0 && FuncEntryCount == 0)
  1286. FuncEntryCount = 1;
  1287. F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real));
  1288. markFunctionAttributes(FuncEntryCount, FuncMaxCount);
  1289. // Now annotate select instructions
  1290. FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition);
  1291. assert(CountPosition == ProfileCountSize);
  1292. LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
  1293. }
  1294. // Assign the scaled count values to the BB with multiple out edges.
  1295. void PGOUseFunc::setBranchWeights() {
  1296. // Generate MD_prof metadata for every branch instruction.
  1297. LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
  1298. << " IsCS=" << IsCS << "\n");
  1299. for (auto &BB : F) {
  1300. Instruction *TI = BB.getTerminator();
  1301. if (TI->getNumSuccessors() < 2)
  1302. continue;
  1303. if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
  1304. isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI)))
  1305. continue;
  1306. if (getBBInfo(&BB).CountValue == 0)
  1307. continue;
  1308. // We have a non-zero Branch BB.
  1309. const UseBBInfo &BBCountInfo = getBBInfo(&BB);
  1310. unsigned Size = BBCountInfo.OutEdges.size();
  1311. SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
  1312. uint64_t MaxCount = 0;
  1313. for (unsigned s = 0; s < Size; s++) {
  1314. const PGOUseEdge *E = BBCountInfo.OutEdges[s];
  1315. const BasicBlock *SrcBB = E->SrcBB;
  1316. const BasicBlock *DestBB = E->DestBB;
  1317. if (DestBB == nullptr)
  1318. continue;
  1319. unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
  1320. uint64_t EdgeCount = E->CountValue;
  1321. if (EdgeCount > MaxCount)
  1322. MaxCount = EdgeCount;
  1323. EdgeCounts[SuccNum] = EdgeCount;
  1324. }
  1325. setProfMetadata(M, TI, EdgeCounts, MaxCount);
  1326. }
  1327. }
  1328. static bool isIndirectBrTarget(BasicBlock *BB) {
  1329. for (BasicBlock *Pred : predecessors(BB)) {
  1330. if (isa<IndirectBrInst>(Pred->getTerminator()))
  1331. return true;
  1332. }
  1333. return false;
  1334. }
  1335. void PGOUseFunc::annotateIrrLoopHeaderWeights() {
  1336. LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
  1337. // Find irr loop headers
  1338. for (auto &BB : F) {
  1339. // As a heuristic also annotate indrectbr targets as they have a high chance
  1340. // to become an irreducible loop header after the indirectbr tail
  1341. // duplication.
  1342. if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
  1343. Instruction *TI = BB.getTerminator();
  1344. const UseBBInfo &BBCountInfo = getBBInfo(&BB);
  1345. setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue);
  1346. }
  1347. }
  1348. }
  1349. void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
  1350. if (PGOFunctionEntryCoverage)
  1351. return;
  1352. Module *M = F.getParent();
  1353. IRBuilder<> Builder(&SI);
  1354. Type *Int64Ty = Builder.getInt64Ty();
  1355. Type *I8PtrTy = Builder.getInt8PtrTy();
  1356. auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
  1357. Builder.CreateCall(
  1358. Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
  1359. {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
  1360. Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
  1361. Builder.getInt32(*CurCtrIdx), Step});
  1362. ++(*CurCtrIdx);
  1363. }
  1364. void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
  1365. std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
  1366. assert(*CurCtrIdx < CountFromProfile.size() &&
  1367. "Out of bound access of counters");
  1368. uint64_t SCounts[2];
  1369. SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
  1370. ++(*CurCtrIdx);
  1371. uint64_t TotalCount = 0;
  1372. auto BI = UseFunc->findBBInfo(SI.getParent());
  1373. if (BI != nullptr)
  1374. TotalCount = BI->CountValue;
  1375. // False Count
  1376. SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
  1377. uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
  1378. if (MaxCount)
  1379. setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
  1380. }
  1381. void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
  1382. if (!PGOInstrSelect)
  1383. return;
  1384. // FIXME: do not handle this yet.
  1385. if (SI.getCondition()->getType()->isVectorTy())
  1386. return;
  1387. switch (Mode) {
  1388. case VM_counting:
  1389. NSIs++;
  1390. return;
  1391. case VM_instrument:
  1392. instrumentOneSelectInst(SI);
  1393. return;
  1394. case VM_annotate:
  1395. annotateOneSelectInst(SI);
  1396. return;
  1397. }
  1398. llvm_unreachable("Unknown visiting mode");
  1399. }
  1400. // Traverse all valuesites and annotate the instructions for all value kind.
  1401. void PGOUseFunc::annotateValueSites() {
  1402. if (DisableValueProfiling)
  1403. return;
  1404. // Create the PGOFuncName meta data.
  1405. createPGOFuncNameMetadata(F, FuncInfo.FuncName);
  1406. for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
  1407. annotateValueSites(Kind);
  1408. }
  1409. // Annotate the instructions for a specific value kind.
  1410. void PGOUseFunc::annotateValueSites(uint32_t Kind) {
  1411. assert(Kind <= IPVK_Last);
  1412. unsigned ValueSiteIndex = 0;
  1413. auto &ValueSites = FuncInfo.ValueSites[Kind];
  1414. unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
  1415. if (NumValueSites != ValueSites.size()) {
  1416. auto &Ctx = M->getContext();
  1417. Ctx.diagnose(DiagnosticInfoPGOProfile(
  1418. M->getName().data(),
  1419. Twine("Inconsistent number of value sites for ") +
  1420. Twine(ValueProfKindDescr[Kind]) +
  1421. Twine(" profiling in \"") + F.getName().str() +
  1422. Twine("\", possibly due to the use of a stale profile."),
  1423. DS_Warning));
  1424. return;
  1425. }
  1426. for (VPCandidateInfo &I : ValueSites) {
  1427. LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
  1428. << "): Index = " << ValueSiteIndex << " out of "
  1429. << NumValueSites << "\n");
  1430. annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord,
  1431. static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
  1432. Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations
  1433. : MaxNumAnnotations);
  1434. ValueSiteIndex++;
  1435. }
  1436. }
  1437. // Collect the set of members for each Comdat in module M and store
  1438. // in ComdatMembers.
  1439. static void collectComdatMembers(
  1440. Module &M,
  1441. std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
  1442. if (!DoComdatRenaming)
  1443. return;
  1444. for (Function &F : M)
  1445. if (Comdat *C = F.getComdat())
  1446. ComdatMembers.insert(std::make_pair(C, &F));
  1447. for (GlobalVariable &GV : M.globals())
  1448. if (Comdat *C = GV.getComdat())
  1449. ComdatMembers.insert(std::make_pair(C, &GV));
  1450. for (GlobalAlias &GA : M.aliases())
  1451. if (Comdat *C = GA.getComdat())
  1452. ComdatMembers.insert(std::make_pair(C, &GA));
  1453. }
  1454. static bool InstrumentAllFunctions(
  1455. Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
  1456. function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
  1457. function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
  1458. // For the context-sensitve instrumentation, we should have a separated pass
  1459. // (before LTO/ThinLTO linking) to create these variables.
  1460. if (!IsCS)
  1461. createIRLevelProfileFlagVar(M, /*IsCS=*/false);
  1462. std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
  1463. collectComdatMembers(M, ComdatMembers);
  1464. for (auto &F : M) {
  1465. if (F.isDeclaration())
  1466. continue;
  1467. if (F.hasFnAttribute(llvm::Attribute::NoProfile))
  1468. continue;
  1469. auto &TLI = LookupTLI(F);
  1470. auto *BPI = LookupBPI(F);
  1471. auto *BFI = LookupBFI(F);
  1472. instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS);
  1473. }
  1474. return true;
  1475. }
  1476. PreservedAnalyses
  1477. PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) {
  1478. createProfileFileNameVar(M, CSInstrName);
  1479. // The variable in a comdat may be discarded by LTO. Ensure the declaration
  1480. // will be retained.
  1481. appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true));
  1482. return PreservedAnalyses::all();
  1483. }
  1484. bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) {
  1485. if (skipModule(M))
  1486. return false;
  1487. auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & {
  1488. return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  1489. };
  1490. auto LookupBPI = [this](Function &F) {
  1491. return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
  1492. };
  1493. auto LookupBFI = [this](Function &F) {
  1494. return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
  1495. };
  1496. return InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS);
  1497. }
  1498. PreservedAnalyses PGOInstrumentationGen::run(Module &M,
  1499. ModuleAnalysisManager &AM) {
  1500. auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  1501. auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
  1502. return FAM.getResult<TargetLibraryAnalysis>(F);
  1503. };
  1504. auto LookupBPI = [&FAM](Function &F) {
  1505. return &FAM.getResult<BranchProbabilityAnalysis>(F);
  1506. };
  1507. auto LookupBFI = [&FAM](Function &F) {
  1508. return &FAM.getResult<BlockFrequencyAnalysis>(F);
  1509. };
  1510. if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS))
  1511. return PreservedAnalyses::all();
  1512. return PreservedAnalyses::none();
  1513. }
  1514. // Using the ratio b/w sums of profile count values and BFI count values to
  1515. // adjust the func entry count.
  1516. static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
  1517. BranchProbabilityInfo &NBPI) {
  1518. Function &F = Func.getFunc();
  1519. BlockFrequencyInfo NBFI(F, NBPI, LI);
  1520. #ifndef NDEBUG
  1521. auto BFIEntryCount = F.getEntryCount();
  1522. assert(BFIEntryCount.hasValue() && (BFIEntryCount->getCount() > 0) &&
  1523. "Invalid BFI Entrycount");
  1524. #endif
  1525. auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
  1526. auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
  1527. for (auto &BBI : F) {
  1528. uint64_t CountValue = 0;
  1529. uint64_t BFICountValue = 0;
  1530. if (!Func.findBBInfo(&BBI))
  1531. continue;
  1532. auto BFICount = NBFI.getBlockProfileCount(&BBI);
  1533. CountValue = Func.getBBInfo(&BBI).CountValue;
  1534. BFICountValue = BFICount.getValue();
  1535. SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
  1536. SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
  1537. }
  1538. if (SumCount.isZero())
  1539. return;
  1540. assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
  1541. "Incorrect sum of BFI counts");
  1542. if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
  1543. return;
  1544. double Scale = (SumCount / SumBFICount).convertToDouble();
  1545. if (Scale < 1.001 && Scale > 0.999)
  1546. return;
  1547. uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue;
  1548. uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
  1549. if (NewEntryCount == 0)
  1550. NewEntryCount = 1;
  1551. if (NewEntryCount != FuncEntryCount) {
  1552. F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
  1553. LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
  1554. << ", entry_count " << FuncEntryCount << " --> "
  1555. << NewEntryCount << "\n");
  1556. }
  1557. }
  1558. // Compare the profile count values with BFI count values, and print out
  1559. // the non-matching ones.
  1560. static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
  1561. BranchProbabilityInfo &NBPI,
  1562. uint64_t HotCountThreshold,
  1563. uint64_t ColdCountThreshold) {
  1564. Function &F = Func.getFunc();
  1565. BlockFrequencyInfo NBFI(F, NBPI, LI);
  1566. // bool PrintFunc = false;
  1567. bool HotBBOnly = PGOVerifyHotBFI;
  1568. std::string Msg;
  1569. OptimizationRemarkEmitter ORE(&F);
  1570. unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
  1571. for (auto &BBI : F) {
  1572. uint64_t CountValue = 0;
  1573. uint64_t BFICountValue = 0;
  1574. if (Func.getBBInfo(&BBI).CountValid)
  1575. CountValue = Func.getBBInfo(&BBI).CountValue;
  1576. BBNum++;
  1577. if (CountValue)
  1578. NonZeroBBNum++;
  1579. auto BFICount = NBFI.getBlockProfileCount(&BBI);
  1580. if (BFICount)
  1581. BFICountValue = BFICount.getValue();
  1582. if (HotBBOnly) {
  1583. bool rawIsHot = CountValue >= HotCountThreshold;
  1584. bool BFIIsHot = BFICountValue >= HotCountThreshold;
  1585. bool rawIsCold = CountValue <= ColdCountThreshold;
  1586. bool ShowCount = false;
  1587. if (rawIsHot && !BFIIsHot) {
  1588. Msg = "raw-Hot to BFI-nonHot";
  1589. ShowCount = true;
  1590. } else if (rawIsCold && BFIIsHot) {
  1591. Msg = "raw-Cold to BFI-Hot";
  1592. ShowCount = true;
  1593. }
  1594. if (!ShowCount)
  1595. continue;
  1596. } else {
  1597. if ((CountValue < PGOVerifyBFICutoff) &&
  1598. (BFICountValue < PGOVerifyBFICutoff))
  1599. continue;
  1600. uint64_t Diff = (BFICountValue >= CountValue)
  1601. ? BFICountValue - CountValue
  1602. : CountValue - BFICountValue;
  1603. if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
  1604. continue;
  1605. }
  1606. BBMisMatchNum++;
  1607. ORE.emit([&]() {
  1608. OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify",
  1609. F.getSubprogram(), &BBI);
  1610. Remark << "BB " << ore::NV("Block", BBI.getName())
  1611. << " Count=" << ore::NV("Count", CountValue)
  1612. << " BFI_Count=" << ore::NV("Count", BFICountValue);
  1613. if (!Msg.empty())
  1614. Remark << " (" << Msg << ")";
  1615. return Remark;
  1616. });
  1617. }
  1618. if (BBMisMatchNum)
  1619. ORE.emit([&]() {
  1620. return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
  1621. F.getSubprogram(), &F.getEntryBlock())
  1622. << "In Func " << ore::NV("Function", F.getName())
  1623. << ": Num_of_BB=" << ore::NV("Count", BBNum)
  1624. << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
  1625. << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
  1626. });
  1627. }
  1628. static bool annotateAllFunctions(
  1629. Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
  1630. function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
  1631. function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
  1632. function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
  1633. ProfileSummaryInfo *PSI, bool IsCS) {
  1634. LLVM_DEBUG(dbgs() << "Read in profile counters: ");
  1635. auto &Ctx = M.getContext();
  1636. // Read the counter array from file.
  1637. auto ReaderOrErr =
  1638. IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName);
  1639. if (Error E = ReaderOrErr.takeError()) {
  1640. handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
  1641. Ctx.diagnose(
  1642. DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
  1643. });
  1644. return false;
  1645. }
  1646. std::unique_ptr<IndexedInstrProfReader> PGOReader =
  1647. std::move(ReaderOrErr.get());
  1648. if (!PGOReader) {
  1649. Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
  1650. StringRef("Cannot get PGOReader")));
  1651. return false;
  1652. }
  1653. if (!PGOReader->hasCSIRLevelProfile() && IsCS)
  1654. return false;
  1655. // TODO: might need to change the warning once the clang option is finalized.
  1656. if (!PGOReader->isIRLevelProfile()) {
  1657. Ctx.diagnose(DiagnosticInfoPGOProfile(
  1658. ProfileFileName.data(), "Not an IR level instrumentation profile"));
  1659. return false;
  1660. }
  1661. if (PGOReader->hasSingleByteCoverage()) {
  1662. Ctx.diagnose(DiagnosticInfoPGOProfile(
  1663. ProfileFileName.data(),
  1664. "Cannot use coverage profiles for optimization"));
  1665. return false;
  1666. }
  1667. if (PGOReader->functionEntryOnly()) {
  1668. Ctx.diagnose(DiagnosticInfoPGOProfile(
  1669. ProfileFileName.data(),
  1670. "Function entry profiles are not yet supported for optimization"));
  1671. return false;
  1672. }
  1673. // Add the profile summary (read from the header of the indexed summary) here
  1674. // so that we can use it below when reading counters (which checks if the
  1675. // function should be marked with a cold or inlinehint attribute).
  1676. M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
  1677. IsCS ? ProfileSummary::PSK_CSInstr
  1678. : ProfileSummary::PSK_Instr);
  1679. PSI->refresh();
  1680. std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
  1681. collectComdatMembers(M, ComdatMembers);
  1682. std::vector<Function *> HotFunctions;
  1683. std::vector<Function *> ColdFunctions;
  1684. // If the profile marked as always instrument the entry BB, do the
  1685. // same. Note this can be overwritten by the internal option in CFGMST.h
  1686. bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
  1687. if (PGOInstrumentEntry.getNumOccurrences() > 0)
  1688. InstrumentFuncEntry = PGOInstrumentEntry;
  1689. for (auto &F : M) {
  1690. if (F.isDeclaration())
  1691. continue;
  1692. auto &TLI = LookupTLI(F);
  1693. auto *BPI = LookupBPI(F);
  1694. auto *BFI = LookupBFI(F);
  1695. // Split indirectbr critical edges here before computing the MST rather than
  1696. // later in getInstrBB() to avoid invalidating it.
  1697. SplitIndirectBrCriticalEdges(F, BPI, BFI);
  1698. PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
  1699. InstrumentFuncEntry);
  1700. // When AllMinusOnes is true, it means the profile for the function
  1701. // is unrepresentative and this function is actually hot. Set the
  1702. // entry count of the function to be multiple times of hot threshold
  1703. // and drop all its internal counters.
  1704. bool AllMinusOnes = false;
  1705. bool AllZeros = false;
  1706. if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes))
  1707. continue;
  1708. if (AllZeros) {
  1709. F.setEntryCount(ProfileCount(0, Function::PCT_Real));
  1710. if (Func.getProgramMaxCount() != 0)
  1711. ColdFunctions.push_back(&F);
  1712. continue;
  1713. }
  1714. const unsigned MultiplyFactor = 3;
  1715. if (AllMinusOnes) {
  1716. uint64_t HotThreshold = PSI->getHotCountThreshold();
  1717. if (HotThreshold)
  1718. F.setEntryCount(
  1719. ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real));
  1720. HotFunctions.push_back(&F);
  1721. continue;
  1722. }
  1723. Func.populateCounters();
  1724. Func.setBranchWeights();
  1725. Func.annotateValueSites();
  1726. Func.annotateIrrLoopHeaderWeights();
  1727. PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
  1728. if (FreqAttr == PGOUseFunc::FFA_Cold)
  1729. ColdFunctions.push_back(&F);
  1730. else if (FreqAttr == PGOUseFunc::FFA_Hot)
  1731. HotFunctions.push_back(&F);
  1732. if (PGOViewCounts != PGOVCT_None &&
  1733. (ViewBlockFreqFuncName.empty() ||
  1734. F.getName().equals(ViewBlockFreqFuncName))) {
  1735. LoopInfo LI{DominatorTree(F)};
  1736. std::unique_ptr<BranchProbabilityInfo> NewBPI =
  1737. std::make_unique<BranchProbabilityInfo>(F, LI);
  1738. std::unique_ptr<BlockFrequencyInfo> NewBFI =
  1739. std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
  1740. if (PGOViewCounts == PGOVCT_Graph)
  1741. NewBFI->view();
  1742. else if (PGOViewCounts == PGOVCT_Text) {
  1743. dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
  1744. NewBFI->print(dbgs());
  1745. }
  1746. }
  1747. if (PGOViewRawCounts != PGOVCT_None &&
  1748. (ViewBlockFreqFuncName.empty() ||
  1749. F.getName().equals(ViewBlockFreqFuncName))) {
  1750. if (PGOViewRawCounts == PGOVCT_Graph)
  1751. if (ViewBlockFreqFuncName.empty())
  1752. WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
  1753. else
  1754. ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
  1755. else if (PGOViewRawCounts == PGOVCT_Text) {
  1756. dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
  1757. Func.dumpInfo();
  1758. }
  1759. }
  1760. if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) {
  1761. LoopInfo LI{DominatorTree(F)};
  1762. BranchProbabilityInfo NBPI(F, LI);
  1763. // Fix func entry count.
  1764. if (PGOFixEntryCount)
  1765. fixFuncEntryCount(Func, LI, NBPI);
  1766. // Verify BlockFrequency information.
  1767. uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
  1768. if (PGOVerifyHotBFI) {
  1769. HotCountThreshold = PSI->getOrCompHotCountThreshold();
  1770. ColdCountThreshold = PSI->getOrCompColdCountThreshold();
  1771. }
  1772. verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
  1773. }
  1774. }
  1775. // Set function hotness attribute from the profile.
  1776. // We have to apply these attributes at the end because their presence
  1777. // can affect the BranchProbabilityInfo of any callers, resulting in an
  1778. // inconsistent MST between prof-gen and prof-use.
  1779. for (auto &F : HotFunctions) {
  1780. F->addFnAttr(Attribute::InlineHint);
  1781. LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
  1782. << "\n");
  1783. }
  1784. for (auto &F : ColdFunctions) {
  1785. // Only set when there is no Attribute::Hot set by the user. For Hot
  1786. // attribute, user's annotation has the precedence over the profile.
  1787. if (F->hasFnAttribute(Attribute::Hot)) {
  1788. auto &Ctx = M.getContext();
  1789. std::string Msg = std::string("Function ") + F->getName().str() +
  1790. std::string(" is annotated as a hot function but"
  1791. " the profile is cold");
  1792. Ctx.diagnose(
  1793. DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
  1794. continue;
  1795. }
  1796. F->addFnAttr(Attribute::Cold);
  1797. LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
  1798. << "\n");
  1799. }
  1800. return true;
  1801. }
  1802. PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename,
  1803. std::string RemappingFilename,
  1804. bool IsCS)
  1805. : ProfileFileName(std::move(Filename)),
  1806. ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) {
  1807. if (!PGOTestProfileFile.empty())
  1808. ProfileFileName = PGOTestProfileFile;
  1809. if (!PGOTestProfileRemappingFile.empty())
  1810. ProfileRemappingFileName = PGOTestProfileRemappingFile;
  1811. }
  1812. PreservedAnalyses PGOInstrumentationUse::run(Module &M,
  1813. ModuleAnalysisManager &AM) {
  1814. auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  1815. auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
  1816. return FAM.getResult<TargetLibraryAnalysis>(F);
  1817. };
  1818. auto LookupBPI = [&FAM](Function &F) {
  1819. return &FAM.getResult<BranchProbabilityAnalysis>(F);
  1820. };
  1821. auto LookupBFI = [&FAM](Function &F) {
  1822. return &FAM.getResult<BlockFrequencyAnalysis>(F);
  1823. };
  1824. auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
  1825. if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName,
  1826. LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
  1827. return PreservedAnalyses::all();
  1828. return PreservedAnalyses::none();
  1829. }
  1830. bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) {
  1831. if (skipModule(M))
  1832. return false;
  1833. auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & {
  1834. return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  1835. };
  1836. auto LookupBPI = [this](Function &F) {
  1837. return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
  1838. };
  1839. auto LookupBFI = [this](Function &F) {
  1840. return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
  1841. };
  1842. auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  1843. return annotateAllFunctions(M, ProfileFileName, "", LookupTLI, LookupBPI,
  1844. LookupBFI, PSI, IsCS);
  1845. }
  1846. static std::string getSimpleNodeName(const BasicBlock *Node) {
  1847. if (!Node->getName().empty())
  1848. return std::string(Node->getName());
  1849. std::string SimpleNodeName;
  1850. raw_string_ostream OS(SimpleNodeName);
  1851. Node->printAsOperand(OS, false);
  1852. return OS.str();
  1853. }
  1854. void llvm::setProfMetadata(Module *M, Instruction *TI,
  1855. ArrayRef<uint64_t> EdgeCounts,
  1856. uint64_t MaxCount) {
  1857. MDBuilder MDB(M->getContext());
  1858. assert(MaxCount > 0 && "Bad max count");
  1859. uint64_t Scale = calculateCountScale(MaxCount);
  1860. SmallVector<unsigned, 4> Weights;
  1861. for (const auto &ECI : EdgeCounts)
  1862. Weights.push_back(scaleBranchCount(ECI, Scale));
  1863. LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
  1864. : Weights) {
  1865. dbgs() << W << " ";
  1866. } dbgs() << "\n";);
  1867. TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
  1868. if (EmitBranchProbability) {
  1869. std::string BrCondStr = getBranchCondString(TI);
  1870. if (BrCondStr.empty())
  1871. return;
  1872. uint64_t WSum =
  1873. std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
  1874. [](uint64_t w1, uint64_t w2) { return w1 + w2; });
  1875. uint64_t TotalCount =
  1876. std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
  1877. [](uint64_t c1, uint64_t c2) { return c1 + c2; });
  1878. Scale = calculateCountScale(WSum);
  1879. BranchProbability BP(scaleBranchCount(Weights[0], Scale),
  1880. scaleBranchCount(WSum, Scale));
  1881. std::string BranchProbStr;
  1882. raw_string_ostream OS(BranchProbStr);
  1883. OS << BP;
  1884. OS << " (total count : " << TotalCount << ")";
  1885. OS.flush();
  1886. Function *F = TI->getParent()->getParent();
  1887. OptimizationRemarkEmitter ORE(F);
  1888. ORE.emit([&]() {
  1889. return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
  1890. << BrCondStr << " is true with probability : " << BranchProbStr;
  1891. });
  1892. }
  1893. }
  1894. namespace llvm {
  1895. void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) {
  1896. MDBuilder MDB(M->getContext());
  1897. TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
  1898. MDB.createIrrLoopHeaderWeight(Count));
  1899. }
  1900. template <> struct GraphTraits<PGOUseFunc *> {
  1901. using NodeRef = const BasicBlock *;
  1902. using ChildIteratorType = const_succ_iterator;
  1903. using nodes_iterator = pointer_iterator<Function::const_iterator>;
  1904. static NodeRef getEntryNode(const PGOUseFunc *G) {
  1905. return &G->getFunc().front();
  1906. }
  1907. static ChildIteratorType child_begin(const NodeRef N) {
  1908. return succ_begin(N);
  1909. }
  1910. static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
  1911. static nodes_iterator nodes_begin(const PGOUseFunc *G) {
  1912. return nodes_iterator(G->getFunc().begin());
  1913. }
  1914. static nodes_iterator nodes_end(const PGOUseFunc *G) {
  1915. return nodes_iterator(G->getFunc().end());
  1916. }
  1917. };
  1918. template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
  1919. explicit DOTGraphTraits(bool isSimple = false)
  1920. : DefaultDOTGraphTraits(isSimple) {}
  1921. static std::string getGraphName(const PGOUseFunc *G) {
  1922. return std::string(G->getFunc().getName());
  1923. }
  1924. std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
  1925. std::string Result;
  1926. raw_string_ostream OS(Result);
  1927. OS << getSimpleNodeName(Node) << ":\\l";
  1928. UseBBInfo *BI = Graph->findBBInfo(Node);
  1929. OS << "Count : ";
  1930. if (BI && BI->CountValid)
  1931. OS << BI->CountValue << "\\l";
  1932. else
  1933. OS << "Unknown\\l";
  1934. if (!PGOInstrSelect)
  1935. return Result;
  1936. for (const Instruction &I : *Node) {
  1937. if (!isa<SelectInst>(&I))
  1938. continue;
  1939. // Display scaled counts for SELECT instruction:
  1940. OS << "SELECT : { T = ";
  1941. uint64_t TC, FC;
  1942. bool HasProf = I.extractProfMetadata(TC, FC);
  1943. if (!HasProf)
  1944. OS << "Unknown, F = Unknown }\\l";
  1945. else
  1946. OS << TC << ", F = " << FC << " }\\l";
  1947. }
  1948. return Result;
  1949. }
  1950. };
  1951. } // end namespace llvm