SampleProfReader.cpp 52 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614
  1. //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements the class that reads LLVM sample profiles. It
  10. // supports three file formats: text, binary and gcov.
  11. //
  12. // The textual representation is useful for debugging and testing purposes. The
  13. // binary representation is more compact, resulting in smaller file sizes.
  14. //
  15. // The gcov encoding is the one generated by GCC's AutoFDO profile creation
  16. // tool (https://github.com/google/autofdo)
  17. //
  18. // All three encodings can be used interchangeably as an input sample profile.
  19. //
  20. //===----------------------------------------------------------------------===//
  21. #include "llvm/ProfileData/SampleProfReader.h"
  22. #include "llvm/ADT/DenseMap.h"
  23. #include "llvm/ADT/STLExtras.h"
  24. #include "llvm/ADT/StringRef.h"
  25. #include "llvm/IR/ProfileSummary.h"
  26. #include "llvm/ProfileData/ProfileCommon.h"
  27. #include "llvm/ProfileData/SampleProf.h"
  28. #include "llvm/Support/Compression.h"
  29. #include "llvm/Support/ErrorOr.h"
  30. #include "llvm/Support/LEB128.h"
  31. #include "llvm/Support/LineIterator.h"
  32. #include "llvm/Support/MD5.h"
  33. #include "llvm/Support/MemoryBuffer.h"
  34. #include "llvm/Support/raw_ostream.h"
  35. #include <algorithm>
  36. #include <cstddef>
  37. #include <cstdint>
  38. #include <limits>
  39. #include <memory>
  40. #include <system_error>
  41. #include <vector>
  42. using namespace llvm;
  43. using namespace sampleprof;
  44. /// Dump the function profile for \p FName.
  45. ///
  46. /// \param FName Name of the function to print.
  47. /// \param OS Stream to emit the output to.
  48. void SampleProfileReader::dumpFunctionProfile(StringRef FName,
  49. raw_ostream &OS) {
  50. OS << "Function: " << FName << ": " << Profiles[FName];
  51. }
  52. /// Dump all the function profiles found on stream \p OS.
  53. void SampleProfileReader::dump(raw_ostream &OS) {
  54. for (const auto &I : Profiles)
  55. dumpFunctionProfile(I.getKey(), OS);
  56. }
  57. /// Parse \p Input as function head.
  58. ///
  59. /// Parse one line of \p Input, and update function name in \p FName,
  60. /// function's total sample count in \p NumSamples, function's entry
  61. /// count in \p NumHeadSamples.
  62. ///
  63. /// \returns true if parsing is successful.
  64. static bool ParseHead(const StringRef &Input, StringRef &FName,
  65. uint64_t &NumSamples, uint64_t &NumHeadSamples) {
  66. if (Input[0] == ' ')
  67. return false;
  68. size_t n2 = Input.rfind(':');
  69. size_t n1 = Input.rfind(':', n2 - 1);
  70. FName = Input.substr(0, n1);
  71. if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
  72. return false;
  73. if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
  74. return false;
  75. return true;
  76. }
  77. /// Returns true if line offset \p L is legal (only has 16 bits).
  78. static bool isOffsetLegal(unsigned L) { return (L & 0xffff) == L; }
  79. /// Parse \p Input that contains metadata.
  80. /// Possible metadata:
  81. /// - CFG Checksum information:
  82. /// !CFGChecksum: 12345
  83. /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash.
  84. static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash) {
  85. if (!Input.startswith("!CFGChecksum:"))
  86. return false;
  87. StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim();
  88. return !CFGInfo.getAsInteger(10, FunctionHash);
  89. }
  90. enum class LineType {
  91. CallSiteProfile,
  92. BodyProfile,
  93. Metadata,
  94. };
  95. /// Parse \p Input as line sample.
  96. ///
  97. /// \param Input input line.
  98. /// \param LineTy Type of this line.
  99. /// \param Depth the depth of the inline stack.
  100. /// \param NumSamples total samples of the line/inlined callsite.
  101. /// \param LineOffset line offset to the start of the function.
  102. /// \param Discriminator discriminator of the line.
  103. /// \param TargetCountMap map from indirect call target to count.
  104. /// \param FunctionHash the function's CFG hash, used by pseudo probe.
  105. ///
  106. /// returns true if parsing is successful.
  107. static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
  108. uint64_t &NumSamples, uint32_t &LineOffset,
  109. uint32_t &Discriminator, StringRef &CalleeName,
  110. DenseMap<StringRef, uint64_t> &TargetCountMap,
  111. uint64_t &FunctionHash) {
  112. for (Depth = 0; Input[Depth] == ' '; Depth++)
  113. ;
  114. if (Depth == 0)
  115. return false;
  116. if (Depth == 1 && Input[Depth] == '!') {
  117. LineTy = LineType::Metadata;
  118. return parseMetadata(Input.substr(Depth), FunctionHash);
  119. }
  120. size_t n1 = Input.find(':');
  121. StringRef Loc = Input.substr(Depth, n1 - Depth);
  122. size_t n2 = Loc.find('.');
  123. if (n2 == StringRef::npos) {
  124. if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset))
  125. return false;
  126. Discriminator = 0;
  127. } else {
  128. if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
  129. return false;
  130. if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
  131. return false;
  132. }
  133. StringRef Rest = Input.substr(n1 + 2);
  134. if (isDigit(Rest[0])) {
  135. LineTy = LineType::BodyProfile;
  136. size_t n3 = Rest.find(' ');
  137. if (n3 == StringRef::npos) {
  138. if (Rest.getAsInteger(10, NumSamples))
  139. return false;
  140. } else {
  141. if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
  142. return false;
  143. }
  144. // Find call targets and their sample counts.
  145. // Note: In some cases, there are symbols in the profile which are not
  146. // mangled. To accommodate such cases, use colon + integer pairs as the
  147. // anchor points.
  148. // An example:
  149. // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
  150. // ":1000" and ":437" are used as anchor points so the string above will
  151. // be interpreted as
  152. // target: _M_construct<char *>
  153. // count: 1000
  154. // target: string_view<std::allocator<char> >
  155. // count: 437
  156. while (n3 != StringRef::npos) {
  157. n3 += Rest.substr(n3).find_first_not_of(' ');
  158. Rest = Rest.substr(n3);
  159. n3 = Rest.find_first_of(':');
  160. if (n3 == StringRef::npos || n3 == 0)
  161. return false;
  162. StringRef Target;
  163. uint64_t count, n4;
  164. while (true) {
  165. // Get the segment after the current colon.
  166. StringRef AfterColon = Rest.substr(n3 + 1);
  167. // Get the target symbol before the current colon.
  168. Target = Rest.substr(0, n3);
  169. // Check if the word after the current colon is an integer.
  170. n4 = AfterColon.find_first_of(' ');
  171. n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size();
  172. StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1);
  173. if (!WordAfterColon.getAsInteger(10, count))
  174. break;
  175. // Try to find the next colon.
  176. uint64_t n5 = AfterColon.find_first_of(':');
  177. if (n5 == StringRef::npos)
  178. return false;
  179. n3 += n5 + 1;
  180. }
  181. // An anchor point is found. Save the {target, count} pair
  182. TargetCountMap[Target] = count;
  183. if (n4 == Rest.size())
  184. break;
  185. // Change n3 to the next blank space after colon + integer pair.
  186. n3 = n4;
  187. }
  188. } else {
  189. LineTy = LineType::CallSiteProfile;
  190. size_t n3 = Rest.find_last_of(':');
  191. CalleeName = Rest.substr(0, n3);
  192. if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
  193. return false;
  194. }
  195. return true;
  196. }
  197. /// Load samples from a text file.
  198. ///
  199. /// See the documentation at the top of the file for an explanation of
  200. /// the expected format.
  201. ///
  202. /// \returns true if the file was loaded successfully, false otherwise.
  203. std::error_code SampleProfileReaderText::readImpl() {
  204. line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
  205. sampleprof_error Result = sampleprof_error::success;
  206. InlineCallStack InlineStack;
  207. uint32_t ProbeProfileCount = 0;
  208. // SeenMetadata tracks whether we have processed metadata for the current
  209. // top-level function profile.
  210. bool SeenMetadata = false;
  211. for (; !LineIt.is_at_eof(); ++LineIt) {
  212. if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
  213. continue;
  214. // Read the header of each function.
  215. //
  216. // Note that for function identifiers we are actually expecting
  217. // mangled names, but we may not always get them. This happens when
  218. // the compiler decides not to emit the function (e.g., it was inlined
  219. // and removed). In this case, the binary will not have the linkage
  220. // name for the function, so the profiler will emit the function's
  221. // unmangled name, which may contain characters like ':' and '>' in its
  222. // name (member functions, templates, etc).
  223. //
  224. // The only requirement we place on the identifier, then, is that it
  225. // should not begin with a number.
  226. if ((*LineIt)[0] != ' ') {
  227. uint64_t NumSamples, NumHeadSamples;
  228. StringRef FName;
  229. if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
  230. reportError(LineIt.line_number(),
  231. "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
  232. return sampleprof_error::malformed;
  233. }
  234. SeenMetadata = false;
  235. SampleContext FContext(FName);
  236. if (FContext.hasContext())
  237. ++CSProfileCount;
  238. Profiles[FContext] = FunctionSamples();
  239. FunctionSamples &FProfile = Profiles[FContext];
  240. FProfile.setName(FContext.getNameWithoutContext());
  241. FProfile.setContext(FContext);
  242. MergeResult(Result, FProfile.addTotalSamples(NumSamples));
  243. MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
  244. InlineStack.clear();
  245. InlineStack.push_back(&FProfile);
  246. } else {
  247. uint64_t NumSamples;
  248. StringRef FName;
  249. DenseMap<StringRef, uint64_t> TargetCountMap;
  250. uint32_t Depth, LineOffset, Discriminator;
  251. LineType LineTy;
  252. uint64_t FunctionHash;
  253. if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset,
  254. Discriminator, FName, TargetCountMap, FunctionHash)) {
  255. reportError(LineIt.line_number(),
  256. "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
  257. *LineIt);
  258. return sampleprof_error::malformed;
  259. }
  260. if (SeenMetadata && LineTy != LineType::Metadata) {
  261. // Metadata must be put at the end of a function profile.
  262. reportError(LineIt.line_number(),
  263. "Found non-metadata after metadata: " + *LineIt);
  264. return sampleprof_error::malformed;
  265. }
  266. while (InlineStack.size() > Depth) {
  267. InlineStack.pop_back();
  268. }
  269. switch (LineTy) {
  270. case LineType::CallSiteProfile: {
  271. FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
  272. LineLocation(LineOffset, Discriminator))[std::string(FName)];
  273. FSamples.setName(FName);
  274. MergeResult(Result, FSamples.addTotalSamples(NumSamples));
  275. InlineStack.push_back(&FSamples);
  276. break;
  277. }
  278. case LineType::BodyProfile: {
  279. while (InlineStack.size() > Depth) {
  280. InlineStack.pop_back();
  281. }
  282. FunctionSamples &FProfile = *InlineStack.back();
  283. for (const auto &name_count : TargetCountMap) {
  284. MergeResult(Result, FProfile.addCalledTargetSamples(
  285. LineOffset, Discriminator, name_count.first,
  286. name_count.second));
  287. }
  288. MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
  289. NumSamples));
  290. break;
  291. }
  292. case LineType::Metadata: {
  293. FunctionSamples &FProfile = *InlineStack.back();
  294. FProfile.setFunctionHash(FunctionHash);
  295. ++ProbeProfileCount;
  296. SeenMetadata = true;
  297. break;
  298. }
  299. }
  300. }
  301. }
  302. assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
  303. "Cannot have both context-sensitive and regular profile");
  304. ProfileIsCS = (CSProfileCount > 0);
  305. assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) &&
  306. "Cannot have both probe-based profiles and regular profiles");
  307. ProfileIsProbeBased = (ProbeProfileCount > 0);
  308. FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
  309. FunctionSamples::ProfileIsCS = ProfileIsCS;
  310. if (Result == sampleprof_error::success)
  311. computeSummary();
  312. return Result;
  313. }
  314. bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
  315. bool result = false;
  316. // Check that the first non-comment line is a valid function header.
  317. line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#');
  318. if (!LineIt.is_at_eof()) {
  319. if ((*LineIt)[0] != ' ') {
  320. uint64_t NumSamples, NumHeadSamples;
  321. StringRef FName;
  322. result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples);
  323. }
  324. }
  325. return result;
  326. }
  327. template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
  328. unsigned NumBytesRead = 0;
  329. std::error_code EC;
  330. uint64_t Val = decodeULEB128(Data, &NumBytesRead);
  331. if (Val > std::numeric_limits<T>::max())
  332. EC = sampleprof_error::malformed;
  333. else if (Data + NumBytesRead > End)
  334. EC = sampleprof_error::truncated;
  335. else
  336. EC = sampleprof_error::success;
  337. if (EC) {
  338. reportError(0, EC.message());
  339. return EC;
  340. }
  341. Data += NumBytesRead;
  342. return static_cast<T>(Val);
  343. }
  344. ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
  345. std::error_code EC;
  346. StringRef Str(reinterpret_cast<const char *>(Data));
  347. if (Data + Str.size() + 1 > End) {
  348. EC = sampleprof_error::truncated;
  349. reportError(0, EC.message());
  350. return EC;
  351. }
  352. Data += Str.size() + 1;
  353. return Str;
  354. }
  355. template <typename T>
  356. ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
  357. std::error_code EC;
  358. if (Data + sizeof(T) > End) {
  359. EC = sampleprof_error::truncated;
  360. reportError(0, EC.message());
  361. return EC;
  362. }
  363. using namespace support;
  364. T Val = endian::readNext<T, little, unaligned>(Data);
  365. return Val;
  366. }
  367. template <typename T>
  368. inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
  369. std::error_code EC;
  370. auto Idx = readNumber<uint32_t>();
  371. if (std::error_code EC = Idx.getError())
  372. return EC;
  373. if (*Idx >= Table.size())
  374. return sampleprof_error::truncated_name_table;
  375. return *Idx;
  376. }
  377. ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
  378. auto Idx = readStringIndex(NameTable);
  379. if (std::error_code EC = Idx.getError())
  380. return EC;
  381. return NameTable[*Idx];
  382. }
  383. ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() {
  384. if (!FixedLengthMD5)
  385. return SampleProfileReaderBinary::readStringFromTable();
  386. // read NameTable index.
  387. auto Idx = readStringIndex(NameTable);
  388. if (std::error_code EC = Idx.getError())
  389. return EC;
  390. // Check whether the name to be accessed has been accessed before,
  391. // if not, read it from memory directly.
  392. StringRef &SR = NameTable[*Idx];
  393. if (SR.empty()) {
  394. const uint8_t *SavedData = Data;
  395. Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t));
  396. auto FID = readUnencodedNumber<uint64_t>();
  397. if (std::error_code EC = FID.getError())
  398. return EC;
  399. // Save the string converted from uint64_t in MD5StringBuf. All the
  400. // references to the name are all StringRefs refering to the string
  401. // in MD5StringBuf.
  402. MD5StringBuf->push_back(std::to_string(*FID));
  403. SR = MD5StringBuf->back();
  404. Data = SavedData;
  405. }
  406. return SR;
  407. }
  408. ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() {
  409. auto Idx = readStringIndex(NameTable);
  410. if (std::error_code EC = Idx.getError())
  411. return EC;
  412. return StringRef(NameTable[*Idx]);
  413. }
  414. std::error_code
  415. SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
  416. auto NumSamples = readNumber<uint64_t>();
  417. if (std::error_code EC = NumSamples.getError())
  418. return EC;
  419. FProfile.addTotalSamples(*NumSamples);
  420. // Read the samples in the body.
  421. auto NumRecords = readNumber<uint32_t>();
  422. if (std::error_code EC = NumRecords.getError())
  423. return EC;
  424. for (uint32_t I = 0; I < *NumRecords; ++I) {
  425. auto LineOffset = readNumber<uint64_t>();
  426. if (std::error_code EC = LineOffset.getError())
  427. return EC;
  428. if (!isOffsetLegal(*LineOffset)) {
  429. return std::error_code();
  430. }
  431. auto Discriminator = readNumber<uint64_t>();
  432. if (std::error_code EC = Discriminator.getError())
  433. return EC;
  434. auto NumSamples = readNumber<uint64_t>();
  435. if (std::error_code EC = NumSamples.getError())
  436. return EC;
  437. auto NumCalls = readNumber<uint32_t>();
  438. if (std::error_code EC = NumCalls.getError())
  439. return EC;
  440. for (uint32_t J = 0; J < *NumCalls; ++J) {
  441. auto CalledFunction(readStringFromTable());
  442. if (std::error_code EC = CalledFunction.getError())
  443. return EC;
  444. auto CalledFunctionSamples = readNumber<uint64_t>();
  445. if (std::error_code EC = CalledFunctionSamples.getError())
  446. return EC;
  447. FProfile.addCalledTargetSamples(*LineOffset, *Discriminator,
  448. *CalledFunction, *CalledFunctionSamples);
  449. }
  450. FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples);
  451. }
  452. // Read all the samples for inlined function calls.
  453. auto NumCallsites = readNumber<uint32_t>();
  454. if (std::error_code EC = NumCallsites.getError())
  455. return EC;
  456. for (uint32_t J = 0; J < *NumCallsites; ++J) {
  457. auto LineOffset = readNumber<uint64_t>();
  458. if (std::error_code EC = LineOffset.getError())
  459. return EC;
  460. auto Discriminator = readNumber<uint64_t>();
  461. if (std::error_code EC = Discriminator.getError())
  462. return EC;
  463. auto FName(readStringFromTable());
  464. if (std::error_code EC = FName.getError())
  465. return EC;
  466. FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
  467. LineLocation(*LineOffset, *Discriminator))[std::string(*FName)];
  468. CalleeProfile.setName(*FName);
  469. if (std::error_code EC = readProfile(CalleeProfile))
  470. return EC;
  471. }
  472. return sampleprof_error::success;
  473. }
  474. std::error_code
  475. SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
  476. Data = Start;
  477. auto NumHeadSamples = readNumber<uint64_t>();
  478. if (std::error_code EC = NumHeadSamples.getError())
  479. return EC;
  480. auto FName(readStringFromTable());
  481. if (std::error_code EC = FName.getError())
  482. return EC;
  483. SampleContext FContext(*FName);
  484. Profiles[FContext] = FunctionSamples();
  485. FunctionSamples &FProfile = Profiles[FContext];
  486. FProfile.setName(FContext.getNameWithoutContext());
  487. FProfile.setContext(FContext);
  488. FProfile.addHeadSamples(*NumHeadSamples);
  489. if (FContext.hasContext())
  490. CSProfileCount++;
  491. if (std::error_code EC = readProfile(FProfile))
  492. return EC;
  493. return sampleprof_error::success;
  494. }
  495. std::error_code SampleProfileReaderBinary::readImpl() {
  496. while (!at_eof()) {
  497. if (std::error_code EC = readFuncProfile(Data))
  498. return EC;
  499. }
  500. return sampleprof_error::success;
  501. }
  502. std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
  503. const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
  504. Data = Start;
  505. End = Start + Size;
  506. switch (Entry.Type) {
  507. case SecProfSummary:
  508. if (std::error_code EC = readSummary())
  509. return EC;
  510. if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
  511. Summary->setPartialProfile(true);
  512. break;
  513. case SecNameTable: {
  514. FixedLengthMD5 =
  515. hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5);
  516. bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name);
  517. assert((!FixedLengthMD5 || UseMD5) &&
  518. "If FixedLengthMD5 is true, UseMD5 has to be true");
  519. if (std::error_code EC = readNameTableSec(UseMD5))
  520. return EC;
  521. break;
  522. }
  523. case SecLBRProfile:
  524. if (std::error_code EC = readFuncProfiles())
  525. return EC;
  526. break;
  527. case SecFuncOffsetTable:
  528. if (std::error_code EC = readFuncOffsetTable())
  529. return EC;
  530. break;
  531. case SecFuncMetadata:
  532. ProfileIsProbeBased =
  533. hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
  534. FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
  535. if (std::error_code EC = readFuncMetadata())
  536. return EC;
  537. break;
  538. case SecProfileSymbolList:
  539. if (std::error_code EC = readProfileSymbolList())
  540. return EC;
  541. break;
  542. default:
  543. if (std::error_code EC = readCustomSection(Entry))
  544. return EC;
  545. break;
  546. }
  547. return sampleprof_error::success;
  548. }
  549. void SampleProfileReaderExtBinaryBase::collectFuncsFrom(const Module &M) {
  550. UseAllFuncs = false;
  551. FuncsToUse.clear();
  552. for (auto &F : M)
  553. FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
  554. }
  555. std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
  556. // If there are more than one FuncOffsetTable, the profile read associated
  557. // with previous FuncOffsetTable has to be done before next FuncOffsetTable
  558. // is read.
  559. FuncOffsetTable.clear();
  560. auto Size = readNumber<uint64_t>();
  561. if (std::error_code EC = Size.getError())
  562. return EC;
  563. FuncOffsetTable.reserve(*Size);
  564. for (uint32_t I = 0; I < *Size; ++I) {
  565. auto FName(readStringFromTable());
  566. if (std::error_code EC = FName.getError())
  567. return EC;
  568. auto Offset = readNumber<uint64_t>();
  569. if (std::error_code EC = Offset.getError())
  570. return EC;
  571. FuncOffsetTable[*FName] = *Offset;
  572. }
  573. return sampleprof_error::success;
  574. }
  575. std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
  576. const uint8_t *Start = Data;
  577. if (UseAllFuncs) {
  578. while (Data < End) {
  579. if (std::error_code EC = readFuncProfile(Data))
  580. return EC;
  581. }
  582. assert(Data == End && "More data is read than expected");
  583. } else {
  584. if (Remapper) {
  585. for (auto Name : FuncsToUse) {
  586. Remapper->insert(Name);
  587. }
  588. }
  589. if (useMD5()) {
  590. for (auto Name : FuncsToUse) {
  591. auto GUID = std::to_string(MD5Hash(Name));
  592. auto iter = FuncOffsetTable.find(StringRef(GUID));
  593. if (iter == FuncOffsetTable.end())
  594. continue;
  595. const uint8_t *FuncProfileAddr = Start + iter->second;
  596. assert(FuncProfileAddr < End && "out of LBRProfile section");
  597. if (std::error_code EC = readFuncProfile(FuncProfileAddr))
  598. return EC;
  599. }
  600. } else {
  601. for (auto NameOffset : FuncOffsetTable) {
  602. SampleContext FContext(NameOffset.first);
  603. auto FuncName = FContext.getNameWithoutContext();
  604. if (!FuncsToUse.count(FuncName) &&
  605. (!Remapper || !Remapper->exist(FuncName)))
  606. continue;
  607. const uint8_t *FuncProfileAddr = Start + NameOffset.second;
  608. assert(FuncProfileAddr < End && "out of LBRProfile section");
  609. if (std::error_code EC = readFuncProfile(FuncProfileAddr))
  610. return EC;
  611. }
  612. }
  613. Data = End;
  614. }
  615. assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
  616. "Cannot have both context-sensitive and regular profile");
  617. ProfileIsCS = (CSProfileCount > 0);
  618. FunctionSamples::ProfileIsCS = ProfileIsCS;
  619. return sampleprof_error::success;
  620. }
  621. std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() {
  622. if (!ProfSymList)
  623. ProfSymList = std::make_unique<ProfileSymbolList>();
  624. if (std::error_code EC = ProfSymList->read(Data, End - Data))
  625. return EC;
  626. Data = End;
  627. return sampleprof_error::success;
  628. }
  629. std::error_code SampleProfileReaderExtBinaryBase::decompressSection(
  630. const uint8_t *SecStart, const uint64_t SecSize,
  631. const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) {
  632. Data = SecStart;
  633. End = SecStart + SecSize;
  634. auto DecompressSize = readNumber<uint64_t>();
  635. if (std::error_code EC = DecompressSize.getError())
  636. return EC;
  637. DecompressBufSize = *DecompressSize;
  638. auto CompressSize = readNumber<uint64_t>();
  639. if (std::error_code EC = CompressSize.getError())
  640. return EC;
  641. if (!llvm::zlib::isAvailable())
  642. return sampleprof_error::zlib_unavailable;
  643. StringRef CompressedStrings(reinterpret_cast<const char *>(Data),
  644. *CompressSize);
  645. char *Buffer = Allocator.Allocate<char>(DecompressBufSize);
  646. size_t UCSize = DecompressBufSize;
  647. llvm::Error E =
  648. zlib::uncompress(CompressedStrings, Buffer, UCSize);
  649. if (E)
  650. return sampleprof_error::uncompress_failed;
  651. DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer);
  652. return sampleprof_error::success;
  653. }
  654. std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
  655. const uint8_t *BufStart =
  656. reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
  657. for (auto &Entry : SecHdrTable) {
  658. // Skip empty section.
  659. if (!Entry.Size)
  660. continue;
  661. // Skip sections without context when SkipFlatProf is true.
  662. if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
  663. continue;
  664. const uint8_t *SecStart = BufStart + Entry.Offset;
  665. uint64_t SecSize = Entry.Size;
  666. // If the section is compressed, decompress it into a buffer
  667. // DecompressBuf before reading the actual data. The pointee of
  668. // 'Data' will be changed to buffer hold by DecompressBuf
  669. // temporarily when reading the actual data.
  670. bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress);
  671. if (isCompressed) {
  672. const uint8_t *DecompressBuf;
  673. uint64_t DecompressBufSize;
  674. if (std::error_code EC = decompressSection(
  675. SecStart, SecSize, DecompressBuf, DecompressBufSize))
  676. return EC;
  677. SecStart = DecompressBuf;
  678. SecSize = DecompressBufSize;
  679. }
  680. if (std::error_code EC = readOneSection(SecStart, SecSize, Entry))
  681. return EC;
  682. if (Data != SecStart + SecSize)
  683. return sampleprof_error::malformed;
  684. // Change the pointee of 'Data' from DecompressBuf to original Buffer.
  685. if (isCompressed) {
  686. Data = BufStart + Entry.Offset;
  687. End = BufStart + Buffer->getBufferSize();
  688. }
  689. }
  690. return sampleprof_error::success;
  691. }
  692. std::error_code SampleProfileReaderCompactBinary::readImpl() {
  693. std::vector<uint64_t> OffsetsToUse;
  694. if (UseAllFuncs) {
  695. for (auto FuncEntry : FuncOffsetTable) {
  696. OffsetsToUse.push_back(FuncEntry.second);
  697. }
  698. }
  699. else {
  700. for (auto Name : FuncsToUse) {
  701. auto GUID = std::to_string(MD5Hash(Name));
  702. auto iter = FuncOffsetTable.find(StringRef(GUID));
  703. if (iter == FuncOffsetTable.end())
  704. continue;
  705. OffsetsToUse.push_back(iter->second);
  706. }
  707. }
  708. for (auto Offset : OffsetsToUse) {
  709. const uint8_t *SavedData = Data;
  710. if (std::error_code EC = readFuncProfile(
  711. reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
  712. Offset))
  713. return EC;
  714. Data = SavedData;
  715. }
  716. return sampleprof_error::success;
  717. }
  718. std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
  719. if (Magic == SPMagic())
  720. return sampleprof_error::success;
  721. return sampleprof_error::bad_magic;
  722. }
  723. std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) {
  724. if (Magic == SPMagic(SPF_Ext_Binary))
  725. return sampleprof_error::success;
  726. return sampleprof_error::bad_magic;
  727. }
  728. std::error_code
  729. SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) {
  730. if (Magic == SPMagic(SPF_Compact_Binary))
  731. return sampleprof_error::success;
  732. return sampleprof_error::bad_magic;
  733. }
  734. std::error_code SampleProfileReaderBinary::readNameTable() {
  735. auto Size = readNumber<uint32_t>();
  736. if (std::error_code EC = Size.getError())
  737. return EC;
  738. NameTable.reserve(*Size + NameTable.size());
  739. for (uint32_t I = 0; I < *Size; ++I) {
  740. auto Name(readString());
  741. if (std::error_code EC = Name.getError())
  742. return EC;
  743. NameTable.push_back(*Name);
  744. }
  745. return sampleprof_error::success;
  746. }
  747. std::error_code SampleProfileReaderExtBinaryBase::readMD5NameTable() {
  748. auto Size = readNumber<uint64_t>();
  749. if (std::error_code EC = Size.getError())
  750. return EC;
  751. MD5StringBuf = std::make_unique<std::vector<std::string>>();
  752. MD5StringBuf->reserve(*Size);
  753. if (FixedLengthMD5) {
  754. // Preallocate and initialize NameTable so we can check whether a name
  755. // index has been read before by checking whether the element in the
  756. // NameTable is empty, meanwhile readStringIndex can do the boundary
  757. // check using the size of NameTable.
  758. NameTable.resize(*Size + NameTable.size());
  759. MD5NameMemStart = Data;
  760. Data = Data + (*Size) * sizeof(uint64_t);
  761. return sampleprof_error::success;
  762. }
  763. NameTable.reserve(*Size);
  764. for (uint32_t I = 0; I < *Size; ++I) {
  765. auto FID = readNumber<uint64_t>();
  766. if (std::error_code EC = FID.getError())
  767. return EC;
  768. MD5StringBuf->push_back(std::to_string(*FID));
  769. // NameTable is a vector of StringRef. Here it is pushing back a
  770. // StringRef initialized with the last string in MD5stringBuf.
  771. NameTable.push_back(MD5StringBuf->back());
  772. }
  773. return sampleprof_error::success;
  774. }
  775. std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
  776. if (IsMD5)
  777. return readMD5NameTable();
  778. return SampleProfileReaderBinary::readNameTable();
  779. }
  780. std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() {
  781. if (!ProfileIsProbeBased)
  782. return sampleprof_error::success;
  783. while (Data < End) {
  784. auto FName(readStringFromTable());
  785. if (std::error_code EC = FName.getError())
  786. return EC;
  787. auto Checksum = readNumber<uint64_t>();
  788. if (std::error_code EC = Checksum.getError())
  789. return EC;
  790. SampleContext FContext(*FName);
  791. // No need to load metadata for profiles that are not loaded in the current
  792. // module.
  793. if (Profiles.count(FContext))
  794. Profiles[FContext].setFunctionHash(*Checksum);
  795. }
  796. assert(Data == End && "More data is read than expected");
  797. return sampleprof_error::success;
  798. }
  799. std::error_code SampleProfileReaderCompactBinary::readNameTable() {
  800. auto Size = readNumber<uint64_t>();
  801. if (std::error_code EC = Size.getError())
  802. return EC;
  803. NameTable.reserve(*Size);
  804. for (uint32_t I = 0; I < *Size; ++I) {
  805. auto FID = readNumber<uint64_t>();
  806. if (std::error_code EC = FID.getError())
  807. return EC;
  808. NameTable.push_back(std::to_string(*FID));
  809. }
  810. return sampleprof_error::success;
  811. }
  812. std::error_code
  813. SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint32_t Idx) {
  814. SecHdrTableEntry Entry;
  815. auto Type = readUnencodedNumber<uint64_t>();
  816. if (std::error_code EC = Type.getError())
  817. return EC;
  818. Entry.Type = static_cast<SecType>(*Type);
  819. auto Flags = readUnencodedNumber<uint64_t>();
  820. if (std::error_code EC = Flags.getError())
  821. return EC;
  822. Entry.Flags = *Flags;
  823. auto Offset = readUnencodedNumber<uint64_t>();
  824. if (std::error_code EC = Offset.getError())
  825. return EC;
  826. Entry.Offset = *Offset;
  827. auto Size = readUnencodedNumber<uint64_t>();
  828. if (std::error_code EC = Size.getError())
  829. return EC;
  830. Entry.Size = *Size;
  831. Entry.LayoutIndex = Idx;
  832. SecHdrTable.push_back(std::move(Entry));
  833. return sampleprof_error::success;
  834. }
  835. std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() {
  836. auto EntryNum = readUnencodedNumber<uint64_t>();
  837. if (std::error_code EC = EntryNum.getError())
  838. return EC;
  839. for (uint32_t i = 0; i < (*EntryNum); i++)
  840. if (std::error_code EC = readSecHdrTableEntry(i))
  841. return EC;
  842. return sampleprof_error::success;
  843. }
  844. std::error_code SampleProfileReaderExtBinaryBase::readHeader() {
  845. const uint8_t *BufStart =
  846. reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
  847. Data = BufStart;
  848. End = BufStart + Buffer->getBufferSize();
  849. if (std::error_code EC = readMagicIdent())
  850. return EC;
  851. if (std::error_code EC = readSecHdrTable())
  852. return EC;
  853. return sampleprof_error::success;
  854. }
  855. uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) {
  856. uint64_t Size = 0;
  857. for (auto &Entry : SecHdrTable) {
  858. if (Entry.Type == Type)
  859. Size += Entry.Size;
  860. }
  861. return Size;
  862. }
  863. uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
  864. // Sections in SecHdrTable is not necessarily in the same order as
  865. // sections in the profile because section like FuncOffsetTable needs
  866. // to be written after section LBRProfile but needs to be read before
  867. // section LBRProfile, so we cannot simply use the last entry in
  868. // SecHdrTable to calculate the file size.
  869. uint64_t FileSize = 0;
  870. for (auto &Entry : SecHdrTable) {
  871. FileSize = std::max(Entry.Offset + Entry.Size, FileSize);
  872. }
  873. return FileSize;
  874. }
  875. static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
  876. std::string Flags;
  877. if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress))
  878. Flags.append("{compressed,");
  879. else
  880. Flags.append("{");
  881. if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
  882. Flags.append("flat,");
  883. switch (Entry.Type) {
  884. case SecNameTable:
  885. if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5))
  886. Flags.append("fixlenmd5,");
  887. else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))
  888. Flags.append("md5,");
  889. break;
  890. case SecProfSummary:
  891. if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
  892. Flags.append("partial,");
  893. break;
  894. default:
  895. break;
  896. }
  897. char &last = Flags.back();
  898. if (last == ',')
  899. last = '}';
  900. else
  901. Flags.append("}");
  902. return Flags;
  903. }
  904. bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) {
  905. uint64_t TotalSecsSize = 0;
  906. for (auto &Entry : SecHdrTable) {
  907. OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset
  908. << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry)
  909. << "\n";
  910. ;
  911. TotalSecsSize += Entry.Size;
  912. }
  913. uint64_t HeaderSize = SecHdrTable.front().Offset;
  914. assert(HeaderSize + TotalSecsSize == getFileSize() &&
  915. "Size of 'header + sections' doesn't match the total size of profile");
  916. OS << "Header Size: " << HeaderSize << "\n";
  917. OS << "Total Sections Size: " << TotalSecsSize << "\n";
  918. OS << "File Size: " << getFileSize() << "\n";
  919. return true;
  920. }
  921. std::error_code SampleProfileReaderBinary::readMagicIdent() {
  922. // Read and check the magic identifier.
  923. auto Magic = readNumber<uint64_t>();
  924. if (std::error_code EC = Magic.getError())
  925. return EC;
  926. else if (std::error_code EC = verifySPMagic(*Magic))
  927. return EC;
  928. // Read the version number.
  929. auto Version = readNumber<uint64_t>();
  930. if (std::error_code EC = Version.getError())
  931. return EC;
  932. else if (*Version != SPVersion())
  933. return sampleprof_error::unsupported_version;
  934. return sampleprof_error::success;
  935. }
  936. std::error_code SampleProfileReaderBinary::readHeader() {
  937. Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
  938. End = Data + Buffer->getBufferSize();
  939. if (std::error_code EC = readMagicIdent())
  940. return EC;
  941. if (std::error_code EC = readSummary())
  942. return EC;
  943. if (std::error_code EC = readNameTable())
  944. return EC;
  945. return sampleprof_error::success;
  946. }
  947. std::error_code SampleProfileReaderCompactBinary::readHeader() {
  948. SampleProfileReaderBinary::readHeader();
  949. if (std::error_code EC = readFuncOffsetTable())
  950. return EC;
  951. return sampleprof_error::success;
  952. }
  953. std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
  954. auto TableOffset = readUnencodedNumber<uint64_t>();
  955. if (std::error_code EC = TableOffset.getError())
  956. return EC;
  957. const uint8_t *SavedData = Data;
  958. const uint8_t *TableStart =
  959. reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
  960. *TableOffset;
  961. Data = TableStart;
  962. auto Size = readNumber<uint64_t>();
  963. if (std::error_code EC = Size.getError())
  964. return EC;
  965. FuncOffsetTable.reserve(*Size);
  966. for (uint32_t I = 0; I < *Size; ++I) {
  967. auto FName(readStringFromTable());
  968. if (std::error_code EC = FName.getError())
  969. return EC;
  970. auto Offset = readNumber<uint64_t>();
  971. if (std::error_code EC = Offset.getError())
  972. return EC;
  973. FuncOffsetTable[*FName] = *Offset;
  974. }
  975. End = TableStart;
  976. Data = SavedData;
  977. return sampleprof_error::success;
  978. }
  979. void SampleProfileReaderCompactBinary::collectFuncsFrom(const Module &M) {
  980. UseAllFuncs = false;
  981. FuncsToUse.clear();
  982. for (auto &F : M)
  983. FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
  984. }
  985. std::error_code SampleProfileReaderBinary::readSummaryEntry(
  986. std::vector<ProfileSummaryEntry> &Entries) {
  987. auto Cutoff = readNumber<uint64_t>();
  988. if (std::error_code EC = Cutoff.getError())
  989. return EC;
  990. auto MinBlockCount = readNumber<uint64_t>();
  991. if (std::error_code EC = MinBlockCount.getError())
  992. return EC;
  993. auto NumBlocks = readNumber<uint64_t>();
  994. if (std::error_code EC = NumBlocks.getError())
  995. return EC;
  996. Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks);
  997. return sampleprof_error::success;
  998. }
  999. std::error_code SampleProfileReaderBinary::readSummary() {
  1000. auto TotalCount = readNumber<uint64_t>();
  1001. if (std::error_code EC = TotalCount.getError())
  1002. return EC;
  1003. auto MaxBlockCount = readNumber<uint64_t>();
  1004. if (std::error_code EC = MaxBlockCount.getError())
  1005. return EC;
  1006. auto MaxFunctionCount = readNumber<uint64_t>();
  1007. if (std::error_code EC = MaxFunctionCount.getError())
  1008. return EC;
  1009. auto NumBlocks = readNumber<uint64_t>();
  1010. if (std::error_code EC = NumBlocks.getError())
  1011. return EC;
  1012. auto NumFunctions = readNumber<uint64_t>();
  1013. if (std::error_code EC = NumFunctions.getError())
  1014. return EC;
  1015. auto NumSummaryEntries = readNumber<uint64_t>();
  1016. if (std::error_code EC = NumSummaryEntries.getError())
  1017. return EC;
  1018. std::vector<ProfileSummaryEntry> Entries;
  1019. for (unsigned i = 0; i < *NumSummaryEntries; i++) {
  1020. std::error_code EC = readSummaryEntry(Entries);
  1021. if (EC != sampleprof_error::success)
  1022. return EC;
  1023. }
  1024. Summary = std::make_unique<ProfileSummary>(
  1025. ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0,
  1026. *MaxFunctionCount, *NumBlocks, *NumFunctions);
  1027. return sampleprof_error::success;
  1028. }
  1029. bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) {
  1030. const uint8_t *Data =
  1031. reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
  1032. uint64_t Magic = decodeULEB128(Data);
  1033. return Magic == SPMagic();
  1034. }
  1035. bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) {
  1036. const uint8_t *Data =
  1037. reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
  1038. uint64_t Magic = decodeULEB128(Data);
  1039. return Magic == SPMagic(SPF_Ext_Binary);
  1040. }
  1041. bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) {
  1042. const uint8_t *Data =
  1043. reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
  1044. uint64_t Magic = decodeULEB128(Data);
  1045. return Magic == SPMagic(SPF_Compact_Binary);
  1046. }
  1047. std::error_code SampleProfileReaderGCC::skipNextWord() {
  1048. uint32_t dummy;
  1049. if (!GcovBuffer.readInt(dummy))
  1050. return sampleprof_error::truncated;
  1051. return sampleprof_error::success;
  1052. }
  1053. template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() {
  1054. if (sizeof(T) <= sizeof(uint32_t)) {
  1055. uint32_t Val;
  1056. if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max())
  1057. return static_cast<T>(Val);
  1058. } else if (sizeof(T) <= sizeof(uint64_t)) {
  1059. uint64_t Val;
  1060. if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max())
  1061. return static_cast<T>(Val);
  1062. }
  1063. std::error_code EC = sampleprof_error::malformed;
  1064. reportError(0, EC.message());
  1065. return EC;
  1066. }
  1067. ErrorOr<StringRef> SampleProfileReaderGCC::readString() {
  1068. StringRef Str;
  1069. if (!GcovBuffer.readString(Str))
  1070. return sampleprof_error::truncated;
  1071. return Str;
  1072. }
  1073. std::error_code SampleProfileReaderGCC::readHeader() {
  1074. // Read the magic identifier.
  1075. if (!GcovBuffer.readGCDAFormat())
  1076. return sampleprof_error::unrecognized_format;
  1077. // Read the version number. Note - the GCC reader does not validate this
  1078. // version, but the profile creator generates v704.
  1079. GCOV::GCOVVersion version;
  1080. if (!GcovBuffer.readGCOVVersion(version))
  1081. return sampleprof_error::unrecognized_format;
  1082. if (version != GCOV::V407)
  1083. return sampleprof_error::unsupported_version;
  1084. // Skip the empty integer.
  1085. if (std::error_code EC = skipNextWord())
  1086. return EC;
  1087. return sampleprof_error::success;
  1088. }
  1089. std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) {
  1090. uint32_t Tag;
  1091. if (!GcovBuffer.readInt(Tag))
  1092. return sampleprof_error::truncated;
  1093. if (Tag != Expected)
  1094. return sampleprof_error::malformed;
  1095. if (std::error_code EC = skipNextWord())
  1096. return EC;
  1097. return sampleprof_error::success;
  1098. }
  1099. std::error_code SampleProfileReaderGCC::readNameTable() {
  1100. if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames))
  1101. return EC;
  1102. uint32_t Size;
  1103. if (!GcovBuffer.readInt(Size))
  1104. return sampleprof_error::truncated;
  1105. for (uint32_t I = 0; I < Size; ++I) {
  1106. StringRef Str;
  1107. if (!GcovBuffer.readString(Str))
  1108. return sampleprof_error::truncated;
  1109. Names.push_back(std::string(Str));
  1110. }
  1111. return sampleprof_error::success;
  1112. }
  1113. std::error_code SampleProfileReaderGCC::readFunctionProfiles() {
  1114. if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction))
  1115. return EC;
  1116. uint32_t NumFunctions;
  1117. if (!GcovBuffer.readInt(NumFunctions))
  1118. return sampleprof_error::truncated;
  1119. InlineCallStack Stack;
  1120. for (uint32_t I = 0; I < NumFunctions; ++I)
  1121. if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
  1122. return EC;
  1123. computeSummary();
  1124. return sampleprof_error::success;
  1125. }
  1126. std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
  1127. const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
  1128. uint64_t HeadCount = 0;
  1129. if (InlineStack.size() == 0)
  1130. if (!GcovBuffer.readInt64(HeadCount))
  1131. return sampleprof_error::truncated;
  1132. uint32_t NameIdx;
  1133. if (!GcovBuffer.readInt(NameIdx))
  1134. return sampleprof_error::truncated;
  1135. StringRef Name(Names[NameIdx]);
  1136. uint32_t NumPosCounts;
  1137. if (!GcovBuffer.readInt(NumPosCounts))
  1138. return sampleprof_error::truncated;
  1139. uint32_t NumCallsites;
  1140. if (!GcovBuffer.readInt(NumCallsites))
  1141. return sampleprof_error::truncated;
  1142. FunctionSamples *FProfile = nullptr;
  1143. if (InlineStack.size() == 0) {
  1144. // If this is a top function that we have already processed, do not
  1145. // update its profile again. This happens in the presence of
  1146. // function aliases. Since these aliases share the same function
  1147. // body, there will be identical replicated profiles for the
  1148. // original function. In this case, we simply not bother updating
  1149. // the profile of the original function.
  1150. FProfile = &Profiles[Name];
  1151. FProfile->addHeadSamples(HeadCount);
  1152. if (FProfile->getTotalSamples() > 0)
  1153. Update = false;
  1154. } else {
  1155. // Otherwise, we are reading an inlined instance. The top of the
  1156. // inline stack contains the profile of the caller. Insert this
  1157. // callee in the caller's CallsiteMap.
  1158. FunctionSamples *CallerProfile = InlineStack.front();
  1159. uint32_t LineOffset = Offset >> 16;
  1160. uint32_t Discriminator = Offset & 0xffff;
  1161. FProfile = &CallerProfile->functionSamplesAt(
  1162. LineLocation(LineOffset, Discriminator))[std::string(Name)];
  1163. }
  1164. FProfile->setName(Name);
  1165. for (uint32_t I = 0; I < NumPosCounts; ++I) {
  1166. uint32_t Offset;
  1167. if (!GcovBuffer.readInt(Offset))
  1168. return sampleprof_error::truncated;
  1169. uint32_t NumTargets;
  1170. if (!GcovBuffer.readInt(NumTargets))
  1171. return sampleprof_error::truncated;
  1172. uint64_t Count;
  1173. if (!GcovBuffer.readInt64(Count))
  1174. return sampleprof_error::truncated;
  1175. // The line location is encoded in the offset as:
  1176. // high 16 bits: line offset to the start of the function.
  1177. // low 16 bits: discriminator.
  1178. uint32_t LineOffset = Offset >> 16;
  1179. uint32_t Discriminator = Offset & 0xffff;
  1180. InlineCallStack NewStack;
  1181. NewStack.push_back(FProfile);
  1182. llvm::append_range(NewStack, InlineStack);
  1183. if (Update) {
  1184. // Walk up the inline stack, adding the samples on this line to
  1185. // the total sample count of the callers in the chain.
  1186. for (auto CallerProfile : NewStack)
  1187. CallerProfile->addTotalSamples(Count);
  1188. // Update the body samples for the current profile.
  1189. FProfile->addBodySamples(LineOffset, Discriminator, Count);
  1190. }
  1191. // Process the list of functions called at an indirect call site.
  1192. // These are all the targets that a function pointer (or virtual
  1193. // function) resolved at runtime.
  1194. for (uint32_t J = 0; J < NumTargets; J++) {
  1195. uint32_t HistVal;
  1196. if (!GcovBuffer.readInt(HistVal))
  1197. return sampleprof_error::truncated;
  1198. if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
  1199. return sampleprof_error::malformed;
  1200. uint64_t TargetIdx;
  1201. if (!GcovBuffer.readInt64(TargetIdx))
  1202. return sampleprof_error::truncated;
  1203. StringRef TargetName(Names[TargetIdx]);
  1204. uint64_t TargetCount;
  1205. if (!GcovBuffer.readInt64(TargetCount))
  1206. return sampleprof_error::truncated;
  1207. if (Update)
  1208. FProfile->addCalledTargetSamples(LineOffset, Discriminator,
  1209. TargetName, TargetCount);
  1210. }
  1211. }
  1212. // Process all the inlined callers into the current function. These
  1213. // are all the callsites that were inlined into this function.
  1214. for (uint32_t I = 0; I < NumCallsites; I++) {
  1215. // The offset is encoded as:
  1216. // high 16 bits: line offset to the start of the function.
  1217. // low 16 bits: discriminator.
  1218. uint32_t Offset;
  1219. if (!GcovBuffer.readInt(Offset))
  1220. return sampleprof_error::truncated;
  1221. InlineCallStack NewStack;
  1222. NewStack.push_back(FProfile);
  1223. llvm::append_range(NewStack, InlineStack);
  1224. if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
  1225. return EC;
  1226. }
  1227. return sampleprof_error::success;
  1228. }
  1229. /// Read a GCC AutoFDO profile.
  1230. ///
  1231. /// This format is generated by the Linux Perf conversion tool at
  1232. /// https://github.com/google/autofdo.
  1233. std::error_code SampleProfileReaderGCC::readImpl() {
  1234. // Read the string table.
  1235. if (std::error_code EC = readNameTable())
  1236. return EC;
  1237. // Read the source profile.
  1238. if (std::error_code EC = readFunctionProfiles())
  1239. return EC;
  1240. return sampleprof_error::success;
  1241. }
  1242. bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
  1243. StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
  1244. return Magic == "adcg*704";
  1245. }
  1246. void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
  1247. // If the reader uses MD5 to represent string, we can't remap it because
  1248. // we don't know what the original function names were.
  1249. if (Reader.useMD5()) {
  1250. Ctx.diagnose(DiagnosticInfoSampleProfile(
  1251. Reader.getBuffer()->getBufferIdentifier(),
  1252. "Profile data remapping cannot be applied to profile data "
  1253. "in compact format (original mangled names are not available).",
  1254. DS_Warning));
  1255. return;
  1256. }
  1257. // CSSPGO-TODO: Remapper is not yet supported.
  1258. // We will need to remap the entire context string.
  1259. assert(Remappings && "should be initialized while creating remapper");
  1260. for (auto &Sample : Reader.getProfiles()) {
  1261. DenseSet<StringRef> NamesInSample;
  1262. Sample.second.findAllNames(NamesInSample);
  1263. for (auto &Name : NamesInSample)
  1264. if (auto Key = Remappings->insert(Name))
  1265. NameMap.insert({Key, Name});
  1266. }
  1267. RemappingApplied = true;
  1268. }
  1269. Optional<StringRef>
  1270. SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) {
  1271. if (auto Key = Remappings->lookup(Fname))
  1272. return NameMap.lookup(Key);
  1273. return None;
  1274. }
  1275. /// Prepare a memory buffer for the contents of \p Filename.
  1276. ///
  1277. /// \returns an error code indicating the status of the buffer.
  1278. static ErrorOr<std::unique_ptr<MemoryBuffer>>
  1279. setupMemoryBuffer(const Twine &Filename) {
  1280. auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename);
  1281. if (std::error_code EC = BufferOrErr.getError())
  1282. return EC;
  1283. auto Buffer = std::move(BufferOrErr.get());
  1284. // Sanity check the file.
  1285. if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint32_t>::max())
  1286. return sampleprof_error::too_large;
  1287. return std::move(Buffer);
  1288. }
  1289. /// Create a sample profile reader based on the format of the input file.
  1290. ///
  1291. /// \param Filename The file to open.
  1292. ///
  1293. /// \param C The LLVM context to use to emit diagnostics.
  1294. ///
  1295. /// \param RemapFilename The file used for profile remapping.
  1296. ///
  1297. /// \returns an error code indicating the status of the created reader.
  1298. ErrorOr<std::unique_ptr<SampleProfileReader>>
  1299. SampleProfileReader::create(const std::string Filename, LLVMContext &C,
  1300. const std::string RemapFilename) {
  1301. auto BufferOrError = setupMemoryBuffer(Filename);
  1302. if (std::error_code EC = BufferOrError.getError())
  1303. return EC;
  1304. return create(BufferOrError.get(), C, RemapFilename);
  1305. }
  1306. /// Create a sample profile remapper from the given input, to remap the
  1307. /// function names in the given profile data.
  1308. ///
  1309. /// \param Filename The file to open.
  1310. ///
  1311. /// \param Reader The profile reader the remapper is going to be applied to.
  1312. ///
  1313. /// \param C The LLVM context to use to emit diagnostics.
  1314. ///
  1315. /// \returns an error code indicating the status of the created reader.
  1316. ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
  1317. SampleProfileReaderItaniumRemapper::create(const std::string Filename,
  1318. SampleProfileReader &Reader,
  1319. LLVMContext &C) {
  1320. auto BufferOrError = setupMemoryBuffer(Filename);
  1321. if (std::error_code EC = BufferOrError.getError())
  1322. return EC;
  1323. return create(BufferOrError.get(), Reader, C);
  1324. }
  1325. /// Create a sample profile remapper from the given input, to remap the
  1326. /// function names in the given profile data.
  1327. ///
  1328. /// \param B The memory buffer to create the reader from (assumes ownership).
  1329. ///
  1330. /// \param C The LLVM context to use to emit diagnostics.
  1331. ///
  1332. /// \param Reader The profile reader the remapper is going to be applied to.
  1333. ///
  1334. /// \returns an error code indicating the status of the created reader.
  1335. ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
  1336. SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
  1337. SampleProfileReader &Reader,
  1338. LLVMContext &C) {
  1339. auto Remappings = std::make_unique<SymbolRemappingReader>();
  1340. if (Error E = Remappings->read(*B.get())) {
  1341. handleAllErrors(
  1342. std::move(E), [&](const SymbolRemappingParseError &ParseError) {
  1343. C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
  1344. ParseError.getLineNum(),
  1345. ParseError.getMessage()));
  1346. });
  1347. return sampleprof_error::malformed;
  1348. }
  1349. return std::make_unique<SampleProfileReaderItaniumRemapper>(
  1350. std::move(B), std::move(Remappings), Reader);
  1351. }
  1352. /// Create a sample profile reader based on the format of the input data.
  1353. ///
  1354. /// \param B The memory buffer to create the reader from (assumes ownership).
  1355. ///
  1356. /// \param C The LLVM context to use to emit diagnostics.
  1357. ///
  1358. /// \param RemapFilename The file used for profile remapping.
  1359. ///
  1360. /// \returns an error code indicating the status of the created reader.
  1361. ErrorOr<std::unique_ptr<SampleProfileReader>>
  1362. SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
  1363. const std::string RemapFilename) {
  1364. std::unique_ptr<SampleProfileReader> Reader;
  1365. if (SampleProfileReaderRawBinary::hasFormat(*B))
  1366. Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C));
  1367. else if (SampleProfileReaderExtBinary::hasFormat(*B))
  1368. Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C));
  1369. else if (SampleProfileReaderCompactBinary::hasFormat(*B))
  1370. Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C));
  1371. else if (SampleProfileReaderGCC::hasFormat(*B))
  1372. Reader.reset(new SampleProfileReaderGCC(std::move(B), C));
  1373. else if (SampleProfileReaderText::hasFormat(*B))
  1374. Reader.reset(new SampleProfileReaderText(std::move(B), C));
  1375. else
  1376. return sampleprof_error::unrecognized_format;
  1377. if (!RemapFilename.empty()) {
  1378. auto ReaderOrErr =
  1379. SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C);
  1380. if (std::error_code EC = ReaderOrErr.getError()) {
  1381. std::string Msg = "Could not create remapper: " + EC.message();
  1382. C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg));
  1383. return EC;
  1384. }
  1385. Reader->Remapper = std::move(ReaderOrErr.get());
  1386. }
  1387. FunctionSamples::Format = Reader->getFormat();
  1388. if (std::error_code EC = Reader->readHeader()) {
  1389. return EC;
  1390. }
  1391. return std::move(Reader);
  1392. }
  1393. // For text and GCC file formats, we compute the summary after reading the
  1394. // profile. Binary format has the profile summary in its header.
  1395. void SampleProfileReader::computeSummary() {
  1396. SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
  1397. Summary = Builder.computeSummaryForProfiles(Profiles);
  1398. }