InstrProfReader.cpp 38 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060
  1. //===- InstrProfReader.cpp - Instrumented profiling reader ----------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains support for reading profiling data for clang's
  10. // instrumentation based PGO and coverage.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/ProfileData/InstrProfReader.h"
  14. #include "llvm/ADT/ArrayRef.h"
  15. #include "llvm/ADT/DenseMap.h"
  16. #include "llvm/ADT/STLExtras.h"
  17. #include "llvm/ADT/StringExtras.h"
  18. #include "llvm/ADT/StringRef.h"
  19. #include "llvm/IR/ProfileSummary.h"
  20. #include "llvm/ProfileData/InstrProf.h"
  21. #include "llvm/ProfileData/ProfileCommon.h"
  22. #include "llvm/Support/Endian.h"
  23. #include "llvm/Support/Error.h"
  24. #include "llvm/Support/ErrorOr.h"
  25. #include "llvm/Support/MemoryBuffer.h"
  26. #include "llvm/Support/SwapByteOrder.h"
  27. #include "llvm/Support/SymbolRemappingReader.h"
  28. #include <algorithm>
  29. #include <cctype>
  30. #include <cstddef>
  31. #include <cstdint>
  32. #include <limits>
  33. #include <memory>
  34. #include <system_error>
  35. #include <utility>
  36. #include <vector>
  37. using namespace llvm;
  38. // Extracts the variant information from the top 8 bits in the version and
  39. // returns an enum specifying the variants present.
  40. static InstrProfKind getProfileKindFromVersion(uint64_t Version) {
  41. InstrProfKind ProfileKind = InstrProfKind::Unknown;
  42. if (Version & VARIANT_MASK_IR_PROF) {
  43. ProfileKind |= InstrProfKind::IR;
  44. }
  45. if (Version & VARIANT_MASK_CSIR_PROF) {
  46. ProfileKind |= InstrProfKind::CS;
  47. }
  48. if (Version & VARIANT_MASK_INSTR_ENTRY) {
  49. ProfileKind |= InstrProfKind::BB;
  50. }
  51. if (Version & VARIANT_MASK_BYTE_COVERAGE) {
  52. ProfileKind |= InstrProfKind::SingleByteCoverage;
  53. }
  54. if (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) {
  55. ProfileKind |= InstrProfKind::FunctionEntryOnly;
  56. }
  57. return ProfileKind;
  58. }
  59. static Expected<std::unique_ptr<MemoryBuffer>>
  60. setupMemoryBuffer(const Twine &Path) {
  61. ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
  62. MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true);
  63. if (std::error_code EC = BufferOrErr.getError())
  64. return errorCodeToError(EC);
  65. return std::move(BufferOrErr.get());
  66. }
  67. static Error initializeReader(InstrProfReader &Reader) {
  68. return Reader.readHeader();
  69. }
  70. Expected<std::unique_ptr<InstrProfReader>>
  71. InstrProfReader::create(const Twine &Path,
  72. const InstrProfCorrelator *Correlator) {
  73. // Set up the buffer to read.
  74. auto BufferOrError = setupMemoryBuffer(Path);
  75. if (Error E = BufferOrError.takeError())
  76. return std::move(E);
  77. return InstrProfReader::create(std::move(BufferOrError.get()), Correlator);
  78. }
  79. Expected<std::unique_ptr<InstrProfReader>>
  80. InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
  81. const InstrProfCorrelator *Correlator) {
  82. // Sanity check the buffer.
  83. if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max())
  84. return make_error<InstrProfError>(instrprof_error::too_large);
  85. if (Buffer->getBufferSize() == 0)
  86. return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
  87. std::unique_ptr<InstrProfReader> Result;
  88. // Create the reader.
  89. if (IndexedInstrProfReader::hasFormat(*Buffer))
  90. Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
  91. else if (RawInstrProfReader64::hasFormat(*Buffer))
  92. Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator));
  93. else if (RawInstrProfReader32::hasFormat(*Buffer))
  94. Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator));
  95. else if (TextInstrProfReader::hasFormat(*Buffer))
  96. Result.reset(new TextInstrProfReader(std::move(Buffer)));
  97. else
  98. return make_error<InstrProfError>(instrprof_error::unrecognized_format);
  99. // Initialize the reader and return the result.
  100. if (Error E = initializeReader(*Result))
  101. return std::move(E);
  102. return std::move(Result);
  103. }
  104. Expected<std::unique_ptr<IndexedInstrProfReader>>
  105. IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) {
  106. // Set up the buffer to read.
  107. auto BufferOrError = setupMemoryBuffer(Path);
  108. if (Error E = BufferOrError.takeError())
  109. return std::move(E);
  110. // Set up the remapping buffer if requested.
  111. std::unique_ptr<MemoryBuffer> RemappingBuffer;
  112. std::string RemappingPathStr = RemappingPath.str();
  113. if (!RemappingPathStr.empty()) {
  114. auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr);
  115. if (Error E = RemappingBufferOrError.takeError())
  116. return std::move(E);
  117. RemappingBuffer = std::move(RemappingBufferOrError.get());
  118. }
  119. return IndexedInstrProfReader::create(std::move(BufferOrError.get()),
  120. std::move(RemappingBuffer));
  121. }
  122. Expected<std::unique_ptr<IndexedInstrProfReader>>
  123. IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
  124. std::unique_ptr<MemoryBuffer> RemappingBuffer) {
  125. if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max())
  126. return make_error<InstrProfError>(instrprof_error::too_large);
  127. // Create the reader.
  128. if (!IndexedInstrProfReader::hasFormat(*Buffer))
  129. return make_error<InstrProfError>(instrprof_error::bad_magic);
  130. auto Result = std::make_unique<IndexedInstrProfReader>(
  131. std::move(Buffer), std::move(RemappingBuffer));
  132. // Initialize the reader and return the result.
  133. if (Error E = initializeReader(*Result))
  134. return std::move(E);
  135. return std::move(Result);
  136. }
  137. void InstrProfIterator::Increment() {
  138. if (auto E = Reader->readNextRecord(Record)) {
  139. // Handle errors in the reader.
  140. InstrProfError::take(std::move(E));
  141. *this = InstrProfIterator();
  142. }
  143. }
  144. bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) {
  145. // Verify that this really looks like plain ASCII text by checking a
  146. // 'reasonable' number of characters (up to profile magic size).
  147. size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t));
  148. StringRef buffer = Buffer.getBufferStart();
  149. return count == 0 ||
  150. std::all_of(buffer.begin(), buffer.begin() + count,
  151. [](char c) { return isPrint(c) || isSpace(c); });
  152. }
  153. // Read the profile variant flag from the header: ":FE" means this is a FE
  154. // generated profile. ":IR" means this is an IR level profile. Other strings
  155. // with a leading ':' will be reported an error format.
  156. Error TextInstrProfReader::readHeader() {
  157. Symtab.reset(new InstrProfSymtab());
  158. while (Line->startswith(":")) {
  159. StringRef Str = Line->substr(1);
  160. if (Str.equals_insensitive("ir"))
  161. ProfileKind |= InstrProfKind::IR;
  162. else if (Str.equals_insensitive("fe"))
  163. ProfileKind |= InstrProfKind::FE;
  164. else if (Str.equals_insensitive("csir")) {
  165. ProfileKind |= InstrProfKind::IR;
  166. ProfileKind |= InstrProfKind::CS;
  167. } else if (Str.equals_insensitive("entry_first"))
  168. ProfileKind |= InstrProfKind::BB;
  169. else if (Str.equals_insensitive("not_entry_first"))
  170. ProfileKind &= ~InstrProfKind::BB;
  171. else
  172. return error(instrprof_error::bad_header);
  173. ++Line;
  174. }
  175. return success();
  176. }
  177. Error
  178. TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
  179. #define CHECK_LINE_END(Line) \
  180. if (Line.is_at_end()) \
  181. return error(instrprof_error::truncated);
  182. #define READ_NUM(Str, Dst) \
  183. if ((Str).getAsInteger(10, (Dst))) \
  184. return error(instrprof_error::malformed);
  185. #define VP_READ_ADVANCE(Val) \
  186. CHECK_LINE_END(Line); \
  187. uint32_t Val; \
  188. READ_NUM((*Line), (Val)); \
  189. Line++;
  190. if (Line.is_at_end())
  191. return success();
  192. uint32_t NumValueKinds;
  193. if (Line->getAsInteger(10, NumValueKinds)) {
  194. // No value profile data
  195. return success();
  196. }
  197. if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1)
  198. return error(instrprof_error::malformed,
  199. "number of value kinds is invalid");
  200. Line++;
  201. for (uint32_t VK = 0; VK < NumValueKinds; VK++) {
  202. VP_READ_ADVANCE(ValueKind);
  203. if (ValueKind > IPVK_Last)
  204. return error(instrprof_error::malformed, "value kind is invalid");
  205. ;
  206. VP_READ_ADVANCE(NumValueSites);
  207. if (!NumValueSites)
  208. continue;
  209. Record.reserveSites(VK, NumValueSites);
  210. for (uint32_t S = 0; S < NumValueSites; S++) {
  211. VP_READ_ADVANCE(NumValueData);
  212. std::vector<InstrProfValueData> CurrentValues;
  213. for (uint32_t V = 0; V < NumValueData; V++) {
  214. CHECK_LINE_END(Line);
  215. std::pair<StringRef, StringRef> VD = Line->rsplit(':');
  216. uint64_t TakenCount, Value;
  217. if (ValueKind == IPVK_IndirectCallTarget) {
  218. if (InstrProfSymtab::isExternalSymbol(VD.first)) {
  219. Value = 0;
  220. } else {
  221. if (Error E = Symtab->addFuncName(VD.first))
  222. return E;
  223. Value = IndexedInstrProf::ComputeHash(VD.first);
  224. }
  225. } else {
  226. READ_NUM(VD.first, Value);
  227. }
  228. READ_NUM(VD.second, TakenCount);
  229. CurrentValues.push_back({Value, TakenCount});
  230. Line++;
  231. }
  232. Record.addValueData(ValueKind, S, CurrentValues.data(), NumValueData,
  233. nullptr);
  234. }
  235. }
  236. return success();
  237. #undef CHECK_LINE_END
  238. #undef READ_NUM
  239. #undef VP_READ_ADVANCE
  240. }
  241. Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
  242. // Skip empty lines and comments.
  243. while (!Line.is_at_end() && (Line->empty() || Line->startswith("#")))
  244. ++Line;
  245. // If we hit EOF while looking for a name, we're done.
  246. if (Line.is_at_end()) {
  247. return error(instrprof_error::eof);
  248. }
  249. // Read the function name.
  250. Record.Name = *Line++;
  251. if (Error E = Symtab->addFuncName(Record.Name))
  252. return error(std::move(E));
  253. // Read the function hash.
  254. if (Line.is_at_end())
  255. return error(instrprof_error::truncated);
  256. if ((Line++)->getAsInteger(0, Record.Hash))
  257. return error(instrprof_error::malformed,
  258. "function hash is not a valid integer");
  259. // Read the number of counters.
  260. uint64_t NumCounters;
  261. if (Line.is_at_end())
  262. return error(instrprof_error::truncated);
  263. if ((Line++)->getAsInteger(10, NumCounters))
  264. return error(instrprof_error::malformed,
  265. "number of counters is not a valid integer");
  266. if (NumCounters == 0)
  267. return error(instrprof_error::malformed, "number of counters is zero");
  268. // Read each counter and fill our internal storage with the values.
  269. Record.Clear();
  270. Record.Counts.reserve(NumCounters);
  271. for (uint64_t I = 0; I < NumCounters; ++I) {
  272. if (Line.is_at_end())
  273. return error(instrprof_error::truncated);
  274. uint64_t Count;
  275. if ((Line++)->getAsInteger(10, Count))
  276. return error(instrprof_error::malformed, "count is invalid");
  277. Record.Counts.push_back(Count);
  278. }
  279. // Check if value profile data exists and read it if so.
  280. if (Error E = readValueProfileData(Record))
  281. return error(std::move(E));
  282. return success();
  283. }
  284. template <class IntPtrT>
  285. InstrProfKind RawInstrProfReader<IntPtrT>::getProfileKind() const {
  286. return getProfileKindFromVersion(Version);
  287. }
  288. template <class IntPtrT>
  289. bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {
  290. if (DataBuffer.getBufferSize() < sizeof(uint64_t))
  291. return false;
  292. uint64_t Magic =
  293. *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());
  294. return RawInstrProf::getMagic<IntPtrT>() == Magic ||
  295. sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic;
  296. }
  297. template <class IntPtrT>
  298. Error RawInstrProfReader<IntPtrT>::readHeader() {
  299. if (!hasFormat(*DataBuffer))
  300. return error(instrprof_error::bad_magic);
  301. if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header))
  302. return error(instrprof_error::bad_header);
  303. auto *Header = reinterpret_cast<const RawInstrProf::Header *>(
  304. DataBuffer->getBufferStart());
  305. ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>();
  306. return readHeader(*Header);
  307. }
  308. template <class IntPtrT>
  309. Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
  310. const char *End = DataBuffer->getBufferEnd();
  311. // Skip zero padding between profiles.
  312. while (CurrentPos != End && *CurrentPos == 0)
  313. ++CurrentPos;
  314. // If there's nothing left, we're done.
  315. if (CurrentPos == End)
  316. return make_error<InstrProfError>(instrprof_error::eof);
  317. // If there isn't enough space for another header, this is probably just
  318. // garbage at the end of the file.
  319. if (CurrentPos + sizeof(RawInstrProf::Header) > End)
  320. return make_error<InstrProfError>(instrprof_error::malformed,
  321. "not enough space for another header");
  322. // The writer ensures each profile is padded to start at an aligned address.
  323. if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t))
  324. return make_error<InstrProfError>(instrprof_error::malformed,
  325. "insufficient padding");
  326. // The magic should have the same byte order as in the previous header.
  327. uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos);
  328. if (Magic != swap(RawInstrProf::getMagic<IntPtrT>()))
  329. return make_error<InstrProfError>(instrprof_error::bad_magic);
  330. // There's another profile to read, so we need to process the header.
  331. auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos);
  332. return readHeader(*Header);
  333. }
  334. template <class IntPtrT>
  335. Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) {
  336. if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart)))
  337. return error(std::move(E));
  338. for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) {
  339. const IntPtrT FPtr = swap(I->FunctionPointer);
  340. if (!FPtr)
  341. continue;
  342. Symtab.mapAddress(FPtr, I->NameRef);
  343. }
  344. return success();
  345. }
  346. template <class IntPtrT>
  347. Error RawInstrProfReader<IntPtrT>::readHeader(
  348. const RawInstrProf::Header &Header) {
  349. Version = swap(Header.Version);
  350. if (GET_VERSION(Version) != RawInstrProf::Version)
  351. return error(instrprof_error::unsupported_version);
  352. if (useDebugInfoCorrelate() && !Correlator)
  353. return error(instrprof_error::missing_debug_info_for_correlation);
  354. if (!useDebugInfoCorrelate() && Correlator)
  355. return error(instrprof_error::unexpected_debug_info_for_correlation);
  356. BinaryIdsSize = swap(Header.BinaryIdsSize);
  357. if (BinaryIdsSize % sizeof(uint64_t))
  358. return error(instrprof_error::bad_header);
  359. CountersDelta = swap(Header.CountersDelta);
  360. NamesDelta = swap(Header.NamesDelta);
  361. auto NumData = swap(Header.DataSize);
  362. auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters);
  363. auto CountersSize = swap(Header.CountersSize) * getCounterTypeSize();
  364. auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters);
  365. auto NamesSize = swap(Header.NamesSize);
  366. ValueKindLast = swap(Header.ValueKindLast);
  367. auto DataSize = NumData * sizeof(RawInstrProf::ProfileData<IntPtrT>);
  368. auto PaddingSize = getNumPaddingBytes(NamesSize);
  369. // Profile data starts after profile header and binary ids if exist.
  370. ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdsSize;
  371. ptrdiff_t CountersOffset = DataOffset + DataSize + PaddingBytesBeforeCounters;
  372. ptrdiff_t NamesOffset =
  373. CountersOffset + CountersSize + PaddingBytesAfterCounters;
  374. ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize;
  375. auto *Start = reinterpret_cast<const char *>(&Header);
  376. if (Start + ValueDataOffset > DataBuffer->getBufferEnd())
  377. return error(instrprof_error::bad_header);
  378. if (Correlator) {
  379. // These sizes in the raw file are zero because we constructed them in the
  380. // Correlator.
  381. assert(DataSize == 0 && NamesSize == 0);
  382. assert(CountersDelta == 0 && NamesDelta == 0);
  383. Data = Correlator->getDataPointer();
  384. DataEnd = Data + Correlator->getDataSize();
  385. NamesStart = Correlator->getNamesPointer();
  386. NamesEnd = NamesStart + Correlator->getNamesSize();
  387. } else {
  388. Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(
  389. Start + DataOffset);
  390. DataEnd = Data + NumData;
  391. NamesStart = Start + NamesOffset;
  392. NamesEnd = NamesStart + NamesSize;
  393. }
  394. // Binary ids start just after the header.
  395. BinaryIdsStart =
  396. reinterpret_cast<const uint8_t *>(&Header) + sizeof(RawInstrProf::Header);
  397. CountersStart = Start + CountersOffset;
  398. CountersEnd = CountersStart + CountersSize;
  399. ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset);
  400. const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd();
  401. if (BinaryIdsStart + BinaryIdsSize > BufferEnd)
  402. return error(instrprof_error::bad_header);
  403. std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
  404. if (Error E = createSymtab(*NewSymtab.get()))
  405. return E;
  406. Symtab = std::move(NewSymtab);
  407. return success();
  408. }
  409. template <class IntPtrT>
  410. Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) {
  411. Record.Name = getName(Data->NameRef);
  412. return success();
  413. }
  414. template <class IntPtrT>
  415. Error RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) {
  416. Record.Hash = swap(Data->FuncHash);
  417. return success();
  418. }
  419. template <class IntPtrT>
  420. Error RawInstrProfReader<IntPtrT>::readRawCounts(
  421. InstrProfRecord &Record) {
  422. uint32_t NumCounters = swap(Data->NumCounters);
  423. if (NumCounters == 0)
  424. return error(instrprof_error::malformed, "number of counters is zero");
  425. ptrdiff_t CounterBaseOffset = swap(Data->CounterPtr) - CountersDelta;
  426. if (CounterBaseOffset < 0)
  427. return error(
  428. instrprof_error::malformed,
  429. ("counter offset " + Twine(CounterBaseOffset) + " is negative").str());
  430. if (CounterBaseOffset >= CountersEnd - CountersStart)
  431. return error(instrprof_error::malformed,
  432. ("counter offset " + Twine(CounterBaseOffset) +
  433. " is greater than the maximum counter offset " +
  434. Twine(CountersEnd - CountersStart - 1))
  435. .str());
  436. uint64_t MaxNumCounters =
  437. (CountersEnd - (CountersStart + CounterBaseOffset)) /
  438. getCounterTypeSize();
  439. if (NumCounters > MaxNumCounters)
  440. return error(instrprof_error::malformed,
  441. ("number of counters " + Twine(NumCounters) +
  442. " is greater than the maximum number of counters " +
  443. Twine(MaxNumCounters))
  444. .str());
  445. Record.Counts.clear();
  446. Record.Counts.reserve(NumCounters);
  447. for (uint32_t I = 0; I < NumCounters; I++) {
  448. const char *Ptr =
  449. CountersStart + CounterBaseOffset + I * getCounterTypeSize();
  450. if (hasSingleByteCoverage()) {
  451. // A value of zero signifies the block is covered.
  452. Record.Counts.push_back(*Ptr == 0 ? 1 : 0);
  453. } else {
  454. const auto *CounterValue = reinterpret_cast<const uint64_t *>(Ptr);
  455. Record.Counts.push_back(swap(*CounterValue));
  456. }
  457. }
  458. return success();
  459. }
  460. template <class IntPtrT>
  461. Error RawInstrProfReader<IntPtrT>::readValueProfilingData(
  462. InstrProfRecord &Record) {
  463. Record.clearValueData();
  464. CurValueDataSize = 0;
  465. // Need to match the logic in value profile dumper code in compiler-rt:
  466. uint32_t NumValueKinds = 0;
  467. for (uint32_t I = 0; I < IPVK_Last + 1; I++)
  468. NumValueKinds += (Data->NumValueSites[I] != 0);
  469. if (!NumValueKinds)
  470. return success();
  471. Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
  472. ValueProfData::getValueProfData(
  473. ValueDataStart, (const unsigned char *)DataBuffer->getBufferEnd(),
  474. getDataEndianness());
  475. if (Error E = VDataPtrOrErr.takeError())
  476. return E;
  477. // Note that besides deserialization, this also performs the conversion for
  478. // indirect call targets. The function pointers from the raw profile are
  479. // remapped into function name hashes.
  480. VDataPtrOrErr.get()->deserializeTo(Record, Symtab.get());
  481. CurValueDataSize = VDataPtrOrErr.get()->getSize();
  482. return success();
  483. }
  484. template <class IntPtrT>
  485. Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) {
  486. if (atEnd())
  487. // At this point, ValueDataStart field points to the next header.
  488. if (Error E = readNextHeader(getNextHeaderPos()))
  489. return error(std::move(E));
  490. // Read name ad set it in Record.
  491. if (Error E = readName(Record))
  492. return error(std::move(E));
  493. // Read FuncHash and set it in Record.
  494. if (Error E = readFuncHash(Record))
  495. return error(std::move(E));
  496. // Read raw counts and set Record.
  497. if (Error E = readRawCounts(Record))
  498. return error(std::move(E));
  499. // Read value data and set Record.
  500. if (Error E = readValueProfilingData(Record))
  501. return error(std::move(E));
  502. // Iterate.
  503. advanceData();
  504. return success();
  505. }
  506. static size_t RoundUp(size_t size, size_t align) {
  507. return (size + align - 1) & ~(align - 1);
  508. }
  509. template <class IntPtrT>
  510. Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) {
  511. if (BinaryIdsSize == 0)
  512. return success();
  513. OS << "Binary IDs: \n";
  514. const uint8_t *BI = BinaryIdsStart;
  515. const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize;
  516. while (BI < BIEnd) {
  517. size_t Remaining = BIEnd - BI;
  518. // There should be enough left to read the binary ID size field.
  519. if (Remaining < sizeof(uint64_t))
  520. return make_error<InstrProfError>(
  521. instrprof_error::malformed,
  522. "not enough data to read binary id length");
  523. uint64_t BinaryIdLen = swap(*reinterpret_cast<const uint64_t *>(BI));
  524. // There should be enough left to read the binary ID size field, and the
  525. // binary ID.
  526. if (Remaining < sizeof(BinaryIdLen) + BinaryIdLen)
  527. return make_error<InstrProfError>(
  528. instrprof_error::malformed, "not enough data to read binary id data");
  529. // Increment by binary id length data type size.
  530. BI += sizeof(BinaryIdLen);
  531. if (BI > (const uint8_t *)DataBuffer->getBufferEnd())
  532. return make_error<InstrProfError>(
  533. instrprof_error::malformed,
  534. "binary id that is read is bigger than buffer size");
  535. for (uint64_t I = 0; I < BinaryIdLen; I++)
  536. OS << format("%02x", BI[I]);
  537. OS << "\n";
  538. // Increment by binary id data length, rounded to the next 8 bytes. This
  539. // accounts for the zero-padding after each build ID.
  540. BI += RoundUp(BinaryIdLen, sizeof(uint64_t));
  541. if (BI > (const uint8_t *)DataBuffer->getBufferEnd())
  542. return make_error<InstrProfError>(instrprof_error::malformed);
  543. }
  544. return success();
  545. }
  546. namespace llvm {
  547. template class RawInstrProfReader<uint32_t>;
  548. template class RawInstrProfReader<uint64_t>;
  549. } // end namespace llvm
  550. InstrProfLookupTrait::hash_value_type
  551. InstrProfLookupTrait::ComputeHash(StringRef K) {
  552. return IndexedInstrProf::ComputeHash(HashType, K);
  553. }
  554. using data_type = InstrProfLookupTrait::data_type;
  555. using offset_type = InstrProfLookupTrait::offset_type;
  556. bool InstrProfLookupTrait::readValueProfilingData(
  557. const unsigned char *&D, const unsigned char *const End) {
  558. Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
  559. ValueProfData::getValueProfData(D, End, ValueProfDataEndianness);
  560. if (VDataPtrOrErr.takeError())
  561. return false;
  562. VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr);
  563. D += VDataPtrOrErr.get()->TotalSize;
  564. return true;
  565. }
  566. data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
  567. offset_type N) {
  568. using namespace support;
  569. // Check if the data is corrupt. If so, don't try to read it.
  570. if (N % sizeof(uint64_t))
  571. return data_type();
  572. DataBuffer.clear();
  573. std::vector<uint64_t> CounterBuffer;
  574. const unsigned char *End = D + N;
  575. while (D < End) {
  576. // Read hash.
  577. if (D + sizeof(uint64_t) >= End)
  578. return data_type();
  579. uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
  580. // Initialize number of counters for GET_VERSION(FormatVersion) == 1.
  581. uint64_t CountsSize = N / sizeof(uint64_t) - 1;
  582. // If format version is different then read the number of counters.
  583. if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) {
  584. if (D + sizeof(uint64_t) > End)
  585. return data_type();
  586. CountsSize = endian::readNext<uint64_t, little, unaligned>(D);
  587. }
  588. // Read counter values.
  589. if (D + CountsSize * sizeof(uint64_t) > End)
  590. return data_type();
  591. CounterBuffer.clear();
  592. CounterBuffer.reserve(CountsSize);
  593. for (uint64_t J = 0; J < CountsSize; ++J)
  594. CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
  595. DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer));
  596. // Read value profiling data.
  597. if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 &&
  598. !readValueProfilingData(D, End)) {
  599. DataBuffer.clear();
  600. return data_type();
  601. }
  602. }
  603. return DataBuffer;
  604. }
  605. template <typename HashTableImpl>
  606. Error InstrProfReaderIndex<HashTableImpl>::getRecords(
  607. StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) {
  608. auto Iter = HashTable->find(FuncName);
  609. if (Iter == HashTable->end())
  610. return make_error<InstrProfError>(instrprof_error::unknown_function);
  611. Data = (*Iter);
  612. if (Data.empty())
  613. return make_error<InstrProfError>(instrprof_error::malformed,
  614. "profile data is empty");
  615. return Error::success();
  616. }
  617. template <typename HashTableImpl>
  618. Error InstrProfReaderIndex<HashTableImpl>::getRecords(
  619. ArrayRef<NamedInstrProfRecord> &Data) {
  620. if (atEnd())
  621. return make_error<InstrProfError>(instrprof_error::eof);
  622. Data = *RecordIterator;
  623. if (Data.empty())
  624. return make_error<InstrProfError>(instrprof_error::malformed,
  625. "profile data is empty");
  626. return Error::success();
  627. }
  628. template <typename HashTableImpl>
  629. InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex(
  630. const unsigned char *Buckets, const unsigned char *const Payload,
  631. const unsigned char *const Base, IndexedInstrProf::HashT HashType,
  632. uint64_t Version) {
  633. FormatVersion = Version;
  634. HashTable.reset(HashTableImpl::Create(
  635. Buckets, Payload, Base,
  636. typename HashTableImpl::InfoType(HashType, Version)));
  637. RecordIterator = HashTable->data_begin();
  638. }
  639. template <typename HashTableImpl>
  640. InstrProfKind InstrProfReaderIndex<HashTableImpl>::getProfileKind() const {
  641. return getProfileKindFromVersion(FormatVersion);
  642. }
  643. namespace {
  644. /// A remapper that does not apply any remappings.
  645. class InstrProfReaderNullRemapper : public InstrProfReaderRemapper {
  646. InstrProfReaderIndexBase &Underlying;
  647. public:
  648. InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying)
  649. : Underlying(Underlying) {}
  650. Error getRecords(StringRef FuncName,
  651. ArrayRef<NamedInstrProfRecord> &Data) override {
  652. return Underlying.getRecords(FuncName, Data);
  653. }
  654. };
  655. } // namespace
  656. /// A remapper that applies remappings based on a symbol remapping file.
  657. template <typename HashTableImpl>
  658. class llvm::InstrProfReaderItaniumRemapper
  659. : public InstrProfReaderRemapper {
  660. public:
  661. InstrProfReaderItaniumRemapper(
  662. std::unique_ptr<MemoryBuffer> RemapBuffer,
  663. InstrProfReaderIndex<HashTableImpl> &Underlying)
  664. : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) {
  665. }
  666. /// Extract the original function name from a PGO function name.
  667. static StringRef extractName(StringRef Name) {
  668. // We can have multiple :-separated pieces; there can be pieces both
  669. // before and after the mangled name. Find the first part that starts
  670. // with '_Z'; we'll assume that's the mangled name we want.
  671. std::pair<StringRef, StringRef> Parts = {StringRef(), Name};
  672. while (true) {
  673. Parts = Parts.second.split(':');
  674. if (Parts.first.startswith("_Z"))
  675. return Parts.first;
  676. if (Parts.second.empty())
  677. return Name;
  678. }
  679. }
  680. /// Given a mangled name extracted from a PGO function name, and a new
  681. /// form for that mangled name, reconstitute the name.
  682. static void reconstituteName(StringRef OrigName, StringRef ExtractedName,
  683. StringRef Replacement,
  684. SmallVectorImpl<char> &Out) {
  685. Out.reserve(OrigName.size() + Replacement.size() - ExtractedName.size());
  686. Out.insert(Out.end(), OrigName.begin(), ExtractedName.begin());
  687. Out.insert(Out.end(), Replacement.begin(), Replacement.end());
  688. Out.insert(Out.end(), ExtractedName.end(), OrigName.end());
  689. }
  690. Error populateRemappings() override {
  691. if (Error E = Remappings.read(*RemapBuffer))
  692. return E;
  693. for (StringRef Name : Underlying.HashTable->keys()) {
  694. StringRef RealName = extractName(Name);
  695. if (auto Key = Remappings.insert(RealName)) {
  696. // FIXME: We could theoretically map the same equivalence class to
  697. // multiple names in the profile data. If that happens, we should
  698. // return NamedInstrProfRecords from all of them.
  699. MappedNames.insert({Key, RealName});
  700. }
  701. }
  702. return Error::success();
  703. }
  704. Error getRecords(StringRef FuncName,
  705. ArrayRef<NamedInstrProfRecord> &Data) override {
  706. StringRef RealName = extractName(FuncName);
  707. if (auto Key = Remappings.lookup(RealName)) {
  708. StringRef Remapped = MappedNames.lookup(Key);
  709. if (!Remapped.empty()) {
  710. if (RealName.begin() == FuncName.begin() &&
  711. RealName.end() == FuncName.end())
  712. FuncName = Remapped;
  713. else {
  714. // Try rebuilding the name from the given remapping.
  715. SmallString<256> Reconstituted;
  716. reconstituteName(FuncName, RealName, Remapped, Reconstituted);
  717. Error E = Underlying.getRecords(Reconstituted, Data);
  718. if (!E)
  719. return E;
  720. // If we failed because the name doesn't exist, fall back to asking
  721. // about the original name.
  722. if (Error Unhandled = handleErrors(
  723. std::move(E), [](std::unique_ptr<InstrProfError> Err) {
  724. return Err->get() == instrprof_error::unknown_function
  725. ? Error::success()
  726. : Error(std::move(Err));
  727. }))
  728. return Unhandled;
  729. }
  730. }
  731. }
  732. return Underlying.getRecords(FuncName, Data);
  733. }
  734. private:
  735. /// The memory buffer containing the remapping configuration. Remappings
  736. /// holds pointers into this buffer.
  737. std::unique_ptr<MemoryBuffer> RemapBuffer;
  738. /// The mangling remapper.
  739. SymbolRemappingReader Remappings;
  740. /// Mapping from mangled name keys to the name used for the key in the
  741. /// profile data.
  742. /// FIXME: Can we store a location within the on-disk hash table instead of
  743. /// redoing lookup?
  744. DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames;
  745. /// The real profile data reader.
  746. InstrProfReaderIndex<HashTableImpl> &Underlying;
  747. };
  748. bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
  749. using namespace support;
  750. if (DataBuffer.getBufferSize() < 8)
  751. return false;
  752. uint64_t Magic =
  753. endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart());
  754. // Verify that it's magical.
  755. return Magic == IndexedInstrProf::Magic;
  756. }
  757. const unsigned char *
  758. IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
  759. const unsigned char *Cur, bool UseCS) {
  760. using namespace IndexedInstrProf;
  761. using namespace support;
  762. if (Version >= IndexedInstrProf::Version4) {
  763. const IndexedInstrProf::Summary *SummaryInLE =
  764. reinterpret_cast<const IndexedInstrProf::Summary *>(Cur);
  765. uint64_t NFields =
  766. endian::byte_swap<uint64_t, little>(SummaryInLE->NumSummaryFields);
  767. uint64_t NEntries =
  768. endian::byte_swap<uint64_t, little>(SummaryInLE->NumCutoffEntries);
  769. uint32_t SummarySize =
  770. IndexedInstrProf::Summary::getSize(NFields, NEntries);
  771. std::unique_ptr<IndexedInstrProf::Summary> SummaryData =
  772. IndexedInstrProf::allocSummary(SummarySize);
  773. const uint64_t *Src = reinterpret_cast<const uint64_t *>(SummaryInLE);
  774. uint64_t *Dst = reinterpret_cast<uint64_t *>(SummaryData.get());
  775. for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
  776. Dst[I] = endian::byte_swap<uint64_t, little>(Src[I]);
  777. SummaryEntryVector DetailedSummary;
  778. for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) {
  779. const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I);
  780. DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount,
  781. Ent.NumBlocks);
  782. }
  783. std::unique_ptr<llvm::ProfileSummary> &Summary =
  784. UseCS ? this->CS_Summary : this->Summary;
  785. // initialize InstrProfSummary using the SummaryData from disk.
  786. Summary = std::make_unique<ProfileSummary>(
  787. UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr,
  788. DetailedSummary, SummaryData->get(Summary::TotalBlockCount),
  789. SummaryData->get(Summary::MaxBlockCount),
  790. SummaryData->get(Summary::MaxInternalBlockCount),
  791. SummaryData->get(Summary::MaxFunctionCount),
  792. SummaryData->get(Summary::TotalNumBlocks),
  793. SummaryData->get(Summary::TotalNumFunctions));
  794. return Cur + SummarySize;
  795. } else {
  796. // The older versions do not support a profile summary. This just computes
  797. // an empty summary, which will not result in accurate hot/cold detection.
  798. // We would need to call addRecord for all NamedInstrProfRecords to get the
  799. // correct summary. However, this version is old (prior to early 2016) and
  800. // has not been supporting an accurate summary for several years.
  801. InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
  802. Summary = Builder.getSummary();
  803. return Cur;
  804. }
  805. }
  806. Error IndexedInstrProfReader::readHeader() {
  807. using namespace support;
  808. const unsigned char *Start =
  809. (const unsigned char *)DataBuffer->getBufferStart();
  810. const unsigned char *Cur = Start;
  811. if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24)
  812. return error(instrprof_error::truncated);
  813. auto *Header = reinterpret_cast<const IndexedInstrProf::Header *>(Cur);
  814. Cur += sizeof(IndexedInstrProf::Header);
  815. // Check the magic number.
  816. uint64_t Magic = endian::byte_swap<uint64_t, little>(Header->Magic);
  817. if (Magic != IndexedInstrProf::Magic)
  818. return error(instrprof_error::bad_magic);
  819. // Read the version.
  820. uint64_t FormatVersion = endian::byte_swap<uint64_t, little>(Header->Version);
  821. if (GET_VERSION(FormatVersion) >
  822. IndexedInstrProf::ProfVersion::CurrentVersion)
  823. return error(instrprof_error::unsupported_version);
  824. Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur,
  825. /* UseCS */ false);
  826. if (FormatVersion & VARIANT_MASK_CSIR_PROF)
  827. Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur,
  828. /* UseCS */ true);
  829. // Read the hash type and start offset.
  830. IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>(
  831. endian::byte_swap<uint64_t, little>(Header->HashType));
  832. if (HashType > IndexedInstrProf::HashT::Last)
  833. return error(instrprof_error::unsupported_hash_type);
  834. uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset);
  835. // The rest of the file is an on disk hash table.
  836. auto IndexPtr =
  837. std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>(
  838. Start + HashOffset, Cur, Start, HashType, FormatVersion);
  839. // Load the remapping table now if requested.
  840. if (RemappingBuffer) {
  841. Remapper = std::make_unique<
  842. InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>(
  843. std::move(RemappingBuffer), *IndexPtr);
  844. if (Error E = Remapper->populateRemappings())
  845. return E;
  846. } else {
  847. Remapper = std::make_unique<InstrProfReaderNullRemapper>(*IndexPtr);
  848. }
  849. Index = std::move(IndexPtr);
  850. return success();
  851. }
  852. InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
  853. if (Symtab.get())
  854. return *Symtab.get();
  855. std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
  856. if (Error E = Index->populateSymtab(*NewSymtab.get())) {
  857. consumeError(error(InstrProfError::take(std::move(E))));
  858. }
  859. Symtab = std::move(NewSymtab);
  860. return *Symtab.get();
  861. }
  862. Expected<InstrProfRecord>
  863. IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName,
  864. uint64_t FuncHash) {
  865. ArrayRef<NamedInstrProfRecord> Data;
  866. Error Err = Remapper->getRecords(FuncName, Data);
  867. if (Err)
  868. return std::move(Err);
  869. // Found it. Look for counters with the right hash.
  870. for (const NamedInstrProfRecord &I : Data) {
  871. // Check for a match and fill the vector if there is one.
  872. if (I.Hash == FuncHash)
  873. return std::move(I);
  874. }
  875. return error(instrprof_error::hash_mismatch);
  876. }
  877. Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName,
  878. uint64_t FuncHash,
  879. std::vector<uint64_t> &Counts) {
  880. Expected<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash);
  881. if (Error E = Record.takeError())
  882. return error(std::move(E));
  883. Counts = Record.get().Counts;
  884. return success();
  885. }
  886. Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
  887. ArrayRef<NamedInstrProfRecord> Data;
  888. Error E = Index->getRecords(Data);
  889. if (E)
  890. return error(std::move(E));
  891. Record = Data[RecordIndex++];
  892. if (RecordIndex >= Data.size()) {
  893. Index->advanceToNextKey();
  894. RecordIndex = 0;
  895. }
  896. return success();
  897. }
  898. void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) {
  899. uint64_t NumFuncs = 0;
  900. for (const auto &Func : *this) {
  901. if (isIRLevelProfile()) {
  902. bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash);
  903. if (FuncIsCS != IsCS)
  904. continue;
  905. }
  906. Func.accumulateCounts(Sum);
  907. ++NumFuncs;
  908. }
  909. Sum.NumEntries = NumFuncs;
  910. }