InstrProfReader.cpp 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933
  1. //===- InstrProfReader.cpp - Instrumented profiling reader ----------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains support for reading profiling data for clang's
  10. // instrumentation based PGO and coverage.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/ProfileData/InstrProfReader.h"
  14. #include "llvm/ADT/ArrayRef.h"
  15. #include "llvm/ADT/DenseMap.h"
  16. #include "llvm/ADT/STLExtras.h"
  17. #include "llvm/ADT/StringExtras.h"
  18. #include "llvm/ADT/StringRef.h"
  19. #include "llvm/IR/ProfileSummary.h"
  20. #include "llvm/ProfileData/InstrProf.h"
  21. #include "llvm/ProfileData/ProfileCommon.h"
  22. #include "llvm/Support/Endian.h"
  23. #include "llvm/Support/Error.h"
  24. #include "llvm/Support/ErrorOr.h"
  25. #include "llvm/Support/MemoryBuffer.h"
  26. #include "llvm/Support/SymbolRemappingReader.h"
  27. #include "llvm/Support/SwapByteOrder.h"
  28. #include <algorithm>
  29. #include <cctype>
  30. #include <cstddef>
  31. #include <cstdint>
  32. #include <limits>
  33. #include <memory>
  34. #include <system_error>
  35. #include <utility>
  36. #include <vector>
  37. using namespace llvm;
  38. static Expected<std::unique_ptr<MemoryBuffer>>
  39. setupMemoryBuffer(const Twine &Path) {
  40. ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
  41. MemoryBuffer::getFileOrSTDIN(Path);
  42. if (std::error_code EC = BufferOrErr.getError())
  43. return errorCodeToError(EC);
  44. return std::move(BufferOrErr.get());
  45. }
  46. static Error initializeReader(InstrProfReader &Reader) {
  47. return Reader.readHeader();
  48. }
  49. Expected<std::unique_ptr<InstrProfReader>>
  50. InstrProfReader::create(const Twine &Path) {
  51. // Set up the buffer to read.
  52. auto BufferOrError = setupMemoryBuffer(Path);
  53. if (Error E = BufferOrError.takeError())
  54. return std::move(E);
  55. return InstrProfReader::create(std::move(BufferOrError.get()));
  56. }
  57. Expected<std::unique_ptr<InstrProfReader>>
  58. InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
  59. // Sanity check the buffer.
  60. if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max())
  61. return make_error<InstrProfError>(instrprof_error::too_large);
  62. if (Buffer->getBufferSize() == 0)
  63. return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
  64. std::unique_ptr<InstrProfReader> Result;
  65. // Create the reader.
  66. if (IndexedInstrProfReader::hasFormat(*Buffer))
  67. Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
  68. else if (RawInstrProfReader64::hasFormat(*Buffer))
  69. Result.reset(new RawInstrProfReader64(std::move(Buffer)));
  70. else if (RawInstrProfReader32::hasFormat(*Buffer))
  71. Result.reset(new RawInstrProfReader32(std::move(Buffer)));
  72. else if (TextInstrProfReader::hasFormat(*Buffer))
  73. Result.reset(new TextInstrProfReader(std::move(Buffer)));
  74. else
  75. return make_error<InstrProfError>(instrprof_error::unrecognized_format);
  76. // Initialize the reader and return the result.
  77. if (Error E = initializeReader(*Result))
  78. return std::move(E);
  79. return std::move(Result);
  80. }
  81. Expected<std::unique_ptr<IndexedInstrProfReader>>
  82. IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) {
  83. // Set up the buffer to read.
  84. auto BufferOrError = setupMemoryBuffer(Path);
  85. if (Error E = BufferOrError.takeError())
  86. return std::move(E);
  87. // Set up the remapping buffer if requested.
  88. std::unique_ptr<MemoryBuffer> RemappingBuffer;
  89. std::string RemappingPathStr = RemappingPath.str();
  90. if (!RemappingPathStr.empty()) {
  91. auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr);
  92. if (Error E = RemappingBufferOrError.takeError())
  93. return std::move(E);
  94. RemappingBuffer = std::move(RemappingBufferOrError.get());
  95. }
  96. return IndexedInstrProfReader::create(std::move(BufferOrError.get()),
  97. std::move(RemappingBuffer));
  98. }
  99. Expected<std::unique_ptr<IndexedInstrProfReader>>
  100. IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
  101. std::unique_ptr<MemoryBuffer> RemappingBuffer) {
  102. // Sanity check the buffer.
  103. if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max())
  104. return make_error<InstrProfError>(instrprof_error::too_large);
  105. // Create the reader.
  106. if (!IndexedInstrProfReader::hasFormat(*Buffer))
  107. return make_error<InstrProfError>(instrprof_error::bad_magic);
  108. auto Result = std::make_unique<IndexedInstrProfReader>(
  109. std::move(Buffer), std::move(RemappingBuffer));
  110. // Initialize the reader and return the result.
  111. if (Error E = initializeReader(*Result))
  112. return std::move(E);
  113. return std::move(Result);
  114. }
  115. void InstrProfIterator::Increment() {
  116. if (auto E = Reader->readNextRecord(Record)) {
  117. // Handle errors in the reader.
  118. InstrProfError::take(std::move(E));
  119. *this = InstrProfIterator();
  120. }
  121. }
  122. bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) {
  123. // Verify that this really looks like plain ASCII text by checking a
  124. // 'reasonable' number of characters (up to profile magic size).
  125. size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t));
  126. StringRef buffer = Buffer.getBufferStart();
  127. return count == 0 ||
  128. std::all_of(buffer.begin(), buffer.begin() + count,
  129. [](char c) { return isPrint(c) || isSpace(c); });
  130. }
  131. // Read the profile variant flag from the header: ":FE" means this is a FE
  132. // generated profile. ":IR" means this is an IR level profile. Other strings
  133. // with a leading ':' will be reported an error format.
  134. Error TextInstrProfReader::readHeader() {
  135. Symtab.reset(new InstrProfSymtab());
  136. bool IsIRInstr = false;
  137. bool IsEntryFirst = false;
  138. bool IsCS = false;
  139. while (Line->startswith(":")) {
  140. StringRef Str = Line->substr(1);
  141. if (Str.equals_lower("ir"))
  142. IsIRInstr = true;
  143. else if (Str.equals_lower("fe"))
  144. IsIRInstr = false;
  145. else if (Str.equals_lower("csir")) {
  146. IsIRInstr = true;
  147. IsCS = true;
  148. } else if (Str.equals_lower("entry_first"))
  149. IsEntryFirst = true;
  150. else if (Str.equals_lower("not_entry_first"))
  151. IsEntryFirst = false;
  152. else
  153. return error(instrprof_error::bad_header);
  154. ++Line;
  155. }
  156. IsIRLevelProfile = IsIRInstr;
  157. InstrEntryBBEnabled = IsEntryFirst;
  158. HasCSIRLevelProfile = IsCS;
  159. return success();
  160. }
  161. Error
  162. TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
  163. #define CHECK_LINE_END(Line) \
  164. if (Line.is_at_end()) \
  165. return error(instrprof_error::truncated);
  166. #define READ_NUM(Str, Dst) \
  167. if ((Str).getAsInteger(10, (Dst))) \
  168. return error(instrprof_error::malformed);
  169. #define VP_READ_ADVANCE(Val) \
  170. CHECK_LINE_END(Line); \
  171. uint32_t Val; \
  172. READ_NUM((*Line), (Val)); \
  173. Line++;
  174. if (Line.is_at_end())
  175. return success();
  176. uint32_t NumValueKinds;
  177. if (Line->getAsInteger(10, NumValueKinds)) {
  178. // No value profile data
  179. return success();
  180. }
  181. if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1)
  182. return error(instrprof_error::malformed);
  183. Line++;
  184. for (uint32_t VK = 0; VK < NumValueKinds; VK++) {
  185. VP_READ_ADVANCE(ValueKind);
  186. if (ValueKind > IPVK_Last)
  187. return error(instrprof_error::malformed);
  188. VP_READ_ADVANCE(NumValueSites);
  189. if (!NumValueSites)
  190. continue;
  191. Record.reserveSites(VK, NumValueSites);
  192. for (uint32_t S = 0; S < NumValueSites; S++) {
  193. VP_READ_ADVANCE(NumValueData);
  194. std::vector<InstrProfValueData> CurrentValues;
  195. for (uint32_t V = 0; V < NumValueData; V++) {
  196. CHECK_LINE_END(Line);
  197. std::pair<StringRef, StringRef> VD = Line->rsplit(':');
  198. uint64_t TakenCount, Value;
  199. if (ValueKind == IPVK_IndirectCallTarget) {
  200. if (InstrProfSymtab::isExternalSymbol(VD.first)) {
  201. Value = 0;
  202. } else {
  203. if (Error E = Symtab->addFuncName(VD.first))
  204. return E;
  205. Value = IndexedInstrProf::ComputeHash(VD.first);
  206. }
  207. } else {
  208. READ_NUM(VD.first, Value);
  209. }
  210. READ_NUM(VD.second, TakenCount);
  211. CurrentValues.push_back({Value, TakenCount});
  212. Line++;
  213. }
  214. Record.addValueData(ValueKind, S, CurrentValues.data(), NumValueData,
  215. nullptr);
  216. }
  217. }
  218. return success();
  219. #undef CHECK_LINE_END
  220. #undef READ_NUM
  221. #undef VP_READ_ADVANCE
  222. }
  223. Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
  224. // Skip empty lines and comments.
  225. while (!Line.is_at_end() && (Line->empty() || Line->startswith("#")))
  226. ++Line;
  227. // If we hit EOF while looking for a name, we're done.
  228. if (Line.is_at_end()) {
  229. return error(instrprof_error::eof);
  230. }
  231. // Read the function name.
  232. Record.Name = *Line++;
  233. if (Error E = Symtab->addFuncName(Record.Name))
  234. return error(std::move(E));
  235. // Read the function hash.
  236. if (Line.is_at_end())
  237. return error(instrprof_error::truncated);
  238. if ((Line++)->getAsInteger(0, Record.Hash))
  239. return error(instrprof_error::malformed);
  240. // Read the number of counters.
  241. uint64_t NumCounters;
  242. if (Line.is_at_end())
  243. return error(instrprof_error::truncated);
  244. if ((Line++)->getAsInteger(10, NumCounters))
  245. return error(instrprof_error::malformed);
  246. if (NumCounters == 0)
  247. return error(instrprof_error::malformed);
  248. // Read each counter and fill our internal storage with the values.
  249. Record.Clear();
  250. Record.Counts.reserve(NumCounters);
  251. for (uint64_t I = 0; I < NumCounters; ++I) {
  252. if (Line.is_at_end())
  253. return error(instrprof_error::truncated);
  254. uint64_t Count;
  255. if ((Line++)->getAsInteger(10, Count))
  256. return error(instrprof_error::malformed);
  257. Record.Counts.push_back(Count);
  258. }
  259. // Check if value profile data exists and read it if so.
  260. if (Error E = readValueProfileData(Record))
  261. return error(std::move(E));
  262. return success();
  263. }
  264. template <class IntPtrT>
  265. bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {
  266. if (DataBuffer.getBufferSize() < sizeof(uint64_t))
  267. return false;
  268. uint64_t Magic =
  269. *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());
  270. return RawInstrProf::getMagic<IntPtrT>() == Magic ||
  271. sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic;
  272. }
  273. template <class IntPtrT>
  274. Error RawInstrProfReader<IntPtrT>::readHeader() {
  275. if (!hasFormat(*DataBuffer))
  276. return error(instrprof_error::bad_magic);
  277. if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header))
  278. return error(instrprof_error::bad_header);
  279. auto *Header = reinterpret_cast<const RawInstrProf::Header *>(
  280. DataBuffer->getBufferStart());
  281. ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>();
  282. return readHeader(*Header);
  283. }
  284. template <class IntPtrT>
  285. Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
  286. const char *End = DataBuffer->getBufferEnd();
  287. // Skip zero padding between profiles.
  288. while (CurrentPos != End && *CurrentPos == 0)
  289. ++CurrentPos;
  290. // If there's nothing left, we're done.
  291. if (CurrentPos == End)
  292. return make_error<InstrProfError>(instrprof_error::eof);
  293. // If there isn't enough space for another header, this is probably just
  294. // garbage at the end of the file.
  295. if (CurrentPos + sizeof(RawInstrProf::Header) > End)
  296. return make_error<InstrProfError>(instrprof_error::malformed);
  297. // The writer ensures each profile is padded to start at an aligned address.
  298. if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t))
  299. return make_error<InstrProfError>(instrprof_error::malformed);
  300. // The magic should have the same byte order as in the previous header.
  301. uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos);
  302. if (Magic != swap(RawInstrProf::getMagic<IntPtrT>()))
  303. return make_error<InstrProfError>(instrprof_error::bad_magic);
  304. // There's another profile to read, so we need to process the header.
  305. auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos);
  306. return readHeader(*Header);
  307. }
  308. template <class IntPtrT>
  309. Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) {
  310. if (Error E = Symtab.create(StringRef(NamesStart, NamesSize)))
  311. return error(std::move(E));
  312. for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) {
  313. const IntPtrT FPtr = swap(I->FunctionPointer);
  314. if (!FPtr)
  315. continue;
  316. Symtab.mapAddress(FPtr, I->NameRef);
  317. }
  318. return success();
  319. }
  320. template <class IntPtrT>
  321. Error RawInstrProfReader<IntPtrT>::readHeader(
  322. const RawInstrProf::Header &Header) {
  323. Version = swap(Header.Version);
  324. if (GET_VERSION(Version) != RawInstrProf::Version)
  325. return error(instrprof_error::unsupported_version);
  326. CountersDelta = swap(Header.CountersDelta);
  327. NamesDelta = swap(Header.NamesDelta);
  328. auto DataSize = swap(Header.DataSize);
  329. auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters);
  330. auto CountersSize = swap(Header.CountersSize);
  331. auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters);
  332. NamesSize = swap(Header.NamesSize);
  333. ValueKindLast = swap(Header.ValueKindLast);
  334. auto DataSizeInBytes = DataSize * sizeof(RawInstrProf::ProfileData<IntPtrT>);
  335. auto PaddingSize = getNumPaddingBytes(NamesSize);
  336. ptrdiff_t DataOffset = sizeof(RawInstrProf::Header);
  337. ptrdiff_t CountersOffset =
  338. DataOffset + DataSizeInBytes + PaddingBytesBeforeCounters;
  339. ptrdiff_t NamesOffset = CountersOffset + (sizeof(uint64_t) * CountersSize) +
  340. PaddingBytesAfterCounters;
  341. ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize;
  342. auto *Start = reinterpret_cast<const char *>(&Header);
  343. if (Start + ValueDataOffset > DataBuffer->getBufferEnd())
  344. return error(instrprof_error::bad_header);
  345. Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(
  346. Start + DataOffset);
  347. DataEnd = Data + DataSize;
  348. CountersStart = reinterpret_cast<const uint64_t *>(Start + CountersOffset);
  349. NamesStart = Start + NamesOffset;
  350. ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset);
  351. std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
  352. if (Error E = createSymtab(*NewSymtab.get()))
  353. return E;
  354. Symtab = std::move(NewSymtab);
  355. return success();
  356. }
  357. template <class IntPtrT>
  358. Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) {
  359. Record.Name = getName(Data->NameRef);
  360. return success();
  361. }
  362. template <class IntPtrT>
  363. Error RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) {
  364. Record.Hash = swap(Data->FuncHash);
  365. return success();
  366. }
  367. template <class IntPtrT>
  368. Error RawInstrProfReader<IntPtrT>::readRawCounts(
  369. InstrProfRecord &Record) {
  370. uint32_t NumCounters = swap(Data->NumCounters);
  371. IntPtrT CounterPtr = Data->CounterPtr;
  372. if (NumCounters == 0)
  373. return error(instrprof_error::malformed);
  374. auto *NamesStartAsCounter = reinterpret_cast<const uint64_t *>(NamesStart);
  375. ptrdiff_t MaxNumCounters = NamesStartAsCounter - CountersStart;
  376. // Check bounds. Note that the counter pointer embedded in the data record
  377. // may itself be corrupt.
  378. if (MaxNumCounters < 0 || NumCounters > (uint32_t)MaxNumCounters)
  379. return error(instrprof_error::malformed);
  380. ptrdiff_t CounterOffset = getCounterOffset(CounterPtr);
  381. if (CounterOffset < 0 || CounterOffset > MaxNumCounters ||
  382. ((uint32_t)CounterOffset + NumCounters) > (uint32_t)MaxNumCounters)
  383. return error(instrprof_error::malformed);
  384. auto RawCounts = makeArrayRef(getCounter(CounterOffset), NumCounters);
  385. if (ShouldSwapBytes) {
  386. Record.Counts.clear();
  387. Record.Counts.reserve(RawCounts.size());
  388. for (uint64_t Count : RawCounts)
  389. Record.Counts.push_back(swap(Count));
  390. } else
  391. Record.Counts = RawCounts;
  392. return success();
  393. }
  394. template <class IntPtrT>
  395. Error RawInstrProfReader<IntPtrT>::readValueProfilingData(
  396. InstrProfRecord &Record) {
  397. Record.clearValueData();
  398. CurValueDataSize = 0;
  399. // Need to match the logic in value profile dumper code in compiler-rt:
  400. uint32_t NumValueKinds = 0;
  401. for (uint32_t I = 0; I < IPVK_Last + 1; I++)
  402. NumValueKinds += (Data->NumValueSites[I] != 0);
  403. if (!NumValueKinds)
  404. return success();
  405. Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
  406. ValueProfData::getValueProfData(
  407. ValueDataStart, (const unsigned char *)DataBuffer->getBufferEnd(),
  408. getDataEndianness());
  409. if (Error E = VDataPtrOrErr.takeError())
  410. return E;
  411. // Note that besides deserialization, this also performs the conversion for
  412. // indirect call targets. The function pointers from the raw profile are
  413. // remapped into function name hashes.
  414. VDataPtrOrErr.get()->deserializeTo(Record, Symtab.get());
  415. CurValueDataSize = VDataPtrOrErr.get()->getSize();
  416. return success();
  417. }
  418. template <class IntPtrT>
  419. Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) {
  420. if (atEnd())
  421. // At this point, ValueDataStart field points to the next header.
  422. if (Error E = readNextHeader(getNextHeaderPos()))
  423. return error(std::move(E));
  424. // Read name ad set it in Record.
  425. if (Error E = readName(Record))
  426. return error(std::move(E));
  427. // Read FuncHash and set it in Record.
  428. if (Error E = readFuncHash(Record))
  429. return error(std::move(E));
  430. // Read raw counts and set Record.
  431. if (Error E = readRawCounts(Record))
  432. return error(std::move(E));
  433. // Read value data and set Record.
  434. if (Error E = readValueProfilingData(Record))
  435. return error(std::move(E));
  436. // Iterate.
  437. advanceData();
  438. return success();
  439. }
  440. namespace llvm {
  441. template class RawInstrProfReader<uint32_t>;
  442. template class RawInstrProfReader<uint64_t>;
  443. } // end namespace llvm
  444. InstrProfLookupTrait::hash_value_type
  445. InstrProfLookupTrait::ComputeHash(StringRef K) {
  446. return IndexedInstrProf::ComputeHash(HashType, K);
  447. }
  448. using data_type = InstrProfLookupTrait::data_type;
  449. using offset_type = InstrProfLookupTrait::offset_type;
  450. bool InstrProfLookupTrait::readValueProfilingData(
  451. const unsigned char *&D, const unsigned char *const End) {
  452. Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
  453. ValueProfData::getValueProfData(D, End, ValueProfDataEndianness);
  454. if (VDataPtrOrErr.takeError())
  455. return false;
  456. VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr);
  457. D += VDataPtrOrErr.get()->TotalSize;
  458. return true;
  459. }
  460. data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
  461. offset_type N) {
  462. using namespace support;
  463. // Check if the data is corrupt. If so, don't try to read it.
  464. if (N % sizeof(uint64_t))
  465. return data_type();
  466. DataBuffer.clear();
  467. std::vector<uint64_t> CounterBuffer;
  468. const unsigned char *End = D + N;
  469. while (D < End) {
  470. // Read hash.
  471. if (D + sizeof(uint64_t) >= End)
  472. return data_type();
  473. uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
  474. // Initialize number of counters for GET_VERSION(FormatVersion) == 1.
  475. uint64_t CountsSize = N / sizeof(uint64_t) - 1;
  476. // If format version is different then read the number of counters.
  477. if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) {
  478. if (D + sizeof(uint64_t) > End)
  479. return data_type();
  480. CountsSize = endian::readNext<uint64_t, little, unaligned>(D);
  481. }
  482. // Read counter values.
  483. if (D + CountsSize * sizeof(uint64_t) > End)
  484. return data_type();
  485. CounterBuffer.clear();
  486. CounterBuffer.reserve(CountsSize);
  487. for (uint64_t J = 0; J < CountsSize; ++J)
  488. CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
  489. DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer));
  490. // Read value profiling data.
  491. if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 &&
  492. !readValueProfilingData(D, End)) {
  493. DataBuffer.clear();
  494. return data_type();
  495. }
  496. }
  497. return DataBuffer;
  498. }
  499. template <typename HashTableImpl>
  500. Error InstrProfReaderIndex<HashTableImpl>::getRecords(
  501. StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) {
  502. auto Iter = HashTable->find(FuncName);
  503. if (Iter == HashTable->end())
  504. return make_error<InstrProfError>(instrprof_error::unknown_function);
  505. Data = (*Iter);
  506. if (Data.empty())
  507. return make_error<InstrProfError>(instrprof_error::malformed);
  508. return Error::success();
  509. }
  510. template <typename HashTableImpl>
  511. Error InstrProfReaderIndex<HashTableImpl>::getRecords(
  512. ArrayRef<NamedInstrProfRecord> &Data) {
  513. if (atEnd())
  514. return make_error<InstrProfError>(instrprof_error::eof);
  515. Data = *RecordIterator;
  516. if (Data.empty())
  517. return make_error<InstrProfError>(instrprof_error::malformed);
  518. return Error::success();
  519. }
  520. template <typename HashTableImpl>
  521. InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex(
  522. const unsigned char *Buckets, const unsigned char *const Payload,
  523. const unsigned char *const Base, IndexedInstrProf::HashT HashType,
  524. uint64_t Version) {
  525. FormatVersion = Version;
  526. HashTable.reset(HashTableImpl::Create(
  527. Buckets, Payload, Base,
  528. typename HashTableImpl::InfoType(HashType, Version)));
  529. RecordIterator = HashTable->data_begin();
  530. }
  531. namespace {
  532. /// A remapper that does not apply any remappings.
  533. class InstrProfReaderNullRemapper : public InstrProfReaderRemapper {
  534. InstrProfReaderIndexBase &Underlying;
  535. public:
  536. InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying)
  537. : Underlying(Underlying) {}
  538. Error getRecords(StringRef FuncName,
  539. ArrayRef<NamedInstrProfRecord> &Data) override {
  540. return Underlying.getRecords(FuncName, Data);
  541. }
  542. };
  543. }
  544. /// A remapper that applies remappings based on a symbol remapping file.
  545. template <typename HashTableImpl>
  546. class llvm::InstrProfReaderItaniumRemapper
  547. : public InstrProfReaderRemapper {
  548. public:
  549. InstrProfReaderItaniumRemapper(
  550. std::unique_ptr<MemoryBuffer> RemapBuffer,
  551. InstrProfReaderIndex<HashTableImpl> &Underlying)
  552. : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) {
  553. }
  554. /// Extract the original function name from a PGO function name.
  555. static StringRef extractName(StringRef Name) {
  556. // We can have multiple :-separated pieces; there can be pieces both
  557. // before and after the mangled name. Find the first part that starts
  558. // with '_Z'; we'll assume that's the mangled name we want.
  559. std::pair<StringRef, StringRef> Parts = {StringRef(), Name};
  560. while (true) {
  561. Parts = Parts.second.split(':');
  562. if (Parts.first.startswith("_Z"))
  563. return Parts.first;
  564. if (Parts.second.empty())
  565. return Name;
  566. }
  567. }
  568. /// Given a mangled name extracted from a PGO function name, and a new
  569. /// form for that mangled name, reconstitute the name.
  570. static void reconstituteName(StringRef OrigName, StringRef ExtractedName,
  571. StringRef Replacement,
  572. SmallVectorImpl<char> &Out) {
  573. Out.reserve(OrigName.size() + Replacement.size() - ExtractedName.size());
  574. Out.insert(Out.end(), OrigName.begin(), ExtractedName.begin());
  575. Out.insert(Out.end(), Replacement.begin(), Replacement.end());
  576. Out.insert(Out.end(), ExtractedName.end(), OrigName.end());
  577. }
  578. Error populateRemappings() override {
  579. if (Error E = Remappings.read(*RemapBuffer))
  580. return E;
  581. for (StringRef Name : Underlying.HashTable->keys()) {
  582. StringRef RealName = extractName(Name);
  583. if (auto Key = Remappings.insert(RealName)) {
  584. // FIXME: We could theoretically map the same equivalence class to
  585. // multiple names in the profile data. If that happens, we should
  586. // return NamedInstrProfRecords from all of them.
  587. MappedNames.insert({Key, RealName});
  588. }
  589. }
  590. return Error::success();
  591. }
  592. Error getRecords(StringRef FuncName,
  593. ArrayRef<NamedInstrProfRecord> &Data) override {
  594. StringRef RealName = extractName(FuncName);
  595. if (auto Key = Remappings.lookup(RealName)) {
  596. StringRef Remapped = MappedNames.lookup(Key);
  597. if (!Remapped.empty()) {
  598. if (RealName.begin() == FuncName.begin() &&
  599. RealName.end() == FuncName.end())
  600. FuncName = Remapped;
  601. else {
  602. // Try rebuilding the name from the given remapping.
  603. SmallString<256> Reconstituted;
  604. reconstituteName(FuncName, RealName, Remapped, Reconstituted);
  605. Error E = Underlying.getRecords(Reconstituted, Data);
  606. if (!E)
  607. return E;
  608. // If we failed because the name doesn't exist, fall back to asking
  609. // about the original name.
  610. if (Error Unhandled = handleErrors(
  611. std::move(E), [](std::unique_ptr<InstrProfError> Err) {
  612. return Err->get() == instrprof_error::unknown_function
  613. ? Error::success()
  614. : Error(std::move(Err));
  615. }))
  616. return Unhandled;
  617. }
  618. }
  619. }
  620. return Underlying.getRecords(FuncName, Data);
  621. }
  622. private:
  623. /// The memory buffer containing the remapping configuration. Remappings
  624. /// holds pointers into this buffer.
  625. std::unique_ptr<MemoryBuffer> RemapBuffer;
  626. /// The mangling remapper.
  627. SymbolRemappingReader Remappings;
  628. /// Mapping from mangled name keys to the name used for the key in the
  629. /// profile data.
  630. /// FIXME: Can we store a location within the on-disk hash table instead of
  631. /// redoing lookup?
  632. DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames;
  633. /// The real profile data reader.
  634. InstrProfReaderIndex<HashTableImpl> &Underlying;
  635. };
  636. bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
  637. using namespace support;
  638. if (DataBuffer.getBufferSize() < 8)
  639. return false;
  640. uint64_t Magic =
  641. endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart());
  642. // Verify that it's magical.
  643. return Magic == IndexedInstrProf::Magic;
  644. }
  645. const unsigned char *
  646. IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
  647. const unsigned char *Cur, bool UseCS) {
  648. using namespace IndexedInstrProf;
  649. using namespace support;
  650. if (Version >= IndexedInstrProf::Version4) {
  651. const IndexedInstrProf::Summary *SummaryInLE =
  652. reinterpret_cast<const IndexedInstrProf::Summary *>(Cur);
  653. uint64_t NFields =
  654. endian::byte_swap<uint64_t, little>(SummaryInLE->NumSummaryFields);
  655. uint64_t NEntries =
  656. endian::byte_swap<uint64_t, little>(SummaryInLE->NumCutoffEntries);
  657. uint32_t SummarySize =
  658. IndexedInstrProf::Summary::getSize(NFields, NEntries);
  659. std::unique_ptr<IndexedInstrProf::Summary> SummaryData =
  660. IndexedInstrProf::allocSummary(SummarySize);
  661. const uint64_t *Src = reinterpret_cast<const uint64_t *>(SummaryInLE);
  662. uint64_t *Dst = reinterpret_cast<uint64_t *>(SummaryData.get());
  663. for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
  664. Dst[I] = endian::byte_swap<uint64_t, little>(Src[I]);
  665. SummaryEntryVector DetailedSummary;
  666. for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) {
  667. const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I);
  668. DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount,
  669. Ent.NumBlocks);
  670. }
  671. std::unique_ptr<llvm::ProfileSummary> &Summary =
  672. UseCS ? this->CS_Summary : this->Summary;
  673. // initialize InstrProfSummary using the SummaryData from disk.
  674. Summary = std::make_unique<ProfileSummary>(
  675. UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr,
  676. DetailedSummary, SummaryData->get(Summary::TotalBlockCount),
  677. SummaryData->get(Summary::MaxBlockCount),
  678. SummaryData->get(Summary::MaxInternalBlockCount),
  679. SummaryData->get(Summary::MaxFunctionCount),
  680. SummaryData->get(Summary::TotalNumBlocks),
  681. SummaryData->get(Summary::TotalNumFunctions));
  682. return Cur + SummarySize;
  683. } else {
  684. // The older versions do not support a profile summary. This just computes
  685. // an empty summary, which will not result in accurate hot/cold detection.
  686. // We would need to call addRecord for all NamedInstrProfRecords to get the
  687. // correct summary. However, this version is old (prior to early 2016) and
  688. // has not been supporting an accurate summary for several years.
  689. InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
  690. Summary = Builder.getSummary();
  691. return Cur;
  692. }
  693. }
  694. Error IndexedInstrProfReader::readHeader() {
  695. using namespace support;
  696. const unsigned char *Start =
  697. (const unsigned char *)DataBuffer->getBufferStart();
  698. const unsigned char *Cur = Start;
  699. if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24)
  700. return error(instrprof_error::truncated);
  701. auto *Header = reinterpret_cast<const IndexedInstrProf::Header *>(Cur);
  702. Cur += sizeof(IndexedInstrProf::Header);
  703. // Check the magic number.
  704. uint64_t Magic = endian::byte_swap<uint64_t, little>(Header->Magic);
  705. if (Magic != IndexedInstrProf::Magic)
  706. return error(instrprof_error::bad_magic);
  707. // Read the version.
  708. uint64_t FormatVersion = endian::byte_swap<uint64_t, little>(Header->Version);
  709. if (GET_VERSION(FormatVersion) >
  710. IndexedInstrProf::ProfVersion::CurrentVersion)
  711. return error(instrprof_error::unsupported_version);
  712. Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur,
  713. /* UseCS */ false);
  714. if (FormatVersion & VARIANT_MASK_CSIR_PROF)
  715. Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur,
  716. /* UseCS */ true);
  717. // Read the hash type and start offset.
  718. IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>(
  719. endian::byte_swap<uint64_t, little>(Header->HashType));
  720. if (HashType > IndexedInstrProf::HashT::Last)
  721. return error(instrprof_error::unsupported_hash_type);
  722. uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset);
  723. // The rest of the file is an on disk hash table.
  724. auto IndexPtr =
  725. std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>(
  726. Start + HashOffset, Cur, Start, HashType, FormatVersion);
  727. // Load the remapping table now if requested.
  728. if (RemappingBuffer) {
  729. Remapper = std::make_unique<
  730. InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>(
  731. std::move(RemappingBuffer), *IndexPtr);
  732. if (Error E = Remapper->populateRemappings())
  733. return E;
  734. } else {
  735. Remapper = std::make_unique<InstrProfReaderNullRemapper>(*IndexPtr);
  736. }
  737. Index = std::move(IndexPtr);
  738. return success();
  739. }
  740. InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
  741. if (Symtab.get())
  742. return *Symtab.get();
  743. std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
  744. if (Error E = Index->populateSymtab(*NewSymtab.get())) {
  745. consumeError(error(InstrProfError::take(std::move(E))));
  746. }
  747. Symtab = std::move(NewSymtab);
  748. return *Symtab.get();
  749. }
  750. Expected<InstrProfRecord>
  751. IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName,
  752. uint64_t FuncHash) {
  753. ArrayRef<NamedInstrProfRecord> Data;
  754. Error Err = Remapper->getRecords(FuncName, Data);
  755. if (Err)
  756. return std::move(Err);
  757. // Found it. Look for counters with the right hash.
  758. for (unsigned I = 0, E = Data.size(); I < E; ++I) {
  759. // Check for a match and fill the vector if there is one.
  760. if (Data[I].Hash == FuncHash) {
  761. return std::move(Data[I]);
  762. }
  763. }
  764. return error(instrprof_error::hash_mismatch);
  765. }
  766. Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName,
  767. uint64_t FuncHash,
  768. std::vector<uint64_t> &Counts) {
  769. Expected<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash);
  770. if (Error E = Record.takeError())
  771. return error(std::move(E));
  772. Counts = Record.get().Counts;
  773. return success();
  774. }
  775. Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
  776. ArrayRef<NamedInstrProfRecord> Data;
  777. Error E = Index->getRecords(Data);
  778. if (E)
  779. return error(std::move(E));
  780. Record = Data[RecordIndex++];
  781. if (RecordIndex >= Data.size()) {
  782. Index->advanceToNextKey();
  783. RecordIndex = 0;
  784. }
  785. return success();
  786. }
  787. void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) {
  788. uint64_t NumFuncs = 0;
  789. for (const auto &Func : *this) {
  790. if (isIRLevelProfile()) {
  791. bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash);
  792. if (FuncIsCS != IsCS)
  793. continue;
  794. }
  795. Func.accumulateCounts(Sum);
  796. ++NumFuncs;
  797. }
  798. Sum.NumEntries = NumFuncs;
  799. }