InstrProfReader.h 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. //
  14. // This file contains support for reading profiling data for instrumentation
  15. // based PGO and coverage.
  16. //
  17. //===----------------------------------------------------------------------===//
  18. #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
  19. #define LLVM_PROFILEDATA_INSTRPROFREADER_H
  20. #include "llvm/ADT/ArrayRef.h"
  21. #include "llvm/ADT/StringRef.h"
  22. #include "llvm/IR/ProfileSummary.h"
  23. #include "llvm/ProfileData/InstrProf.h"
  24. #include "llvm/Support/Endian.h"
  25. #include "llvm/Support/Error.h"
  26. #include "llvm/Support/LineIterator.h"
  27. #include "llvm/Support/MemoryBuffer.h"
  28. #include "llvm/Support/OnDiskHashTable.h"
  29. #include "llvm/Support/SwapByteOrder.h"
  30. #include <algorithm>
  31. #include <cassert>
  32. #include <cstddef>
  33. #include <cstdint>
  34. #include <iterator>
  35. #include <memory>
  36. #include <utility>
  37. #include <vector>
  38. namespace llvm {
  39. class InstrProfReader;
  40. /// A file format agnostic iterator over profiling data.
  41. class InstrProfIterator : public std::iterator<std::input_iterator_tag,
  42. NamedInstrProfRecord> {
  43. InstrProfReader *Reader = nullptr;
  44. value_type Record;
  45. void Increment();
  46. public:
  47. InstrProfIterator() = default;
  48. InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
  49. InstrProfIterator &operator++() { Increment(); return *this; }
  50. bool operator==(const InstrProfIterator &RHS) const {
  51. return Reader == RHS.Reader;
  52. }
  53. bool operator!=(const InstrProfIterator &RHS) const {
  54. return Reader != RHS.Reader;
  55. }
  56. value_type &operator*() { return Record; }
  57. value_type *operator->() { return &Record; }
  58. };
  59. /// Base class and interface for reading profiling data of any known instrprof
  60. /// format. Provides an iterator over NamedInstrProfRecords.
  61. class InstrProfReader {
  62. instrprof_error LastError = instrprof_error::success;
  63. public:
  64. InstrProfReader() = default;
  65. virtual ~InstrProfReader() = default;
  66. /// Read the header. Required before reading first record.
  67. virtual Error readHeader() = 0;
  68. /// Read a single record.
  69. virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0;
  70. /// Iterator over profile data.
  71. InstrProfIterator begin() { return InstrProfIterator(this); }
  72. InstrProfIterator end() { return InstrProfIterator(); }
  73. virtual bool isIRLevelProfile() const = 0;
  74. virtual bool hasCSIRLevelProfile() const = 0;
  75. virtual bool instrEntryBBEnabled() const = 0;
  76. /// Return the PGO symtab. There are three different readers:
  77. /// Raw, Text, and Indexed profile readers. The first two types
  78. /// of readers are used only by llvm-profdata tool, while the indexed
  79. /// profile reader is also used by llvm-cov tool and the compiler (
  80. /// backend or frontend). Since creating PGO symtab can create
  81. /// significant runtime and memory overhead (as it touches data
  82. /// for the whole program), InstrProfSymtab for the indexed profile
  83. /// reader should be created on demand and it is recommended to be
  84. /// only used for dumping purpose with llvm-proftool, not with the
  85. /// compiler.
  86. virtual InstrProfSymtab &getSymtab() = 0;
  87. /// Compute the sum of counts and return in Sum.
  88. void accumulateCounts(CountSumOrPercent &Sum, bool IsCS);
  89. protected:
  90. std::unique_ptr<InstrProfSymtab> Symtab;
  91. /// Set the current error and return same.
  92. Error error(instrprof_error Err) {
  93. LastError = Err;
  94. if (Err == instrprof_error::success)
  95. return Error::success();
  96. return make_error<InstrProfError>(Err);
  97. }
  98. Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); }
  99. /// Clear the current error and return a successful one.
  100. Error success() { return error(instrprof_error::success); }
  101. public:
  102. /// Return true if the reader has finished reading the profile data.
  103. bool isEOF() { return LastError == instrprof_error::eof; }
  104. /// Return true if the reader encountered an error reading profiling data.
  105. bool hasError() { return LastError != instrprof_error::success && !isEOF(); }
  106. /// Get the current error.
  107. Error getError() {
  108. if (hasError())
  109. return make_error<InstrProfError>(LastError);
  110. return Error::success();
  111. }
  112. /// Factory method to create an appropriately typed reader for the given
  113. /// instrprof file.
  114. static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path);
  115. static Expected<std::unique_ptr<InstrProfReader>>
  116. create(std::unique_ptr<MemoryBuffer> Buffer);
  117. };
  118. /// Reader for the simple text based instrprof format.
  119. ///
  120. /// This format is a simple text format that's suitable for test data. Records
  121. /// are separated by one or more blank lines, and record fields are separated by
  122. /// new lines.
  123. ///
  124. /// Each record consists of a function name, a function hash, a number of
  125. /// counters, and then each counter value, in that order.
  126. class TextInstrProfReader : public InstrProfReader {
  127. private:
  128. /// The profile data file contents.
  129. std::unique_ptr<MemoryBuffer> DataBuffer;
  130. /// Iterator over the profile data.
  131. line_iterator Line;
  132. bool IsIRLevelProfile = false;
  133. bool HasCSIRLevelProfile = false;
  134. bool InstrEntryBBEnabled = false;
  135. Error readValueProfileData(InstrProfRecord &Record);
  136. public:
  137. TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
  138. : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
  139. TextInstrProfReader(const TextInstrProfReader &) = delete;
  140. TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
  141. /// Return true if the given buffer is in text instrprof format.
  142. static bool hasFormat(const MemoryBuffer &Buffer);
  143. bool isIRLevelProfile() const override { return IsIRLevelProfile; }
  144. bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; }
  145. bool instrEntryBBEnabled() const override { return InstrEntryBBEnabled; }
  146. /// Read the header.
  147. Error readHeader() override;
  148. /// Read a single record.
  149. Error readNextRecord(NamedInstrProfRecord &Record) override;
  150. InstrProfSymtab &getSymtab() override {
  151. assert(Symtab.get());
  152. return *Symtab.get();
  153. }
  154. };
  155. /// Reader for the raw instrprof binary format from runtime.
  156. ///
  157. /// This format is a raw memory dump of the instrumentation-baed profiling data
  158. /// from the runtime. It has no index.
  159. ///
  160. /// Templated on the unsigned type whose size matches pointers on the platform
  161. /// that wrote the profile.
  162. template <class IntPtrT>
  163. class RawInstrProfReader : public InstrProfReader {
  164. private:
  165. /// The profile data file contents.
  166. std::unique_ptr<MemoryBuffer> DataBuffer;
  167. bool ShouldSwapBytes;
  168. // The value of the version field of the raw profile data header. The lower 56
  169. // bits specifies the format version and the most significant 8 bits specify
  170. // the variant types of the profile.
  171. uint64_t Version;
  172. uint64_t CountersDelta;
  173. uint64_t NamesDelta;
  174. const RawInstrProf::ProfileData<IntPtrT> *Data;
  175. const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
  176. const uint64_t *CountersStart;
  177. const char *NamesStart;
  178. uint64_t NamesSize;
  179. // After value profile is all read, this pointer points to
  180. // the header of next profile data (if exists)
  181. const uint8_t *ValueDataStart;
  182. uint32_t ValueKindLast;
  183. uint32_t CurValueDataSize;
  184. public:
  185. RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
  186. : DataBuffer(std::move(DataBuffer)) {}
  187. RawInstrProfReader(const RawInstrProfReader &) = delete;
  188. RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
  189. static bool hasFormat(const MemoryBuffer &DataBuffer);
  190. Error readHeader() override;
  191. Error readNextRecord(NamedInstrProfRecord &Record) override;
  192. bool isIRLevelProfile() const override {
  193. return (Version & VARIANT_MASK_IR_PROF) != 0;
  194. }
  195. bool hasCSIRLevelProfile() const override {
  196. return (Version & VARIANT_MASK_CSIR_PROF) != 0;
  197. }
  198. bool instrEntryBBEnabled() const override {
  199. return (Version & VARIANT_MASK_INSTR_ENTRY) != 0;
  200. }
  201. InstrProfSymtab &getSymtab() override {
  202. assert(Symtab.get());
  203. return *Symtab.get();
  204. }
  205. private:
  206. Error createSymtab(InstrProfSymtab &Symtab);
  207. Error readNextHeader(const char *CurrentPos);
  208. Error readHeader(const RawInstrProf::Header &Header);
  209. template <class IntT> IntT swap(IntT Int) const {
  210. return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
  211. }
  212. support::endianness getDataEndianness() const {
  213. support::endianness HostEndian = getHostEndianness();
  214. if (!ShouldSwapBytes)
  215. return HostEndian;
  216. if (HostEndian == support::little)
  217. return support::big;
  218. else
  219. return support::little;
  220. }
  221. inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
  222. return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
  223. }
  224. Error readName(NamedInstrProfRecord &Record);
  225. Error readFuncHash(NamedInstrProfRecord &Record);
  226. Error readRawCounts(InstrProfRecord &Record);
  227. Error readValueProfilingData(InstrProfRecord &Record);
  228. bool atEnd() const { return Data == DataEnd; }
  229. void advanceData() {
  230. Data++;
  231. ValueDataStart += CurValueDataSize;
  232. }
  233. const char *getNextHeaderPos() const {
  234. assert(atEnd());
  235. return (const char *)ValueDataStart;
  236. }
  237. /// Get the offset of \p CounterPtr from the start of the counters section of
  238. /// the profile. The offset has units of "number of counters", i.e. increasing
  239. /// the offset by 1 corresponds to an increase in the *byte offset* by 8.
  240. ptrdiff_t getCounterOffset(IntPtrT CounterPtr) const {
  241. return (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
  242. }
  243. const uint64_t *getCounter(ptrdiff_t Offset) const {
  244. return CountersStart + Offset;
  245. }
  246. StringRef getName(uint64_t NameRef) const {
  247. return Symtab->getFuncName(swap(NameRef));
  248. }
  249. };
  250. using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
  251. using RawInstrProfReader64 = RawInstrProfReader<uint64_t>;
  252. namespace IndexedInstrProf {
  253. enum class HashT : uint32_t;
  254. } // end namespace IndexedInstrProf
  255. /// Trait for lookups into the on-disk hash table for the binary instrprof
  256. /// format.
  257. class InstrProfLookupTrait {
  258. std::vector<NamedInstrProfRecord> DataBuffer;
  259. IndexedInstrProf::HashT HashType;
  260. unsigned FormatVersion;
  261. // Endianness of the input value profile data.
  262. // It should be LE by default, but can be changed
  263. // for testing purpose.
  264. support::endianness ValueProfDataEndianness = support::little;
  265. public:
  266. InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
  267. : HashType(HashType), FormatVersion(FormatVersion) {}
  268. using data_type = ArrayRef<NamedInstrProfRecord>;
  269. using internal_key_type = StringRef;
  270. using external_key_type = StringRef;
  271. using hash_value_type = uint64_t;
  272. using offset_type = uint64_t;
  273. static bool EqualKey(StringRef A, StringRef B) { return A == B; }
  274. static StringRef GetInternalKey(StringRef K) { return K; }
  275. static StringRef GetExternalKey(StringRef K) { return K; }
  276. hash_value_type ComputeHash(StringRef K);
  277. static std::pair<offset_type, offset_type>
  278. ReadKeyDataLength(const unsigned char *&D) {
  279. using namespace support;
  280. offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
  281. offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
  282. return std::make_pair(KeyLen, DataLen);
  283. }
  284. StringRef ReadKey(const unsigned char *D, offset_type N) {
  285. return StringRef((const char *)D, N);
  286. }
  287. bool readValueProfilingData(const unsigned char *&D,
  288. const unsigned char *const End);
  289. data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
  290. // Used for testing purpose only.
  291. void setValueProfDataEndianness(support::endianness Endianness) {
  292. ValueProfDataEndianness = Endianness;
  293. }
  294. };
  295. struct InstrProfReaderIndexBase {
  296. virtual ~InstrProfReaderIndexBase() = default;
  297. // Read all the profile records with the same key pointed to the current
  298. // iterator.
  299. virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0;
  300. // Read all the profile records with the key equal to FuncName
  301. virtual Error getRecords(StringRef FuncName,
  302. ArrayRef<NamedInstrProfRecord> &Data) = 0;
  303. virtual void advanceToNextKey() = 0;
  304. virtual bool atEnd() const = 0;
  305. virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
  306. virtual uint64_t getVersion() const = 0;
  307. virtual bool isIRLevelProfile() const = 0;
  308. virtual bool hasCSIRLevelProfile() const = 0;
  309. virtual bool instrEntryBBEnabled() const = 0;
  310. virtual Error populateSymtab(InstrProfSymtab &) = 0;
  311. };
  312. using OnDiskHashTableImplV3 =
  313. OnDiskIterableChainedHashTable<InstrProfLookupTrait>;
  314. template <typename HashTableImpl>
  315. class InstrProfReaderItaniumRemapper;
  316. template <typename HashTableImpl>
  317. class InstrProfReaderIndex : public InstrProfReaderIndexBase {
  318. private:
  319. std::unique_ptr<HashTableImpl> HashTable;
  320. typename HashTableImpl::data_iterator RecordIterator;
  321. uint64_t FormatVersion;
  322. friend class InstrProfReaderItaniumRemapper<HashTableImpl>;
  323. public:
  324. InstrProfReaderIndex(const unsigned char *Buckets,
  325. const unsigned char *const Payload,
  326. const unsigned char *const Base,
  327. IndexedInstrProf::HashT HashType, uint64_t Version);
  328. ~InstrProfReaderIndex() override = default;
  329. Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override;
  330. Error getRecords(StringRef FuncName,
  331. ArrayRef<NamedInstrProfRecord> &Data) override;
  332. void advanceToNextKey() override { RecordIterator++; }
  333. bool atEnd() const override {
  334. return RecordIterator == HashTable->data_end();
  335. }
  336. void setValueProfDataEndianness(support::endianness Endianness) override {
  337. HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
  338. }
  339. uint64_t getVersion() const override { return GET_VERSION(FormatVersion); }
  340. bool isIRLevelProfile() const override {
  341. return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
  342. }
  343. bool hasCSIRLevelProfile() const override {
  344. return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0;
  345. }
  346. bool instrEntryBBEnabled() const override {
  347. return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0;
  348. }
  349. Error populateSymtab(InstrProfSymtab &Symtab) override {
  350. return Symtab.create(HashTable->keys());
  351. }
  352. };
  353. /// Name matcher supporting fuzzy matching of symbol names to names in profiles.
  354. class InstrProfReaderRemapper {
  355. public:
  356. virtual ~InstrProfReaderRemapper() {}
  357. virtual Error populateRemappings() { return Error::success(); }
  358. virtual Error getRecords(StringRef FuncName,
  359. ArrayRef<NamedInstrProfRecord> &Data) = 0;
  360. };
  361. /// Reader for the indexed binary instrprof format.
  362. class IndexedInstrProfReader : public InstrProfReader {
  363. private:
  364. /// The profile data file contents.
  365. std::unique_ptr<MemoryBuffer> DataBuffer;
  366. /// The profile remapping file contents.
  367. std::unique_ptr<MemoryBuffer> RemappingBuffer;
  368. /// The index into the profile data.
  369. std::unique_ptr<InstrProfReaderIndexBase> Index;
  370. /// The profile remapping file contents.
  371. std::unique_ptr<InstrProfReaderRemapper> Remapper;
  372. /// Profile summary data.
  373. std::unique_ptr<ProfileSummary> Summary;
  374. /// Context sensitive profile summary data.
  375. std::unique_ptr<ProfileSummary> CS_Summary;
  376. // Index to the current record in the record array.
  377. unsigned RecordIndex;
  378. // Read the profile summary. Return a pointer pointing to one byte past the
  379. // end of the summary data if it exists or the input \c Cur.
  380. // \c UseCS indicates whether to use the context-sensitive profile summary.
  381. const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
  382. const unsigned char *Cur, bool UseCS);
  383. public:
  384. IndexedInstrProfReader(
  385. std::unique_ptr<MemoryBuffer> DataBuffer,
  386. std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr)
  387. : DataBuffer(std::move(DataBuffer)),
  388. RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {}
  389. IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
  390. IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
  391. /// Return the profile version.
  392. uint64_t getVersion() const { return Index->getVersion(); }
  393. bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
  394. bool hasCSIRLevelProfile() const override {
  395. return Index->hasCSIRLevelProfile();
  396. }
  397. bool instrEntryBBEnabled() const override {
  398. return Index->instrEntryBBEnabled();
  399. }
  400. /// Return true if the given buffer is in an indexed instrprof format.
  401. static bool hasFormat(const MemoryBuffer &DataBuffer);
  402. /// Read the file header.
  403. Error readHeader() override;
  404. /// Read a single record.
  405. Error readNextRecord(NamedInstrProfRecord &Record) override;
  406. /// Return the NamedInstrProfRecord associated with FuncName and FuncHash
  407. Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
  408. uint64_t FuncHash);
  409. /// Fill Counts with the profile data for the given function name.
  410. Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
  411. std::vector<uint64_t> &Counts);
  412. /// Return the maximum of all known function counts.
  413. /// \c UseCS indicates whether to use the context-sensitive count.
  414. uint64_t getMaximumFunctionCount(bool UseCS) {
  415. if (UseCS) {
  416. assert(CS_Summary && "No context sensitive profile summary");
  417. return CS_Summary->getMaxFunctionCount();
  418. } else {
  419. assert(Summary && "No profile summary");
  420. return Summary->getMaxFunctionCount();
  421. }
  422. }
  423. /// Factory method to create an indexed reader.
  424. static Expected<std::unique_ptr<IndexedInstrProfReader>>
  425. create(const Twine &Path, const Twine &RemappingPath = "");
  426. static Expected<std::unique_ptr<IndexedInstrProfReader>>
  427. create(std::unique_ptr<MemoryBuffer> Buffer,
  428. std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr);
  429. // Used for testing purpose only.
  430. void setValueProfDataEndianness(support::endianness Endianness) {
  431. Index->setValueProfDataEndianness(Endianness);
  432. }
  433. // See description in the base class. This interface is designed
  434. // to be used by llvm-profdata (for dumping). Avoid using this when
  435. // the client is the compiler.
  436. InstrProfSymtab &getSymtab() override;
  437. /// Return the profile summary.
  438. /// \c UseCS indicates whether to use the context-sensitive summary.
  439. ProfileSummary &getSummary(bool UseCS) {
  440. if (UseCS) {
  441. assert(CS_Summary && "No context sensitive summary");
  442. return *(CS_Summary.get());
  443. } else {
  444. assert(Summary && "No profile summary");
  445. return *(Summary.get());
  446. }
  447. }
  448. };
  449. } // end namespace llvm
  450. #endif // LLVM_PROFILEDATA_INSTRPROFREADER_H
  451. #ifdef __GNUC__
  452. #pragma GCC diagnostic pop
  453. #endif