BitstreamReader.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. //
  14. // This header defines the BitstreamReader class. This class can be used to
  15. // read an arbitrary bitstream, regardless of its contents.
  16. //
  17. //===----------------------------------------------------------------------===//
  18. #ifndef LLVM_BITSTREAM_BITSTREAMREADER_H
  19. #define LLVM_BITSTREAM_BITSTREAMREADER_H
  20. #include "llvm/ADT/ArrayRef.h"
  21. #include "llvm/ADT/SmallVector.h"
  22. #include "llvm/Bitstream/BitCodes.h"
  23. #include "llvm/Support/Endian.h"
  24. #include "llvm/Support/Error.h"
  25. #include "llvm/Support/ErrorHandling.h"
  26. #include "llvm/Support/MathExtras.h"
  27. #include "llvm/Support/MemoryBuffer.h"
  28. #include <algorithm>
  29. #include <cassert>
  30. #include <climits>
  31. #include <cstddef>
  32. #include <cstdint>
  33. #include <memory>
  34. #include <string>
  35. #include <utility>
  36. #include <vector>
  37. namespace llvm {
  38. /// This class maintains the abbreviations read from a block info block.
  39. class BitstreamBlockInfo {
  40. public:
  41. /// This contains information emitted to BLOCKINFO_BLOCK blocks. These
  42. /// describe abbreviations that all blocks of the specified ID inherit.
  43. struct BlockInfo {
  44. unsigned BlockID = 0;
  45. std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs;
  46. std::string Name;
  47. std::vector<std::pair<unsigned, std::string>> RecordNames;
  48. };
  49. private:
  50. std::vector<BlockInfo> BlockInfoRecords;
  51. public:
  52. /// If there is block info for the specified ID, return it, otherwise return
  53. /// null.
  54. const BlockInfo *getBlockInfo(unsigned BlockID) const {
  55. // Common case, the most recent entry matches BlockID.
  56. if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
  57. return &BlockInfoRecords.back();
  58. for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
  59. i != e; ++i)
  60. if (BlockInfoRecords[i].BlockID == BlockID)
  61. return &BlockInfoRecords[i];
  62. return nullptr;
  63. }
  64. BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
  65. if (const BlockInfo *BI = getBlockInfo(BlockID))
  66. return *const_cast<BlockInfo*>(BI);
  67. // Otherwise, add a new record.
  68. BlockInfoRecords.emplace_back();
  69. BlockInfoRecords.back().BlockID = BlockID;
  70. return BlockInfoRecords.back();
  71. }
  72. };
  73. /// This represents a position within a bitstream. There may be multiple
  74. /// independent cursors reading within one bitstream, each maintaining their
  75. /// own local state.
  76. class SimpleBitstreamCursor {
  77. ArrayRef<uint8_t> BitcodeBytes;
  78. size_t NextChar = 0;
  79. public:
  80. /// This is the current data we have pulled from the stream but have not
  81. /// returned to the client. This is specifically and intentionally defined to
  82. /// follow the word size of the host machine for efficiency. We use word_t in
  83. /// places that are aware of this to make it perfectly explicit what is going
  84. /// on.
  85. using word_t = size_t;
  86. private:
  87. word_t CurWord = 0;
  88. /// This is the number of bits in CurWord that are valid. This is always from
  89. /// [0...bits_of(size_t)-1] inclusive.
  90. unsigned BitsInCurWord = 0;
  91. public:
  92. static const constexpr size_t MaxChunkSize = sizeof(word_t) * 8;
  93. SimpleBitstreamCursor() = default;
  94. explicit SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
  95. : BitcodeBytes(BitcodeBytes) {}
  96. explicit SimpleBitstreamCursor(StringRef BitcodeBytes)
  97. : BitcodeBytes(arrayRefFromStringRef(BitcodeBytes)) {}
  98. explicit SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes)
  99. : SimpleBitstreamCursor(BitcodeBytes.getBuffer()) {}
  100. bool canSkipToPos(size_t pos) const {
  101. // pos can be skipped to if it is a valid address or one byte past the end.
  102. return pos <= BitcodeBytes.size();
  103. }
  104. bool AtEndOfStream() {
  105. return BitsInCurWord == 0 && BitcodeBytes.size() <= NextChar;
  106. }
  107. /// Return the bit # of the bit we are reading.
  108. uint64_t GetCurrentBitNo() const {
  109. return NextChar*CHAR_BIT - BitsInCurWord;
  110. }
  111. // Return the byte # of the current bit.
  112. uint64_t getCurrentByteNo() const { return GetCurrentBitNo() / 8; }
  113. ArrayRef<uint8_t> getBitcodeBytes() const { return BitcodeBytes; }
  114. /// Reset the stream to the specified bit number.
  115. Error JumpToBit(uint64_t BitNo) {
  116. size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1);
  117. unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
  118. assert(canSkipToPos(ByteNo) && "Invalid location");
  119. // Move the cursor to the right word.
  120. NextChar = ByteNo;
  121. BitsInCurWord = 0;
  122. // Skip over any bits that are already consumed.
  123. if (WordBitNo) {
  124. if (Expected<word_t> Res = Read(WordBitNo))
  125. return Error::success();
  126. else
  127. return Res.takeError();
  128. }
  129. return Error::success();
  130. }
  131. /// Get a pointer into the bitstream at the specified byte offset.
  132. const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) {
  133. return BitcodeBytes.data() + ByteNo;
  134. }
  135. /// Get a pointer into the bitstream at the specified bit offset.
  136. ///
  137. /// The bit offset must be on a byte boundary.
  138. const uint8_t *getPointerToBit(uint64_t BitNo, uint64_t NumBytes) {
  139. assert(!(BitNo % 8) && "Expected bit on byte boundary");
  140. return getPointerToByte(BitNo / 8, NumBytes);
  141. }
  142. Error fillCurWord() {
  143. if (NextChar >= BitcodeBytes.size())
  144. return createStringError(std::errc::io_error,
  145. "Unexpected end of file reading %u of %u bytes",
  146. NextChar, BitcodeBytes.size());
  147. // Read the next word from the stream.
  148. const uint8_t *NextCharPtr = BitcodeBytes.data() + NextChar;
  149. unsigned BytesRead;
  150. if (BitcodeBytes.size() >= NextChar + sizeof(word_t)) {
  151. BytesRead = sizeof(word_t);
  152. CurWord =
  153. support::endian::read<word_t, support::little, support::unaligned>(
  154. NextCharPtr);
  155. } else {
  156. // Short read.
  157. BytesRead = BitcodeBytes.size() - NextChar;
  158. CurWord = 0;
  159. for (unsigned B = 0; B != BytesRead; ++B)
  160. CurWord |= uint64_t(NextCharPtr[B]) << (B * 8);
  161. }
  162. NextChar += BytesRead;
  163. BitsInCurWord = BytesRead * 8;
  164. return Error::success();
  165. }
  166. Expected<word_t> Read(unsigned NumBits) {
  167. static const unsigned BitsInWord = MaxChunkSize;
  168. assert(NumBits && NumBits <= BitsInWord &&
  169. "Cannot return zero or more than BitsInWord bits!");
  170. static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f;
  171. // If the field is fully contained by CurWord, return it quickly.
  172. if (BitsInCurWord >= NumBits) {
  173. word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits));
  174. // Use a mask to avoid undefined behavior.
  175. CurWord >>= (NumBits & Mask);
  176. BitsInCurWord -= NumBits;
  177. return R;
  178. }
  179. word_t R = BitsInCurWord ? CurWord : 0;
  180. unsigned BitsLeft = NumBits - BitsInCurWord;
  181. if (Error fillResult = fillCurWord())
  182. return std::move(fillResult);
  183. // If we run out of data, abort.
  184. if (BitsLeft > BitsInCurWord)
  185. return createStringError(std::errc::io_error,
  186. "Unexpected end of file reading %u of %u bits",
  187. BitsInCurWord, BitsLeft);
  188. word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft));
  189. // Use a mask to avoid undefined behavior.
  190. CurWord >>= (BitsLeft & Mask);
  191. BitsInCurWord -= BitsLeft;
  192. R |= R2 << (NumBits - BitsLeft);
  193. return R;
  194. }
  195. Expected<uint32_t> ReadVBR(unsigned NumBits) {
  196. Expected<unsigned> MaybeRead = Read(NumBits);
  197. if (!MaybeRead)
  198. return MaybeRead;
  199. uint32_t Piece = MaybeRead.get();
  200. if ((Piece & (1U << (NumBits-1))) == 0)
  201. return Piece;
  202. uint32_t Result = 0;
  203. unsigned NextBit = 0;
  204. while (true) {
  205. Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
  206. if ((Piece & (1U << (NumBits-1))) == 0)
  207. return Result;
  208. NextBit += NumBits-1;
  209. MaybeRead = Read(NumBits);
  210. if (!MaybeRead)
  211. return MaybeRead;
  212. Piece = MaybeRead.get();
  213. }
  214. }
  215. // Read a VBR that may have a value up to 64-bits in size. The chunk size of
  216. // the VBR must still be <= 32 bits though.
  217. Expected<uint64_t> ReadVBR64(unsigned NumBits) {
  218. Expected<uint64_t> MaybeRead = Read(NumBits);
  219. if (!MaybeRead)
  220. return MaybeRead;
  221. uint32_t Piece = MaybeRead.get();
  222. if ((Piece & (1U << (NumBits-1))) == 0)
  223. return uint64_t(Piece);
  224. uint64_t Result = 0;
  225. unsigned NextBit = 0;
  226. while (true) {
  227. Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
  228. if ((Piece & (1U << (NumBits-1))) == 0)
  229. return Result;
  230. NextBit += NumBits-1;
  231. MaybeRead = Read(NumBits);
  232. if (!MaybeRead)
  233. return MaybeRead;
  234. Piece = MaybeRead.get();
  235. }
  236. }
  237. void SkipToFourByteBoundary() {
  238. // If word_t is 64-bits and if we've read less than 32 bits, just dump
  239. // the bits we have up to the next 32-bit boundary.
  240. if (sizeof(word_t) > 4 &&
  241. BitsInCurWord >= 32) {
  242. CurWord >>= BitsInCurWord-32;
  243. BitsInCurWord = 32;
  244. return;
  245. }
  246. BitsInCurWord = 0;
  247. }
  248. /// Return the size of the stream in bytes.
  249. size_t SizeInBytes() const { return BitcodeBytes.size(); }
  250. /// Skip to the end of the file.
  251. void skipToEnd() { NextChar = BitcodeBytes.size(); }
  252. };
  253. /// When advancing through a bitstream cursor, each advance can discover a few
  254. /// different kinds of entries:
  255. struct BitstreamEntry {
  256. enum {
  257. Error, // Malformed bitcode was found.
  258. EndBlock, // We've reached the end of the current block, (or the end of the
  259. // file, which is treated like a series of EndBlock records.
  260. SubBlock, // This is the start of a new subblock of a specific ID.
  261. Record // This is a record with a specific AbbrevID.
  262. } Kind;
  263. unsigned ID;
  264. static BitstreamEntry getError() {
  265. BitstreamEntry E; E.Kind = Error; return E;
  266. }
  267. static BitstreamEntry getEndBlock() {
  268. BitstreamEntry E; E.Kind = EndBlock; return E;
  269. }
  270. static BitstreamEntry getSubBlock(unsigned ID) {
  271. BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
  272. }
  273. static BitstreamEntry getRecord(unsigned AbbrevID) {
  274. BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
  275. }
  276. };
  277. /// This represents a position within a bitcode file, implemented on top of a
  278. /// SimpleBitstreamCursor.
  279. ///
  280. /// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
  281. /// be passed by value.
  282. class BitstreamCursor : SimpleBitstreamCursor {
  283. // This is the declared size of code values used for the current block, in
  284. // bits.
  285. unsigned CurCodeSize = 2;
  286. /// Abbrevs installed at in this block.
  287. std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs;
  288. struct Block {
  289. unsigned PrevCodeSize;
  290. std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs;
  291. explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
  292. };
  293. /// This tracks the codesize of parent blocks.
  294. SmallVector<Block, 8> BlockScope;
  295. BitstreamBlockInfo *BlockInfo = nullptr;
  296. public:
  297. static const size_t MaxChunkSize = sizeof(word_t) * 8;
  298. BitstreamCursor() = default;
  299. explicit BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
  300. : SimpleBitstreamCursor(BitcodeBytes) {}
  301. explicit BitstreamCursor(StringRef BitcodeBytes)
  302. : SimpleBitstreamCursor(BitcodeBytes) {}
  303. explicit BitstreamCursor(MemoryBufferRef BitcodeBytes)
  304. : SimpleBitstreamCursor(BitcodeBytes) {}
  305. using SimpleBitstreamCursor::AtEndOfStream;
  306. using SimpleBitstreamCursor::canSkipToPos;
  307. using SimpleBitstreamCursor::fillCurWord;
  308. using SimpleBitstreamCursor::getBitcodeBytes;
  309. using SimpleBitstreamCursor::GetCurrentBitNo;
  310. using SimpleBitstreamCursor::getCurrentByteNo;
  311. using SimpleBitstreamCursor::getPointerToByte;
  312. using SimpleBitstreamCursor::JumpToBit;
  313. using SimpleBitstreamCursor::Read;
  314. using SimpleBitstreamCursor::ReadVBR;
  315. using SimpleBitstreamCursor::ReadVBR64;
  316. using SimpleBitstreamCursor::SizeInBytes;
  317. using SimpleBitstreamCursor::skipToEnd;
  318. /// Return the number of bits used to encode an abbrev #.
  319. unsigned getAbbrevIDWidth() const { return CurCodeSize; }
  320. /// Flags that modify the behavior of advance().
  321. enum {
  322. /// If this flag is used, the advance() method does not automatically pop
  323. /// the block scope when the end of a block is reached.
  324. AF_DontPopBlockAtEnd = 1,
  325. /// If this flag is used, abbrev entries are returned just like normal
  326. /// records.
  327. AF_DontAutoprocessAbbrevs = 2
  328. };
  329. /// Advance the current bitstream, returning the next entry in the stream.
  330. Expected<BitstreamEntry> advance(unsigned Flags = 0) {
  331. while (true) {
  332. if (AtEndOfStream())
  333. return BitstreamEntry::getError();
  334. Expected<unsigned> MaybeCode = ReadCode();
  335. if (!MaybeCode)
  336. return MaybeCode.takeError();
  337. unsigned Code = MaybeCode.get();
  338. if (Code == bitc::END_BLOCK) {
  339. // Pop the end of the block unless Flags tells us not to.
  340. if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
  341. return BitstreamEntry::getError();
  342. return BitstreamEntry::getEndBlock();
  343. }
  344. if (Code == bitc::ENTER_SUBBLOCK) {
  345. if (Expected<unsigned> MaybeSubBlock = ReadSubBlockID())
  346. return BitstreamEntry::getSubBlock(MaybeSubBlock.get());
  347. else
  348. return MaybeSubBlock.takeError();
  349. }
  350. if (Code == bitc::DEFINE_ABBREV &&
  351. !(Flags & AF_DontAutoprocessAbbrevs)) {
  352. // We read and accumulate abbrev's, the client can't do anything with
  353. // them anyway.
  354. if (Error Err = ReadAbbrevRecord())
  355. return std::move(Err);
  356. continue;
  357. }
  358. return BitstreamEntry::getRecord(Code);
  359. }
  360. }
  361. /// This is a convenience function for clients that don't expect any
  362. /// subblocks. This just skips over them automatically.
  363. Expected<BitstreamEntry> advanceSkippingSubblocks(unsigned Flags = 0) {
  364. while (true) {
  365. // If we found a normal entry, return it.
  366. Expected<BitstreamEntry> MaybeEntry = advance(Flags);
  367. if (!MaybeEntry)
  368. return MaybeEntry;
  369. BitstreamEntry Entry = MaybeEntry.get();
  370. if (Entry.Kind != BitstreamEntry::SubBlock)
  371. return Entry;
  372. // If we found a sub-block, just skip over it and check the next entry.
  373. if (Error Err = SkipBlock())
  374. return std::move(Err);
  375. }
  376. }
  377. Expected<unsigned> ReadCode() { return Read(CurCodeSize); }
  378. // Block header:
  379. // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
  380. /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block.
  381. Expected<unsigned> ReadSubBlockID() { return ReadVBR(bitc::BlockIDWidth); }
  382. /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body
  383. /// of this block.
  384. Error SkipBlock() {
  385. // Read and ignore the codelen value.
  386. if (Expected<uint32_t> Res = ReadVBR(bitc::CodeLenWidth))
  387. ; // Since we are skipping this block, we don't care what code widths are
  388. // used inside of it.
  389. else
  390. return Res.takeError();
  391. SkipToFourByteBoundary();
  392. Expected<unsigned> MaybeNum = Read(bitc::BlockSizeWidth);
  393. if (!MaybeNum)
  394. return MaybeNum.takeError();
  395. size_t NumFourBytes = MaybeNum.get();
  396. // Check that the block wasn't partially defined, and that the offset isn't
  397. // bogus.
  398. size_t SkipTo = GetCurrentBitNo() + NumFourBytes * 4 * 8;
  399. if (AtEndOfStream())
  400. return createStringError(std::errc::illegal_byte_sequence,
  401. "can't skip block: already at end of stream");
  402. if (!canSkipToPos(SkipTo / 8))
  403. return createStringError(std::errc::illegal_byte_sequence,
  404. "can't skip to bit %zu from %" PRIu64, SkipTo,
  405. GetCurrentBitNo());
  406. if (Error Res = JumpToBit(SkipTo))
  407. return Res;
  408. return Error::success();
  409. }
  410. /// Having read the ENTER_SUBBLOCK abbrevid, and enter the block.
  411. Error EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr);
  412. bool ReadBlockEnd() {
  413. if (BlockScope.empty()) return true;
  414. // Block tail:
  415. // [END_BLOCK, <align4bytes>]
  416. SkipToFourByteBoundary();
  417. popBlockScope();
  418. return false;
  419. }
  420. private:
  421. void popBlockScope() {
  422. CurCodeSize = BlockScope.back().PrevCodeSize;
  423. CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs);
  424. BlockScope.pop_back();
  425. }
  426. //===--------------------------------------------------------------------===//
  427. // Record Processing
  428. //===--------------------------------------------------------------------===//
  429. public:
  430. /// Return the abbreviation for the specified AbbrevId.
  431. const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
  432. unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV;
  433. if (AbbrevNo >= CurAbbrevs.size())
  434. report_fatal_error("Invalid abbrev number");
  435. return CurAbbrevs[AbbrevNo].get();
  436. }
  437. /// Read the current record and discard it, returning the code for the record.
  438. Expected<unsigned> skipRecord(unsigned AbbrevID);
  439. Expected<unsigned> readRecord(unsigned AbbrevID,
  440. SmallVectorImpl<uint64_t> &Vals,
  441. StringRef *Blob = nullptr);
  442. //===--------------------------------------------------------------------===//
  443. // Abbrev Processing
  444. //===--------------------------------------------------------------------===//
  445. Error ReadAbbrevRecord();
  446. /// Read and return a block info block from the bitstream. If an error was
  447. /// encountered, return None.
  448. ///
  449. /// \param ReadBlockInfoNames Whether to read block/record name information in
  450. /// the BlockInfo block. Only llvm-bcanalyzer uses this.
  451. Expected<Optional<BitstreamBlockInfo>>
  452. ReadBlockInfoBlock(bool ReadBlockInfoNames = false);
  453. /// Set the block info to be used by this BitstreamCursor to interpret
  454. /// abbreviated records.
  455. void setBlockInfo(BitstreamBlockInfo *BI) { BlockInfo = BI; }
  456. };
  457. } // end llvm namespace
  458. #endif // LLVM_BITSTREAM_BITSTREAMREADER_H
  459. #ifdef __GNUC__
  460. #pragma GCC diagnostic pop
  461. #endif