BitstreamReader.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476
  1. //===- BitstreamReader.cpp - BitstreamReader implementation ---------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "llvm/Bitstream/BitstreamReader.h"
  9. #include "llvm/ADT/StringRef.h"
  10. #include <cassert>
  11. #include <string>
  12. using namespace llvm;
  13. //===----------------------------------------------------------------------===//
  14. // BitstreamCursor implementation
  15. //===----------------------------------------------------------------------===//
  16. /// Having read the ENTER_SUBBLOCK abbrevid, enter the block.
  17. Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
  18. // Save the current block's state on BlockScope.
  19. BlockScope.push_back(Block(CurCodeSize));
  20. BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
  21. // Add the abbrevs specific to this block to the CurAbbrevs list.
  22. if (BlockInfo) {
  23. if (const BitstreamBlockInfo::BlockInfo *Info =
  24. BlockInfo->getBlockInfo(BlockID)) {
  25. llvm::append_range(CurAbbrevs, Info->Abbrevs);
  26. }
  27. }
  28. // Get the codesize of this block.
  29. Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth);
  30. if (!MaybeVBR)
  31. return MaybeVBR.takeError();
  32. CurCodeSize = MaybeVBR.get();
  33. if (CurCodeSize > MaxChunkSize)
  34. return llvm::createStringError(
  35. std::errc::illegal_byte_sequence,
  36. "can't read more than %zu at a time, trying to read %u", +MaxChunkSize,
  37. CurCodeSize);
  38. SkipToFourByteBoundary();
  39. Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth);
  40. if (!MaybeNum)
  41. return MaybeNum.takeError();
  42. word_t NumWords = MaybeNum.get();
  43. if (NumWordsP)
  44. *NumWordsP = NumWords;
  45. if (CurCodeSize == 0)
  46. return llvm::createStringError(
  47. std::errc::illegal_byte_sequence,
  48. "can't enter sub-block: current code size is 0");
  49. if (AtEndOfStream())
  50. return llvm::createStringError(
  51. std::errc::illegal_byte_sequence,
  52. "can't enter sub block: already at end of stream");
  53. return Error::success();
  54. }
  55. static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor,
  56. const BitCodeAbbrevOp &Op) {
  57. assert(!Op.isLiteral() && "Not to be used with literals!");
  58. // Decode the value as we are commanded.
  59. switch (Op.getEncoding()) {
  60. case BitCodeAbbrevOp::Array:
  61. case BitCodeAbbrevOp::Blob:
  62. llvm_unreachable("Should not reach here");
  63. case BitCodeAbbrevOp::Fixed:
  64. assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
  65. return Cursor.Read((unsigned)Op.getEncodingData());
  66. case BitCodeAbbrevOp::VBR:
  67. assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
  68. return Cursor.ReadVBR64((unsigned)Op.getEncodingData());
  69. case BitCodeAbbrevOp::Char6:
  70. if (Expected<unsigned> Res = Cursor.Read(6))
  71. return BitCodeAbbrevOp::DecodeChar6(Res.get());
  72. else
  73. return Res.takeError();
  74. }
  75. llvm_unreachable("invalid abbreviation encoding");
  76. }
  77. /// skipRecord - Read the current record and discard it.
  78. Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
  79. // Skip unabbreviated records by reading past their entries.
  80. if (AbbrevID == bitc::UNABBREV_RECORD) {
  81. Expected<uint32_t> MaybeCode = ReadVBR(6);
  82. if (!MaybeCode)
  83. return MaybeCode.takeError();
  84. unsigned Code = MaybeCode.get();
  85. Expected<uint32_t> MaybeVBR = ReadVBR(6);
  86. if (!MaybeVBR)
  87. return MaybeVBR.get();
  88. unsigned NumElts = MaybeVBR.get();
  89. for (unsigned i = 0; i != NumElts; ++i)
  90. if (Expected<uint64_t> Res = ReadVBR64(6))
  91. ; // Skip!
  92. else
  93. return Res.takeError();
  94. return Code;
  95. }
  96. const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
  97. const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
  98. unsigned Code;
  99. if (CodeOp.isLiteral())
  100. Code = CodeOp.getLiteralValue();
  101. else {
  102. if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
  103. CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
  104. return llvm::createStringError(
  105. std::errc::illegal_byte_sequence,
  106. "Abbreviation starts with an Array or a Blob");
  107. Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp);
  108. if (!MaybeCode)
  109. return MaybeCode.takeError();
  110. Code = MaybeCode.get();
  111. }
  112. for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) {
  113. const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
  114. if (Op.isLiteral())
  115. continue;
  116. if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
  117. Op.getEncoding() != BitCodeAbbrevOp::Blob) {
  118. if (Expected<uint64_t> MaybeField = readAbbreviatedField(*this, Op))
  119. continue;
  120. else
  121. return MaybeField.takeError();
  122. }
  123. if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
  124. // Array case. Read the number of elements as a vbr6.
  125. Expected<uint32_t> MaybeNum = ReadVBR(6);
  126. if (!MaybeNum)
  127. return MaybeNum.takeError();
  128. unsigned NumElts = MaybeNum.get();
  129. // Get the element encoding.
  130. assert(i+2 == e && "array op not second to last?");
  131. const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
  132. // Read all the elements.
  133. // Decode the value as we are commanded.
  134. switch (EltEnc.getEncoding()) {
  135. default:
  136. report_fatal_error("Array element type can't be an Array or a Blob");
  137. case BitCodeAbbrevOp::Fixed:
  138. assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
  139. if (Error Err =
  140. JumpToBit(GetCurrentBitNo() + static_cast<uint64_t>(NumElts) *
  141. EltEnc.getEncodingData()))
  142. return std::move(Err);
  143. break;
  144. case BitCodeAbbrevOp::VBR:
  145. assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
  146. for (; NumElts; --NumElts)
  147. if (Expected<uint64_t> Res =
  148. ReadVBR64((unsigned)EltEnc.getEncodingData()))
  149. ; // Skip!
  150. else
  151. return Res.takeError();
  152. break;
  153. case BitCodeAbbrevOp::Char6:
  154. if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6))
  155. return std::move(Err);
  156. break;
  157. }
  158. continue;
  159. }
  160. assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
  161. // Blob case. Read the number of bytes as a vbr6.
  162. Expected<uint32_t> MaybeNum = ReadVBR(6);
  163. if (!MaybeNum)
  164. return MaybeNum.takeError();
  165. unsigned NumElts = MaybeNum.get();
  166. SkipToFourByteBoundary(); // 32-bit alignment
  167. // Figure out where the end of this blob will be including tail padding.
  168. const size_t NewEnd = GetCurrentBitNo() + alignTo(NumElts, 4) * 8;
  169. // If this would read off the end of the bitcode file, just set the
  170. // record to empty and return.
  171. if (!canSkipToPos(NewEnd/8)) {
  172. skipToEnd();
  173. break;
  174. }
  175. // Skip over the blob.
  176. if (Error Err = JumpToBit(NewEnd))
  177. return std::move(Err);
  178. }
  179. return Code;
  180. }
  181. Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID,
  182. SmallVectorImpl<uint64_t> &Vals,
  183. StringRef *Blob) {
  184. if (AbbrevID == bitc::UNABBREV_RECORD) {
  185. Expected<uint32_t> MaybeCode = ReadVBR(6);
  186. if (!MaybeCode)
  187. return MaybeCode.takeError();
  188. uint32_t Code = MaybeCode.get();
  189. Expected<uint32_t> MaybeNumElts = ReadVBR(6);
  190. if (!MaybeNumElts)
  191. return MaybeNumElts.takeError();
  192. uint32_t NumElts = MaybeNumElts.get();
  193. Vals.reserve(Vals.size() + NumElts);
  194. for (unsigned i = 0; i != NumElts; ++i)
  195. if (Expected<uint64_t> MaybeVal = ReadVBR64(6))
  196. Vals.push_back(MaybeVal.get());
  197. else
  198. return MaybeVal.takeError();
  199. return Code;
  200. }
  201. const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
  202. // Read the record code first.
  203. assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?");
  204. const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
  205. unsigned Code;
  206. if (CodeOp.isLiteral())
  207. Code = CodeOp.getLiteralValue();
  208. else {
  209. if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
  210. CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
  211. report_fatal_error("Abbreviation starts with an Array or a Blob");
  212. if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp))
  213. Code = MaybeCode.get();
  214. else
  215. return MaybeCode.takeError();
  216. }
  217. for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
  218. const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
  219. if (Op.isLiteral()) {
  220. Vals.push_back(Op.getLiteralValue());
  221. continue;
  222. }
  223. if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
  224. Op.getEncoding() != BitCodeAbbrevOp::Blob) {
  225. if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op))
  226. Vals.push_back(MaybeVal.get());
  227. else
  228. return MaybeVal.takeError();
  229. continue;
  230. }
  231. if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
  232. // Array case. Read the number of elements as a vbr6.
  233. Expected<uint32_t> MaybeNumElts = ReadVBR(6);
  234. if (!MaybeNumElts)
  235. return MaybeNumElts.takeError();
  236. uint32_t NumElts = MaybeNumElts.get();
  237. Vals.reserve(Vals.size() + NumElts);
  238. // Get the element encoding.
  239. if (i + 2 != e)
  240. report_fatal_error("Array op not second to last");
  241. const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
  242. if (!EltEnc.isEncoding())
  243. report_fatal_error(
  244. "Array element type has to be an encoding of a type");
  245. // Read all the elements.
  246. switch (EltEnc.getEncoding()) {
  247. default:
  248. report_fatal_error("Array element type can't be an Array or a Blob");
  249. case BitCodeAbbrevOp::Fixed:
  250. for (; NumElts; --NumElts)
  251. if (Expected<SimpleBitstreamCursor::word_t> MaybeVal =
  252. Read((unsigned)EltEnc.getEncodingData()))
  253. Vals.push_back(MaybeVal.get());
  254. else
  255. return MaybeVal.takeError();
  256. break;
  257. case BitCodeAbbrevOp::VBR:
  258. for (; NumElts; --NumElts)
  259. if (Expected<uint64_t> MaybeVal =
  260. ReadVBR64((unsigned)EltEnc.getEncodingData()))
  261. Vals.push_back(MaybeVal.get());
  262. else
  263. return MaybeVal.takeError();
  264. break;
  265. case BitCodeAbbrevOp::Char6:
  266. for (; NumElts; --NumElts)
  267. if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6))
  268. Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get()));
  269. else
  270. return MaybeVal.takeError();
  271. }
  272. continue;
  273. }
  274. assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
  275. // Blob case. Read the number of bytes as a vbr6.
  276. Expected<uint32_t> MaybeNumElts = ReadVBR(6);
  277. if (!MaybeNumElts)
  278. return MaybeNumElts.takeError();
  279. uint32_t NumElts = MaybeNumElts.get();
  280. SkipToFourByteBoundary(); // 32-bit alignment
  281. // Figure out where the end of this blob will be including tail padding.
  282. size_t CurBitPos = GetCurrentBitNo();
  283. const size_t NewEnd = CurBitPos + alignTo(NumElts, 4) * 8;
  284. // If this would read off the end of the bitcode file, just set the
  285. // record to empty and return.
  286. if (!canSkipToPos(NewEnd/8)) {
  287. Vals.append(NumElts, 0);
  288. skipToEnd();
  289. break;
  290. }
  291. // Otherwise, inform the streamer that we need these bytes in memory. Skip
  292. // over tail padding first, in case jumping to NewEnd invalidates the Blob
  293. // pointer.
  294. if (Error Err = JumpToBit(NewEnd))
  295. return std::move(Err);
  296. const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts);
  297. // If we can return a reference to the data, do so to avoid copying it.
  298. if (Blob) {
  299. *Blob = StringRef(Ptr, NumElts);
  300. } else {
  301. // Otherwise, unpack into Vals with zero extension.
  302. auto *UPtr = reinterpret_cast<const unsigned char *>(Ptr);
  303. Vals.append(UPtr, UPtr + NumElts);
  304. }
  305. }
  306. return Code;
  307. }
  308. Error BitstreamCursor::ReadAbbrevRecord() {
  309. auto Abbv = std::make_shared<BitCodeAbbrev>();
  310. Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5);
  311. if (!MaybeNumOpInfo)
  312. return MaybeNumOpInfo.takeError();
  313. unsigned NumOpInfo = MaybeNumOpInfo.get();
  314. for (unsigned i = 0; i != NumOpInfo; ++i) {
  315. Expected<word_t> MaybeIsLiteral = Read(1);
  316. if (!MaybeIsLiteral)
  317. return MaybeIsLiteral.takeError();
  318. bool IsLiteral = MaybeIsLiteral.get();
  319. if (IsLiteral) {
  320. Expected<uint64_t> MaybeOp = ReadVBR64(8);
  321. if (!MaybeOp)
  322. return MaybeOp.takeError();
  323. Abbv->Add(BitCodeAbbrevOp(MaybeOp.get()));
  324. continue;
  325. }
  326. Expected<word_t> MaybeEncoding = Read(3);
  327. if (!MaybeEncoding)
  328. return MaybeEncoding.takeError();
  329. BitCodeAbbrevOp::Encoding E =
  330. (BitCodeAbbrevOp::Encoding)MaybeEncoding.get();
  331. if (BitCodeAbbrevOp::hasEncodingData(E)) {
  332. Expected<uint64_t> MaybeData = ReadVBR64(5);
  333. if (!MaybeData)
  334. return MaybeData.takeError();
  335. uint64_t Data = MaybeData.get();
  336. // As a special case, handle fixed(0) (i.e., a fixed field with zero bits)
  337. // and vbr(0) as a literal zero. This is decoded the same way, and avoids
  338. // a slow path in Read() to have to handle reading zero bits.
  339. if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
  340. Data == 0) {
  341. Abbv->Add(BitCodeAbbrevOp(0));
  342. continue;
  343. }
  344. if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
  345. Data > MaxChunkSize)
  346. report_fatal_error(
  347. "Fixed or VBR abbrev record with size > MaxChunkData");
  348. Abbv->Add(BitCodeAbbrevOp(E, Data));
  349. } else
  350. Abbv->Add(BitCodeAbbrevOp(E));
  351. }
  352. if (Abbv->getNumOperandInfos() == 0)
  353. report_fatal_error("Abbrev record with no operands");
  354. CurAbbrevs.push_back(std::move(Abbv));
  355. return Error::success();
  356. }
  357. Expected<Optional<BitstreamBlockInfo>>
  358. BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
  359. if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID))
  360. return std::move(Err);
  361. BitstreamBlockInfo NewBlockInfo;
  362. SmallVector<uint64_t, 64> Record;
  363. BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr;
  364. // Read all the records for this module.
  365. while (true) {
  366. Expected<BitstreamEntry> MaybeEntry =
  367. advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs);
  368. if (!MaybeEntry)
  369. return MaybeEntry.takeError();
  370. BitstreamEntry Entry = MaybeEntry.get();
  371. switch (Entry.Kind) {
  372. case llvm::BitstreamEntry::SubBlock: // Handled for us already.
  373. case llvm::BitstreamEntry::Error:
  374. return None;
  375. case llvm::BitstreamEntry::EndBlock:
  376. return std::move(NewBlockInfo);
  377. case llvm::BitstreamEntry::Record:
  378. // The interesting case.
  379. break;
  380. }
  381. // Read abbrev records, associate them with CurBID.
  382. if (Entry.ID == bitc::DEFINE_ABBREV) {
  383. if (!CurBlockInfo) return None;
  384. if (Error Err = ReadAbbrevRecord())
  385. return std::move(Err);
  386. // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the
  387. // appropriate BlockInfo.
  388. CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back()));
  389. CurAbbrevs.pop_back();
  390. continue;
  391. }
  392. // Read a record.
  393. Record.clear();
  394. Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record);
  395. if (!MaybeBlockInfo)
  396. return MaybeBlockInfo.takeError();
  397. switch (MaybeBlockInfo.get()) {
  398. default:
  399. break; // Default behavior, ignore unknown content.
  400. case bitc::BLOCKINFO_CODE_SETBID:
  401. if (Record.size() < 1)
  402. return None;
  403. CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]);
  404. break;
  405. case bitc::BLOCKINFO_CODE_BLOCKNAME: {
  406. if (!CurBlockInfo)
  407. return None;
  408. if (!ReadBlockInfoNames)
  409. break; // Ignore name.
  410. CurBlockInfo->Name = std::string(Record.begin(), Record.end());
  411. break;
  412. }
  413. case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
  414. if (!CurBlockInfo) return None;
  415. if (!ReadBlockInfoNames)
  416. break; // Ignore name.
  417. CurBlockInfo->RecordNames.emplace_back(
  418. (unsigned)Record[0], std::string(Record.begin() + 1, Record.end()));
  419. break;
  420. }
  421. }
  422. }
  423. }