BitstreamReader.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496
  1. //===- BitstreamReader.cpp - BitstreamReader implementation ---------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "llvm/Bitstream/BitstreamReader.h"
  9. #include "llvm/ADT/StringRef.h"
  10. #include <cassert>
  11. #include <optional>
  12. #include <string>
  13. using namespace llvm;
  14. //===----------------------------------------------------------------------===//
  15. // BitstreamCursor implementation
  16. //===----------------------------------------------------------------------===//
  17. //
  18. static Error error(const char *Message) {
  19. return createStringError(std::errc::illegal_byte_sequence, Message);
  20. }
  21. /// Having read the ENTER_SUBBLOCK abbrevid, enter the block.
  22. Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
  23. // Save the current block's state on BlockScope.
  24. BlockScope.push_back(Block(CurCodeSize));
  25. BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
  26. // Add the abbrevs specific to this block to the CurAbbrevs list.
  27. if (BlockInfo) {
  28. if (const BitstreamBlockInfo::BlockInfo *Info =
  29. BlockInfo->getBlockInfo(BlockID)) {
  30. llvm::append_range(CurAbbrevs, Info->Abbrevs);
  31. }
  32. }
  33. // Get the codesize of this block.
  34. Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth);
  35. if (!MaybeVBR)
  36. return MaybeVBR.takeError();
  37. CurCodeSize = MaybeVBR.get();
  38. if (CurCodeSize > MaxChunkSize)
  39. return llvm::createStringError(
  40. std::errc::illegal_byte_sequence,
  41. "can't read more than %zu at a time, trying to read %u", +MaxChunkSize,
  42. CurCodeSize);
  43. SkipToFourByteBoundary();
  44. Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth);
  45. if (!MaybeNum)
  46. return MaybeNum.takeError();
  47. word_t NumWords = MaybeNum.get();
  48. if (NumWordsP)
  49. *NumWordsP = NumWords;
  50. if (CurCodeSize == 0)
  51. return llvm::createStringError(
  52. std::errc::illegal_byte_sequence,
  53. "can't enter sub-block: current code size is 0");
  54. if (AtEndOfStream())
  55. return llvm::createStringError(
  56. std::errc::illegal_byte_sequence,
  57. "can't enter sub block: already at end of stream");
  58. return Error::success();
  59. }
  60. static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor,
  61. const BitCodeAbbrevOp &Op) {
  62. assert(!Op.isLiteral() && "Not to be used with literals!");
  63. // Decode the value as we are commanded.
  64. switch (Op.getEncoding()) {
  65. case BitCodeAbbrevOp::Array:
  66. case BitCodeAbbrevOp::Blob:
  67. llvm_unreachable("Should not reach here");
  68. case BitCodeAbbrevOp::Fixed:
  69. assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
  70. return Cursor.Read((unsigned)Op.getEncodingData());
  71. case BitCodeAbbrevOp::VBR:
  72. assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
  73. return Cursor.ReadVBR64((unsigned)Op.getEncodingData());
  74. case BitCodeAbbrevOp::Char6:
  75. if (Expected<unsigned> Res = Cursor.Read(6))
  76. return BitCodeAbbrevOp::DecodeChar6(Res.get());
  77. else
  78. return Res.takeError();
  79. }
  80. llvm_unreachable("invalid abbreviation encoding");
  81. }
  82. /// skipRecord - Read the current record and discard it.
  83. Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
  84. // Skip unabbreviated records by reading past their entries.
  85. if (AbbrevID == bitc::UNABBREV_RECORD) {
  86. Expected<uint32_t> MaybeCode = ReadVBR(6);
  87. if (!MaybeCode)
  88. return MaybeCode.takeError();
  89. unsigned Code = MaybeCode.get();
  90. Expected<uint32_t> MaybeVBR = ReadVBR(6);
  91. if (!MaybeVBR)
  92. return MaybeVBR.takeError();
  93. unsigned NumElts = MaybeVBR.get();
  94. for (unsigned i = 0; i != NumElts; ++i)
  95. if (Expected<uint64_t> Res = ReadVBR64(6))
  96. ; // Skip!
  97. else
  98. return Res.takeError();
  99. return Code;
  100. }
  101. Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID);
  102. if (!MaybeAbbv)
  103. return MaybeAbbv.takeError();
  104. const BitCodeAbbrev *Abbv = MaybeAbbv.get();
  105. const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
  106. unsigned Code;
  107. if (CodeOp.isLiteral())
  108. Code = CodeOp.getLiteralValue();
  109. else {
  110. if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
  111. CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
  112. return llvm::createStringError(
  113. std::errc::illegal_byte_sequence,
  114. "Abbreviation starts with an Array or a Blob");
  115. Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp);
  116. if (!MaybeCode)
  117. return MaybeCode.takeError();
  118. Code = MaybeCode.get();
  119. }
  120. for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) {
  121. const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
  122. if (Op.isLiteral())
  123. continue;
  124. if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
  125. Op.getEncoding() != BitCodeAbbrevOp::Blob) {
  126. if (Expected<uint64_t> MaybeField = readAbbreviatedField(*this, Op))
  127. continue;
  128. else
  129. return MaybeField.takeError();
  130. }
  131. if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
  132. // Array case. Read the number of elements as a vbr6.
  133. Expected<uint32_t> MaybeNum = ReadVBR(6);
  134. if (!MaybeNum)
  135. return MaybeNum.takeError();
  136. unsigned NumElts = MaybeNum.get();
  137. // Get the element encoding.
  138. assert(i+2 == e && "array op not second to last?");
  139. const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
  140. // Read all the elements.
  141. // Decode the value as we are commanded.
  142. switch (EltEnc.getEncoding()) {
  143. default:
  144. return error("Array element type can't be an Array or a Blob");
  145. case BitCodeAbbrevOp::Fixed:
  146. assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
  147. if (Error Err =
  148. JumpToBit(GetCurrentBitNo() + static_cast<uint64_t>(NumElts) *
  149. EltEnc.getEncodingData()))
  150. return std::move(Err);
  151. break;
  152. case BitCodeAbbrevOp::VBR:
  153. assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
  154. for (; NumElts; --NumElts)
  155. if (Expected<uint64_t> Res =
  156. ReadVBR64((unsigned)EltEnc.getEncodingData()))
  157. ; // Skip!
  158. else
  159. return Res.takeError();
  160. break;
  161. case BitCodeAbbrevOp::Char6:
  162. if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6))
  163. return std::move(Err);
  164. break;
  165. }
  166. continue;
  167. }
  168. assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
  169. // Blob case. Read the number of bytes as a vbr6.
  170. Expected<uint32_t> MaybeNum = ReadVBR(6);
  171. if (!MaybeNum)
  172. return MaybeNum.takeError();
  173. unsigned NumElts = MaybeNum.get();
  174. SkipToFourByteBoundary(); // 32-bit alignment
  175. // Figure out where the end of this blob will be including tail padding.
  176. const size_t NewEnd = GetCurrentBitNo() + alignTo(NumElts, 4) * 8;
  177. // If this would read off the end of the bitcode file, just set the
  178. // record to empty and return.
  179. if (!canSkipToPos(NewEnd/8)) {
  180. skipToEnd();
  181. break;
  182. }
  183. // Skip over the blob.
  184. if (Error Err = JumpToBit(NewEnd))
  185. return std::move(Err);
  186. }
  187. return Code;
  188. }
  189. Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID,
  190. SmallVectorImpl<uint64_t> &Vals,
  191. StringRef *Blob) {
  192. if (AbbrevID == bitc::UNABBREV_RECORD) {
  193. Expected<uint32_t> MaybeCode = ReadVBR(6);
  194. if (!MaybeCode)
  195. return MaybeCode.takeError();
  196. uint32_t Code = MaybeCode.get();
  197. Expected<uint32_t> MaybeNumElts = ReadVBR(6);
  198. if (!MaybeNumElts)
  199. return error(
  200. ("Failed to read size: " + toString(MaybeNumElts.takeError()))
  201. .c_str());
  202. uint32_t NumElts = MaybeNumElts.get();
  203. if (!isSizePlausible(NumElts))
  204. return error("Size is not plausible");
  205. Vals.reserve(Vals.size() + NumElts);
  206. for (unsigned i = 0; i != NumElts; ++i)
  207. if (Expected<uint64_t> MaybeVal = ReadVBR64(6))
  208. Vals.push_back(MaybeVal.get());
  209. else
  210. return MaybeVal.takeError();
  211. return Code;
  212. }
  213. Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID);
  214. if (!MaybeAbbv)
  215. return MaybeAbbv.takeError();
  216. const BitCodeAbbrev *Abbv = MaybeAbbv.get();
  217. // Read the record code first.
  218. assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?");
  219. const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
  220. unsigned Code;
  221. if (CodeOp.isLiteral())
  222. Code = CodeOp.getLiteralValue();
  223. else {
  224. if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
  225. CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
  226. return error("Abbreviation starts with an Array or a Blob");
  227. if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp))
  228. Code = MaybeCode.get();
  229. else
  230. return MaybeCode.takeError();
  231. }
  232. for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
  233. const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
  234. if (Op.isLiteral()) {
  235. Vals.push_back(Op.getLiteralValue());
  236. continue;
  237. }
  238. if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
  239. Op.getEncoding() != BitCodeAbbrevOp::Blob) {
  240. if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op))
  241. Vals.push_back(MaybeVal.get());
  242. else
  243. return MaybeVal.takeError();
  244. continue;
  245. }
  246. if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
  247. // Array case. Read the number of elements as a vbr6.
  248. Expected<uint32_t> MaybeNumElts = ReadVBR(6);
  249. if (!MaybeNumElts)
  250. return error(
  251. ("Failed to read size: " + toString(MaybeNumElts.takeError()))
  252. .c_str());
  253. uint32_t NumElts = MaybeNumElts.get();
  254. if (!isSizePlausible(NumElts))
  255. return error("Size is not plausible");
  256. Vals.reserve(Vals.size() + NumElts);
  257. // Get the element encoding.
  258. if (i + 2 != e)
  259. return error("Array op not second to last");
  260. const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
  261. if (!EltEnc.isEncoding())
  262. return error(
  263. "Array element type has to be an encoding of a type");
  264. // Read all the elements.
  265. switch (EltEnc.getEncoding()) {
  266. default:
  267. return error("Array element type can't be an Array or a Blob");
  268. case BitCodeAbbrevOp::Fixed:
  269. for (; NumElts; --NumElts)
  270. if (Expected<SimpleBitstreamCursor::word_t> MaybeVal =
  271. Read((unsigned)EltEnc.getEncodingData()))
  272. Vals.push_back(MaybeVal.get());
  273. else
  274. return MaybeVal.takeError();
  275. break;
  276. case BitCodeAbbrevOp::VBR:
  277. for (; NumElts; --NumElts)
  278. if (Expected<uint64_t> MaybeVal =
  279. ReadVBR64((unsigned)EltEnc.getEncodingData()))
  280. Vals.push_back(MaybeVal.get());
  281. else
  282. return MaybeVal.takeError();
  283. break;
  284. case BitCodeAbbrevOp::Char6:
  285. for (; NumElts; --NumElts)
  286. if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6))
  287. Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get()));
  288. else
  289. return MaybeVal.takeError();
  290. }
  291. continue;
  292. }
  293. assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
  294. // Blob case. Read the number of bytes as a vbr6.
  295. Expected<uint32_t> MaybeNumElts = ReadVBR(6);
  296. if (!MaybeNumElts)
  297. return MaybeNumElts.takeError();
  298. uint32_t NumElts = MaybeNumElts.get();
  299. SkipToFourByteBoundary(); // 32-bit alignment
  300. // Figure out where the end of this blob will be including tail padding.
  301. size_t CurBitPos = GetCurrentBitNo();
  302. const size_t NewEnd = CurBitPos + alignTo(NumElts, 4) * 8;
  303. // Make sure the bitstream is large enough to contain the blob.
  304. if (!canSkipToPos(NewEnd/8))
  305. return error("Blob ends too soon");
  306. // Otherwise, inform the streamer that we need these bytes in memory. Skip
  307. // over tail padding first, in case jumping to NewEnd invalidates the Blob
  308. // pointer.
  309. if (Error Err = JumpToBit(NewEnd))
  310. return std::move(Err);
  311. const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts);
  312. // If we can return a reference to the data, do so to avoid copying it.
  313. if (Blob) {
  314. *Blob = StringRef(Ptr, NumElts);
  315. } else {
  316. // Otherwise, unpack into Vals with zero extension.
  317. auto *UPtr = reinterpret_cast<const unsigned char *>(Ptr);
  318. Vals.append(UPtr, UPtr + NumElts);
  319. }
  320. }
  321. return Code;
  322. }
  323. Error BitstreamCursor::ReadAbbrevRecord() {
  324. auto Abbv = std::make_shared<BitCodeAbbrev>();
  325. Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5);
  326. if (!MaybeNumOpInfo)
  327. return MaybeNumOpInfo.takeError();
  328. unsigned NumOpInfo = MaybeNumOpInfo.get();
  329. for (unsigned i = 0; i != NumOpInfo; ++i) {
  330. Expected<word_t> MaybeIsLiteral = Read(1);
  331. if (!MaybeIsLiteral)
  332. return MaybeIsLiteral.takeError();
  333. bool IsLiteral = MaybeIsLiteral.get();
  334. if (IsLiteral) {
  335. Expected<uint64_t> MaybeOp = ReadVBR64(8);
  336. if (!MaybeOp)
  337. return MaybeOp.takeError();
  338. Abbv->Add(BitCodeAbbrevOp(MaybeOp.get()));
  339. continue;
  340. }
  341. Expected<word_t> MaybeEncoding = Read(3);
  342. if (!MaybeEncoding)
  343. return MaybeEncoding.takeError();
  344. if (!BitCodeAbbrevOp::isValidEncoding(MaybeEncoding.get()))
  345. return error("Invalid encoding");
  346. BitCodeAbbrevOp::Encoding E =
  347. (BitCodeAbbrevOp::Encoding)MaybeEncoding.get();
  348. if (BitCodeAbbrevOp::hasEncodingData(E)) {
  349. Expected<uint64_t> MaybeData = ReadVBR64(5);
  350. if (!MaybeData)
  351. return MaybeData.takeError();
  352. uint64_t Data = MaybeData.get();
  353. // As a special case, handle fixed(0) (i.e., a fixed field with zero bits)
  354. // and vbr(0) as a literal zero. This is decoded the same way, and avoids
  355. // a slow path in Read() to have to handle reading zero bits.
  356. if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
  357. Data == 0) {
  358. Abbv->Add(BitCodeAbbrevOp(0));
  359. continue;
  360. }
  361. if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
  362. Data > MaxChunkSize)
  363. return error("Fixed or VBR abbrev record with size > MaxChunkData");
  364. Abbv->Add(BitCodeAbbrevOp(E, Data));
  365. } else
  366. Abbv->Add(BitCodeAbbrevOp(E));
  367. }
  368. if (Abbv->getNumOperandInfos() == 0)
  369. return error("Abbrev record with no operands");
  370. CurAbbrevs.push_back(std::move(Abbv));
  371. return Error::success();
  372. }
  373. Expected<std::optional<BitstreamBlockInfo>>
  374. BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
  375. if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID))
  376. return std::move(Err);
  377. BitstreamBlockInfo NewBlockInfo;
  378. SmallVector<uint64_t, 64> Record;
  379. BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr;
  380. // Read all the records for this module.
  381. while (true) {
  382. Expected<BitstreamEntry> MaybeEntry =
  383. advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs);
  384. if (!MaybeEntry)
  385. return MaybeEntry.takeError();
  386. BitstreamEntry Entry = MaybeEntry.get();
  387. switch (Entry.Kind) {
  388. case llvm::BitstreamEntry::SubBlock: // Handled for us already.
  389. case llvm::BitstreamEntry::Error:
  390. return std::nullopt;
  391. case llvm::BitstreamEntry::EndBlock:
  392. return std::move(NewBlockInfo);
  393. case llvm::BitstreamEntry::Record:
  394. // The interesting case.
  395. break;
  396. }
  397. // Read abbrev records, associate them with CurBID.
  398. if (Entry.ID == bitc::DEFINE_ABBREV) {
  399. if (!CurBlockInfo)
  400. return std::nullopt;
  401. if (Error Err = ReadAbbrevRecord())
  402. return std::move(Err);
  403. // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the
  404. // appropriate BlockInfo.
  405. CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back()));
  406. CurAbbrevs.pop_back();
  407. continue;
  408. }
  409. // Read a record.
  410. Record.clear();
  411. Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record);
  412. if (!MaybeBlockInfo)
  413. return MaybeBlockInfo.takeError();
  414. switch (MaybeBlockInfo.get()) {
  415. default:
  416. break; // Default behavior, ignore unknown content.
  417. case bitc::BLOCKINFO_CODE_SETBID:
  418. if (Record.size() < 1)
  419. return std::nullopt;
  420. CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]);
  421. break;
  422. case bitc::BLOCKINFO_CODE_BLOCKNAME: {
  423. if (!CurBlockInfo)
  424. return std::nullopt;
  425. if (!ReadBlockInfoNames)
  426. break; // Ignore name.
  427. CurBlockInfo->Name = std::string(Record.begin(), Record.end());
  428. break;
  429. }
  430. case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
  431. if (!CurBlockInfo)
  432. return std::nullopt;
  433. if (!ReadBlockInfoNames)
  434. break; // Ignore name.
  435. CurBlockInfo->RecordNames.emplace_back(
  436. (unsigned)Record[0], std::string(Record.begin() + 1, Record.end()));
  437. break;
  438. }
  439. }
  440. }
  441. }