PDBFile.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507
  1. //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
  9. #include "llvm/ADT/ArrayRef.h"
  10. #include "llvm/DebugInfo/MSF/MSFCommon.h"
  11. #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
  12. #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
  13. #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
  14. #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
  15. #include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h"
  16. #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
  17. #include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
  18. #include "llvm/DebugInfo/PDB/Native/RawError.h"
  19. #include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
  20. #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
  21. #include "llvm/Support/BinaryStream.h"
  22. #include "llvm/Support/BinaryStreamArray.h"
  23. #include "llvm/Support/BinaryStreamReader.h"
  24. #include "llvm/Support/Endian.h"
  25. #include "llvm/Support/Error.h"
  26. #include "llvm/Support/Path.h"
  27. #include <algorithm>
  28. #include <cassert>
  29. #include <cstdint>
  30. using namespace llvm;
  31. using namespace llvm::codeview;
  32. using namespace llvm::msf;
  33. using namespace llvm::pdb;
  34. namespace {
  35. typedef FixedStreamArray<support::ulittle32_t> ulittle_array;
  36. } // end anonymous namespace
  37. PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer,
  38. BumpPtrAllocator &Allocator)
  39. : FilePath(std::string(Path)), Allocator(Allocator),
  40. Buffer(std::move(PdbFileBuffer)) {}
  41. PDBFile::~PDBFile() = default;
  42. StringRef PDBFile::getFilePath() const { return FilePath; }
  43. StringRef PDBFile::getFileDirectory() const {
  44. return sys::path::parent_path(FilePath);
  45. }
  46. uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; }
  47. uint32_t PDBFile::getFreeBlockMapBlock() const {
  48. return ContainerLayout.SB->FreeBlockMapBlock;
  49. }
  50. uint32_t PDBFile::getBlockCount() const {
  51. return ContainerLayout.SB->NumBlocks;
  52. }
  53. uint32_t PDBFile::getNumDirectoryBytes() const {
  54. return ContainerLayout.SB->NumDirectoryBytes;
  55. }
  56. uint32_t PDBFile::getBlockMapIndex() const {
  57. return ContainerLayout.SB->BlockMapAddr;
  58. }
  59. uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; }
  60. uint32_t PDBFile::getNumDirectoryBlocks() const {
  61. return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes,
  62. ContainerLayout.SB->BlockSize);
  63. }
  64. uint64_t PDBFile::getBlockMapOffset() const {
  65. return (uint64_t)ContainerLayout.SB->BlockMapAddr *
  66. ContainerLayout.SB->BlockSize;
  67. }
  68. uint32_t PDBFile::getNumStreams() const {
  69. return ContainerLayout.StreamSizes.size();
  70. }
  71. uint32_t PDBFile::getMaxStreamSize() const {
  72. return *std::max_element(ContainerLayout.StreamSizes.begin(),
  73. ContainerLayout.StreamSizes.end());
  74. }
  75. uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const {
  76. return ContainerLayout.StreamSizes[StreamIndex];
  77. }
  78. ArrayRef<support::ulittle32_t>
  79. PDBFile::getStreamBlockList(uint32_t StreamIndex) const {
  80. return ContainerLayout.StreamMap[StreamIndex];
  81. }
  82. uint64_t PDBFile::getFileSize() const { return Buffer->getLength(); }
  83. Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex,
  84. uint32_t NumBytes) const {
  85. uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize());
  86. ArrayRef<uint8_t> Result;
  87. if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result))
  88. return std::move(EC);
  89. return Result;
  90. }
  91. Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset,
  92. ArrayRef<uint8_t> Data) const {
  93. return make_error<RawError>(raw_error_code::not_writable,
  94. "PDBFile is immutable");
  95. }
  96. Error PDBFile::parseFileHeaders() {
  97. BinaryStreamReader Reader(*Buffer);
  98. // Initialize SB.
  99. const msf::SuperBlock *SB = nullptr;
  100. if (auto EC = Reader.readObject(SB)) {
  101. consumeError(std::move(EC));
  102. return make_error<RawError>(raw_error_code::corrupt_file,
  103. "MSF superblock is missing");
  104. }
  105. if (auto EC = msf::validateSuperBlock(*SB))
  106. return EC;
  107. if (Buffer->getLength() % SB->BlockSize != 0)
  108. return make_error<RawError>(raw_error_code::corrupt_file,
  109. "File size is not a multiple of block size");
  110. ContainerLayout.SB = SB;
  111. // Initialize Free Page Map.
  112. ContainerLayout.FreePageMap.resize(SB->NumBlocks);
  113. // The Fpm exists either at block 1 or block 2 of the MSF. However, this
  114. // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and
  115. // thusly an equal number of total blocks in the file. For a block size
  116. // of 4KiB (very common), this would yield 32KiB total blocks in file, for a
  117. // maximum file size of 32KiB * 4KiB = 128MiB. Obviously this won't do, so
  118. // the Fpm is split across the file at `getBlockSize()` intervals. As a
  119. // result, every block whose index is of the form |{1,2} + getBlockSize() * k|
  120. // for any non-negative integer k is an Fpm block. In theory, we only really
  121. // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but
  122. // current versions of the MSF format already expect the Fpm to be arranged
  123. // at getBlockSize() intervals, so we have to be compatible.
  124. // See the function fpmPn() for more information:
  125. // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
  126. auto FpmStream =
  127. MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator);
  128. BinaryStreamReader FpmReader(*FpmStream);
  129. ArrayRef<uint8_t> FpmBytes;
  130. if (auto EC = FpmReader.readBytes(FpmBytes, FpmReader.bytesRemaining()))
  131. return EC;
  132. uint32_t BlocksRemaining = getBlockCount();
  133. uint32_t BI = 0;
  134. for (auto Byte : FpmBytes) {
  135. uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U);
  136. for (uint32_t I = 0; I < BlocksThisByte; ++I) {
  137. if (Byte & (1 << I))
  138. ContainerLayout.FreePageMap[BI] = true;
  139. --BlocksRemaining;
  140. ++BI;
  141. }
  142. }
  143. Reader.setOffset(getBlockMapOffset());
  144. if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks,
  145. getNumDirectoryBlocks()))
  146. return EC;
  147. return Error::success();
  148. }
  149. Error PDBFile::parseStreamData() {
  150. assert(ContainerLayout.SB);
  151. if (DirectoryStream)
  152. return Error::success();
  153. uint32_t NumStreams = 0;
  154. // Normally you can't use a MappedBlockStream without having fully parsed the
  155. // PDB file, because it accesses the directory and various other things, which
  156. // is exactly what we are attempting to parse. By specifying a custom
  157. // subclass of IPDBStreamData which only accesses the fields that have already
  158. // been parsed, we can avoid this and reuse MappedBlockStream.
  159. auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer,
  160. Allocator);
  161. BinaryStreamReader Reader(*DS);
  162. if (auto EC = Reader.readInteger(NumStreams))
  163. return EC;
  164. if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams))
  165. return EC;
  166. for (uint32_t I = 0; I < NumStreams; ++I) {
  167. uint32_t StreamSize = getStreamByteSize(I);
  168. // FIXME: What does StreamSize ~0U mean?
  169. uint64_t NumExpectedStreamBlocks =
  170. StreamSize == UINT32_MAX
  171. ? 0
  172. : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize);
  173. // For convenience, we store the block array contiguously. This is because
  174. // if someone calls setStreamMap(), it is more convenient to be able to call
  175. // it with an ArrayRef instead of setting up a StreamRef. Since the
  176. // DirectoryStream is cached in the class and thus lives for the life of the
  177. // class, we can be guaranteed that readArray() will return a stable
  178. // reference, even if it has to allocate from its internal pool.
  179. ArrayRef<support::ulittle32_t> Blocks;
  180. if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks))
  181. return EC;
  182. for (uint32_t Block : Blocks) {
  183. uint64_t BlockEndOffset =
  184. (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize;
  185. if (BlockEndOffset > getFileSize())
  186. return make_error<RawError>(raw_error_code::corrupt_file,
  187. "Stream block map is corrupt.");
  188. }
  189. ContainerLayout.StreamMap.push_back(Blocks);
  190. }
  191. // We should have read exactly SB->NumDirectoryBytes bytes.
  192. assert(Reader.bytesRemaining() == 0);
  193. DirectoryStream = std::move(DS);
  194. return Error::success();
  195. }
  196. ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {
  197. return ContainerLayout.DirectoryBlocks;
  198. }
  199. std::unique_ptr<MappedBlockStream>
  200. PDBFile::createIndexedStream(uint16_t SN) const {
  201. if (SN == kInvalidStreamIndex)
  202. return nullptr;
  203. return MappedBlockStream::createIndexedStream(ContainerLayout, *Buffer, SN,
  204. Allocator);
  205. }
  206. MSFStreamLayout PDBFile::getStreamLayout(uint32_t StreamIdx) const {
  207. MSFStreamLayout Result;
  208. auto Blocks = getStreamBlockList(StreamIdx);
  209. Result.Blocks.assign(Blocks.begin(), Blocks.end());
  210. Result.Length = getStreamByteSize(StreamIdx);
  211. return Result;
  212. }
  213. msf::MSFStreamLayout PDBFile::getFpmStreamLayout() const {
  214. return msf::getFpmStreamLayout(ContainerLayout);
  215. }
  216. Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
  217. if (!Globals) {
  218. auto DbiS = getPDBDbiStream();
  219. if (!DbiS)
  220. return DbiS.takeError();
  221. auto GlobalS =
  222. safelyCreateIndexedStream(DbiS->getGlobalSymbolStreamIndex());
  223. if (!GlobalS)
  224. return GlobalS.takeError();
  225. auto TempGlobals = std::make_unique<GlobalsStream>(std::move(*GlobalS));
  226. if (auto EC = TempGlobals->reload())
  227. return std::move(EC);
  228. Globals = std::move(TempGlobals);
  229. }
  230. return *Globals;
  231. }
  232. Expected<InfoStream &> PDBFile::getPDBInfoStream() {
  233. if (!Info) {
  234. auto InfoS = safelyCreateIndexedStream(StreamPDB);
  235. if (!InfoS)
  236. return InfoS.takeError();
  237. auto TempInfo = std::make_unique<InfoStream>(std::move(*InfoS));
  238. if (auto EC = TempInfo->reload())
  239. return std::move(EC);
  240. Info = std::move(TempInfo);
  241. }
  242. return *Info;
  243. }
  244. Expected<DbiStream &> PDBFile::getPDBDbiStream() {
  245. if (!Dbi) {
  246. auto DbiS = safelyCreateIndexedStream(StreamDBI);
  247. if (!DbiS)
  248. return DbiS.takeError();
  249. auto TempDbi = std::make_unique<DbiStream>(std::move(*DbiS));
  250. if (auto EC = TempDbi->reload(this))
  251. return std::move(EC);
  252. Dbi = std::move(TempDbi);
  253. }
  254. return *Dbi;
  255. }
  256. Expected<TpiStream &> PDBFile::getPDBTpiStream() {
  257. if (!Tpi) {
  258. auto TpiS = safelyCreateIndexedStream(StreamTPI);
  259. if (!TpiS)
  260. return TpiS.takeError();
  261. auto TempTpi = std::make_unique<TpiStream>(*this, std::move(*TpiS));
  262. if (auto EC = TempTpi->reload())
  263. return std::move(EC);
  264. Tpi = std::move(TempTpi);
  265. }
  266. return *Tpi;
  267. }
  268. Expected<TpiStream &> PDBFile::getPDBIpiStream() {
  269. if (!Ipi) {
  270. if (!hasPDBIpiStream())
  271. return make_error<RawError>(raw_error_code::no_stream);
  272. auto IpiS = safelyCreateIndexedStream(StreamIPI);
  273. if (!IpiS)
  274. return IpiS.takeError();
  275. auto TempIpi = std::make_unique<TpiStream>(*this, std::move(*IpiS));
  276. if (auto EC = TempIpi->reload())
  277. return std::move(EC);
  278. Ipi = std::move(TempIpi);
  279. }
  280. return *Ipi;
  281. }
  282. Expected<PublicsStream &> PDBFile::getPDBPublicsStream() {
  283. if (!Publics) {
  284. auto DbiS = getPDBDbiStream();
  285. if (!DbiS)
  286. return DbiS.takeError();
  287. auto PublicS =
  288. safelyCreateIndexedStream(DbiS->getPublicSymbolStreamIndex());
  289. if (!PublicS)
  290. return PublicS.takeError();
  291. auto TempPublics = std::make_unique<PublicsStream>(std::move(*PublicS));
  292. if (auto EC = TempPublics->reload())
  293. return std::move(EC);
  294. Publics = std::move(TempPublics);
  295. }
  296. return *Publics;
  297. }
  298. Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
  299. if (!Symbols) {
  300. auto DbiS = getPDBDbiStream();
  301. if (!DbiS)
  302. return DbiS.takeError();
  303. uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();
  304. auto SymbolS = safelyCreateIndexedStream(SymbolStreamNum);
  305. if (!SymbolS)
  306. return SymbolS.takeError();
  307. auto TempSymbols = std::make_unique<SymbolStream>(std::move(*SymbolS));
  308. if (auto EC = TempSymbols->reload())
  309. return std::move(EC);
  310. Symbols = std::move(TempSymbols);
  311. }
  312. return *Symbols;
  313. }
  314. Expected<PDBStringTable &> PDBFile::getStringTable() {
  315. if (!Strings) {
  316. auto NS = safelyCreateNamedStream("/names");
  317. if (!NS)
  318. return NS.takeError();
  319. auto N = std::make_unique<PDBStringTable>();
  320. BinaryStreamReader Reader(**NS);
  321. if (auto EC = N->reload(Reader))
  322. return std::move(EC);
  323. assert(Reader.bytesRemaining() == 0);
  324. StringTableStream = std::move(*NS);
  325. Strings = std::move(N);
  326. }
  327. return *Strings;
  328. }
  329. Expected<InjectedSourceStream &> PDBFile::getInjectedSourceStream() {
  330. if (!InjectedSources) {
  331. auto IJS = safelyCreateNamedStream("/src/headerblock");
  332. if (!IJS)
  333. return IJS.takeError();
  334. auto Strings = getStringTable();
  335. if (!Strings)
  336. return Strings.takeError();
  337. auto IJ = std::make_unique<InjectedSourceStream>(std::move(*IJS));
  338. if (auto EC = IJ->reload(*Strings))
  339. return std::move(EC);
  340. InjectedSources = std::move(IJ);
  341. }
  342. return *InjectedSources;
  343. }
  344. uint32_t PDBFile::getPointerSize() {
  345. auto DbiS = getPDBDbiStream();
  346. if (!DbiS)
  347. return 0;
  348. PDB_Machine Machine = DbiS->getMachineType();
  349. if (Machine == PDB_Machine::Amd64)
  350. return 8;
  351. return 4;
  352. }
  353. bool PDBFile::hasPDBDbiStream() const {
  354. return StreamDBI < getNumStreams() && getStreamByteSize(StreamDBI) > 0;
  355. }
  356. bool PDBFile::hasPDBGlobalsStream() {
  357. auto DbiS = getPDBDbiStream();
  358. if (!DbiS) {
  359. consumeError(DbiS.takeError());
  360. return false;
  361. }
  362. return DbiS->getGlobalSymbolStreamIndex() < getNumStreams();
  363. }
  364. bool PDBFile::hasPDBInfoStream() const { return StreamPDB < getNumStreams(); }
  365. bool PDBFile::hasPDBIpiStream() const {
  366. if (!hasPDBInfoStream())
  367. return false;
  368. if (StreamIPI >= getNumStreams())
  369. return false;
  370. auto &InfoStream = cantFail(const_cast<PDBFile *>(this)->getPDBInfoStream());
  371. return InfoStream.containsIdStream();
  372. }
  373. bool PDBFile::hasPDBPublicsStream() {
  374. auto DbiS = getPDBDbiStream();
  375. if (!DbiS) {
  376. consumeError(DbiS.takeError());
  377. return false;
  378. }
  379. return DbiS->getPublicSymbolStreamIndex() < getNumStreams();
  380. }
  381. bool PDBFile::hasPDBSymbolStream() {
  382. auto DbiS = getPDBDbiStream();
  383. if (!DbiS)
  384. return false;
  385. return DbiS->getSymRecordStreamIndex() < getNumStreams();
  386. }
  387. bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); }
  388. bool PDBFile::hasPDBStringTable() {
  389. auto IS = getPDBInfoStream();
  390. if (!IS)
  391. return false;
  392. Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names");
  393. if (!ExpectedNSI) {
  394. consumeError(ExpectedNSI.takeError());
  395. return false;
  396. }
  397. assert(*ExpectedNSI < getNumStreams());
  398. return true;
  399. }
  400. bool PDBFile::hasPDBInjectedSourceStream() {
  401. auto IS = getPDBInfoStream();
  402. if (!IS)
  403. return false;
  404. Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/src/headerblock");
  405. if (!ExpectedNSI) {
  406. consumeError(ExpectedNSI.takeError());
  407. return false;
  408. }
  409. assert(*ExpectedNSI < getNumStreams());
  410. return true;
  411. }
  412. /// Wrapper around MappedBlockStream::createIndexedStream() that checks if a
  413. /// stream with that index actually exists. If it does not, the return value
  414. /// will have an MSFError with code msf_error_code::no_stream. Else, the return
  415. /// value will contain the stream returned by createIndexedStream().
  416. Expected<std::unique_ptr<MappedBlockStream>>
  417. PDBFile::safelyCreateIndexedStream(uint32_t StreamIndex) const {
  418. if (StreamIndex >= getNumStreams())
  419. // This rejects kInvalidStreamIndex with an error as well.
  420. return make_error<RawError>(raw_error_code::no_stream);
  421. return createIndexedStream(StreamIndex);
  422. }
  423. Expected<std::unique_ptr<MappedBlockStream>>
  424. PDBFile::safelyCreateNamedStream(StringRef Name) {
  425. auto IS = getPDBInfoStream();
  426. if (!IS)
  427. return IS.takeError();
  428. Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex(Name);
  429. if (!ExpectedNSI)
  430. return ExpectedNSI.takeError();
  431. uint32_t NameStreamIndex = *ExpectedNSI;
  432. return safelyCreateIndexedStream(NameStreamIndex);
  433. }