PDBFile.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508
  1. //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
  9. #include "llvm/ADT/ArrayRef.h"
  10. #include "llvm/ADT/STLExtras.h"
  11. #include "llvm/DebugInfo/MSF/MSFCommon.h"
  12. #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
  13. #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
  14. #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
  15. #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
  16. #include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h"
  17. #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
  18. #include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
  19. #include "llvm/DebugInfo/PDB/Native/RawError.h"
  20. #include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
  21. #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
  22. #include "llvm/Support/BinaryStream.h"
  23. #include "llvm/Support/BinaryStreamArray.h"
  24. #include "llvm/Support/BinaryStreamReader.h"
  25. #include "llvm/Support/Endian.h"
  26. #include "llvm/Support/Error.h"
  27. #include "llvm/Support/Path.h"
  28. #include <algorithm>
  29. #include <cassert>
  30. #include <cstdint>
  31. using namespace llvm;
  32. using namespace llvm::codeview;
  33. using namespace llvm::msf;
  34. using namespace llvm::pdb;
  35. namespace {
  36. typedef FixedStreamArray<support::ulittle32_t> ulittle_array;
  37. } // end anonymous namespace
  38. PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer,
  39. BumpPtrAllocator &Allocator)
  40. : FilePath(std::string(Path)), Allocator(Allocator),
  41. Buffer(std::move(PdbFileBuffer)) {}
// Defaulted destructor, defined out of line in this translation unit.
// NOTE(review): presumably placed here so the unique_ptr stream members are
// destroyed where their types are complete — confirm against PDBFile.h.
PDBFile::~PDBFile() = default;
  43. StringRef PDBFile::getFilePath() const { return FilePath; }
  44. StringRef PDBFile::getFileDirectory() const {
  45. return sys::path::parent_path(FilePath);
  46. }
  47. uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; }
  48. uint32_t PDBFile::getFreeBlockMapBlock() const {
  49. return ContainerLayout.SB->FreeBlockMapBlock;
  50. }
  51. uint32_t PDBFile::getBlockCount() const {
  52. return ContainerLayout.SB->NumBlocks;
  53. }
  54. uint32_t PDBFile::getNumDirectoryBytes() const {
  55. return ContainerLayout.SB->NumDirectoryBytes;
  56. }
  57. uint32_t PDBFile::getBlockMapIndex() const {
  58. return ContainerLayout.SB->BlockMapAddr;
  59. }
  60. uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; }
  61. uint32_t PDBFile::getNumDirectoryBlocks() const {
  62. return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes,
  63. ContainerLayout.SB->BlockSize);
  64. }
  65. uint64_t PDBFile::getBlockMapOffset() const {
  66. return (uint64_t)ContainerLayout.SB->BlockMapAddr *
  67. ContainerLayout.SB->BlockSize;
  68. }
  69. uint32_t PDBFile::getNumStreams() const {
  70. return ContainerLayout.StreamSizes.size();
  71. }
  72. uint32_t PDBFile::getMaxStreamSize() const {
  73. return *std::max_element(ContainerLayout.StreamSizes.begin(),
  74. ContainerLayout.StreamSizes.end());
  75. }
  76. uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const {
  77. return ContainerLayout.StreamSizes[StreamIndex];
  78. }
  79. ArrayRef<support::ulittle32_t>
  80. PDBFile::getStreamBlockList(uint32_t StreamIndex) const {
  81. return ContainerLayout.StreamMap[StreamIndex];
  82. }
  83. uint64_t PDBFile::getFileSize() const { return Buffer->getLength(); }
  84. Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex,
  85. uint32_t NumBytes) const {
  86. uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize());
  87. ArrayRef<uint8_t> Result;
  88. if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result))
  89. return std::move(EC);
  90. return Result;
  91. }
  92. Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset,
  93. ArrayRef<uint8_t> Data) const {
  94. return make_error<RawError>(raw_error_code::not_writable,
  95. "PDBFile is immutable");
  96. }
  97. Error PDBFile::parseFileHeaders() {
  98. BinaryStreamReader Reader(*Buffer);
  99. // Initialize SB.
  100. const msf::SuperBlock *SB = nullptr;
  101. if (auto EC = Reader.readObject(SB)) {
  102. consumeError(std::move(EC));
  103. return make_error<RawError>(raw_error_code::corrupt_file,
  104. "MSF superblock is missing");
  105. }
  106. if (auto EC = msf::validateSuperBlock(*SB))
  107. return EC;
  108. if (Buffer->getLength() % SB->BlockSize != 0)
  109. return make_error<RawError>(raw_error_code::corrupt_file,
  110. "File size is not a multiple of block size");
  111. ContainerLayout.SB = SB;
  112. // Initialize Free Page Map.
  113. ContainerLayout.FreePageMap.resize(SB->NumBlocks);
  114. // The Fpm exists either at block 1 or block 2 of the MSF. However, this
  115. // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and
  116. // thusly an equal number of total blocks in the file. For a block size
  117. // of 4KiB (very common), this would yield 32KiB total blocks in file, for a
  118. // maximum file size of 32KiB * 4KiB = 128MiB. Obviously this won't do, so
  119. // the Fpm is split across the file at `getBlockSize()` intervals. As a
  120. // result, every block whose index is of the form |{1,2} + getBlockSize() * k|
  121. // for any non-negative integer k is an Fpm block. In theory, we only really
  122. // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but
  123. // current versions of the MSF format already expect the Fpm to be arranged
  124. // at getBlockSize() intervals, so we have to be compatible.
  125. // See the function fpmPn() for more information:
  126. // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
  127. auto FpmStream =
  128. MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator);
  129. BinaryStreamReader FpmReader(*FpmStream);
  130. ArrayRef<uint8_t> FpmBytes;
  131. if (auto EC = FpmReader.readBytes(FpmBytes, FpmReader.bytesRemaining()))
  132. return EC;
  133. uint32_t BlocksRemaining = getBlockCount();
  134. uint32_t BI = 0;
  135. for (auto Byte : FpmBytes) {
  136. uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U);
  137. for (uint32_t I = 0; I < BlocksThisByte; ++I) {
  138. if (Byte & (1 << I))
  139. ContainerLayout.FreePageMap[BI] = true;
  140. --BlocksRemaining;
  141. ++BI;
  142. }
  143. }
  144. Reader.setOffset(getBlockMapOffset());
  145. if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks,
  146. getNumDirectoryBlocks()))
  147. return EC;
  148. return Error::success();
  149. }
  150. Error PDBFile::parseStreamData() {
  151. assert(ContainerLayout.SB);
  152. if (DirectoryStream)
  153. return Error::success();
  154. uint32_t NumStreams = 0;
  155. // Normally you can't use a MappedBlockStream without having fully parsed the
  156. // PDB file, because it accesses the directory and various other things, which
  157. // is exactly what we are attempting to parse. By specifying a custom
  158. // subclass of IPDBStreamData which only accesses the fields that have already
  159. // been parsed, we can avoid this and reuse MappedBlockStream.
  160. auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer,
  161. Allocator);
  162. BinaryStreamReader Reader(*DS);
  163. if (auto EC = Reader.readInteger(NumStreams))
  164. return EC;
  165. if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams))
  166. return EC;
  167. for (uint32_t I = 0; I < NumStreams; ++I) {
  168. uint32_t StreamSize = getStreamByteSize(I);
  169. // FIXME: What does StreamSize ~0U mean?
  170. uint64_t NumExpectedStreamBlocks =
  171. StreamSize == UINT32_MAX
  172. ? 0
  173. : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize);
  174. // For convenience, we store the block array contiguously. This is because
  175. // if someone calls setStreamMap(), it is more convenient to be able to call
  176. // it with an ArrayRef instead of setting up a StreamRef. Since the
  177. // DirectoryStream is cached in the class and thus lives for the life of the
  178. // class, we can be guaranteed that readArray() will return a stable
  179. // reference, even if it has to allocate from its internal pool.
  180. ArrayRef<support::ulittle32_t> Blocks;
  181. if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks))
  182. return EC;
  183. for (uint32_t Block : Blocks) {
  184. uint64_t BlockEndOffset =
  185. (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize;
  186. if (BlockEndOffset > getFileSize())
  187. return make_error<RawError>(raw_error_code::corrupt_file,
  188. "Stream block map is corrupt.");
  189. }
  190. ContainerLayout.StreamMap.push_back(Blocks);
  191. }
  192. // We should have read exactly SB->NumDirectoryBytes bytes.
  193. assert(Reader.bytesRemaining() == 0);
  194. DirectoryStream = std::move(DS);
  195. return Error::success();
  196. }
  197. ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {
  198. return ContainerLayout.DirectoryBlocks;
  199. }
  200. std::unique_ptr<MappedBlockStream>
  201. PDBFile::createIndexedStream(uint16_t SN) const {
  202. if (SN == kInvalidStreamIndex)
  203. return nullptr;
  204. return MappedBlockStream::createIndexedStream(ContainerLayout, *Buffer, SN,
  205. Allocator);
  206. }
  207. MSFStreamLayout PDBFile::getStreamLayout(uint32_t StreamIdx) const {
  208. MSFStreamLayout Result;
  209. auto Blocks = getStreamBlockList(StreamIdx);
  210. Result.Blocks.assign(Blocks.begin(), Blocks.end());
  211. Result.Length = getStreamByteSize(StreamIdx);
  212. return Result;
  213. }
  214. msf::MSFStreamLayout PDBFile::getFpmStreamLayout() const {
  215. return msf::getFpmStreamLayout(ContainerLayout);
  216. }
  217. Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
  218. if (!Globals) {
  219. auto DbiS = getPDBDbiStream();
  220. if (!DbiS)
  221. return DbiS.takeError();
  222. auto GlobalS =
  223. safelyCreateIndexedStream(DbiS->getGlobalSymbolStreamIndex());
  224. if (!GlobalS)
  225. return GlobalS.takeError();
  226. auto TempGlobals = std::make_unique<GlobalsStream>(std::move(*GlobalS));
  227. if (auto EC = TempGlobals->reload())
  228. return std::move(EC);
  229. Globals = std::move(TempGlobals);
  230. }
  231. return *Globals;
  232. }
  233. Expected<InfoStream &> PDBFile::getPDBInfoStream() {
  234. if (!Info) {
  235. auto InfoS = safelyCreateIndexedStream(StreamPDB);
  236. if (!InfoS)
  237. return InfoS.takeError();
  238. auto TempInfo = std::make_unique<InfoStream>(std::move(*InfoS));
  239. if (auto EC = TempInfo->reload())
  240. return std::move(EC);
  241. Info = std::move(TempInfo);
  242. }
  243. return *Info;
  244. }
  245. Expected<DbiStream &> PDBFile::getPDBDbiStream() {
  246. if (!Dbi) {
  247. auto DbiS = safelyCreateIndexedStream(StreamDBI);
  248. if (!DbiS)
  249. return DbiS.takeError();
  250. auto TempDbi = std::make_unique<DbiStream>(std::move(*DbiS));
  251. if (auto EC = TempDbi->reload(this))
  252. return std::move(EC);
  253. Dbi = std::move(TempDbi);
  254. }
  255. return *Dbi;
  256. }
  257. Expected<TpiStream &> PDBFile::getPDBTpiStream() {
  258. if (!Tpi) {
  259. auto TpiS = safelyCreateIndexedStream(StreamTPI);
  260. if (!TpiS)
  261. return TpiS.takeError();
  262. auto TempTpi = std::make_unique<TpiStream>(*this, std::move(*TpiS));
  263. if (auto EC = TempTpi->reload())
  264. return std::move(EC);
  265. Tpi = std::move(TempTpi);
  266. }
  267. return *Tpi;
  268. }
  269. Expected<TpiStream &> PDBFile::getPDBIpiStream() {
  270. if (!Ipi) {
  271. if (!hasPDBIpiStream())
  272. return make_error<RawError>(raw_error_code::no_stream);
  273. auto IpiS = safelyCreateIndexedStream(StreamIPI);
  274. if (!IpiS)
  275. return IpiS.takeError();
  276. auto TempIpi = std::make_unique<TpiStream>(*this, std::move(*IpiS));
  277. if (auto EC = TempIpi->reload())
  278. return std::move(EC);
  279. Ipi = std::move(TempIpi);
  280. }
  281. return *Ipi;
  282. }
  283. Expected<PublicsStream &> PDBFile::getPDBPublicsStream() {
  284. if (!Publics) {
  285. auto DbiS = getPDBDbiStream();
  286. if (!DbiS)
  287. return DbiS.takeError();
  288. auto PublicS =
  289. safelyCreateIndexedStream(DbiS->getPublicSymbolStreamIndex());
  290. if (!PublicS)
  291. return PublicS.takeError();
  292. auto TempPublics = std::make_unique<PublicsStream>(std::move(*PublicS));
  293. if (auto EC = TempPublics->reload())
  294. return std::move(EC);
  295. Publics = std::move(TempPublics);
  296. }
  297. return *Publics;
  298. }
  299. Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
  300. if (!Symbols) {
  301. auto DbiS = getPDBDbiStream();
  302. if (!DbiS)
  303. return DbiS.takeError();
  304. uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();
  305. auto SymbolS = safelyCreateIndexedStream(SymbolStreamNum);
  306. if (!SymbolS)
  307. return SymbolS.takeError();
  308. auto TempSymbols = std::make_unique<SymbolStream>(std::move(*SymbolS));
  309. if (auto EC = TempSymbols->reload())
  310. return std::move(EC);
  311. Symbols = std::move(TempSymbols);
  312. }
  313. return *Symbols;
  314. }
  315. Expected<PDBStringTable &> PDBFile::getStringTable() {
  316. if (!Strings) {
  317. auto NS = safelyCreateNamedStream("/names");
  318. if (!NS)
  319. return NS.takeError();
  320. auto N = std::make_unique<PDBStringTable>();
  321. BinaryStreamReader Reader(**NS);
  322. if (auto EC = N->reload(Reader))
  323. return std::move(EC);
  324. assert(Reader.bytesRemaining() == 0);
  325. StringTableStream = std::move(*NS);
  326. Strings = std::move(N);
  327. }
  328. return *Strings;
  329. }
  330. Expected<InjectedSourceStream &> PDBFile::getInjectedSourceStream() {
  331. if (!InjectedSources) {
  332. auto IJS = safelyCreateNamedStream("/src/headerblock");
  333. if (!IJS)
  334. return IJS.takeError();
  335. auto Strings = getStringTable();
  336. if (!Strings)
  337. return Strings.takeError();
  338. auto IJ = std::make_unique<InjectedSourceStream>(std::move(*IJS));
  339. if (auto EC = IJ->reload(*Strings))
  340. return std::move(EC);
  341. InjectedSources = std::move(IJ);
  342. }
  343. return *InjectedSources;
  344. }
  345. uint32_t PDBFile::getPointerSize() {
  346. auto DbiS = getPDBDbiStream();
  347. if (!DbiS)
  348. return 0;
  349. PDB_Machine Machine = DbiS->getMachineType();
  350. if (Machine == PDB_Machine::Amd64)
  351. return 8;
  352. return 4;
  353. }
  354. bool PDBFile::hasPDBDbiStream() const {
  355. return StreamDBI < getNumStreams() && getStreamByteSize(StreamDBI) > 0;
  356. }
  357. bool PDBFile::hasPDBGlobalsStream() {
  358. auto DbiS = getPDBDbiStream();
  359. if (!DbiS) {
  360. consumeError(DbiS.takeError());
  361. return false;
  362. }
  363. return DbiS->getGlobalSymbolStreamIndex() < getNumStreams();
  364. }
  365. bool PDBFile::hasPDBInfoStream() const { return StreamPDB < getNumStreams(); }
  366. bool PDBFile::hasPDBIpiStream() const {
  367. if (!hasPDBInfoStream())
  368. return false;
  369. if (StreamIPI >= getNumStreams())
  370. return false;
  371. auto &InfoStream = cantFail(const_cast<PDBFile *>(this)->getPDBInfoStream());
  372. return InfoStream.containsIdStream();
  373. }
  374. bool PDBFile::hasPDBPublicsStream() {
  375. auto DbiS = getPDBDbiStream();
  376. if (!DbiS) {
  377. consumeError(DbiS.takeError());
  378. return false;
  379. }
  380. return DbiS->getPublicSymbolStreamIndex() < getNumStreams();
  381. }
  382. bool PDBFile::hasPDBSymbolStream() {
  383. auto DbiS = getPDBDbiStream();
  384. if (!DbiS)
  385. return false;
  386. return DbiS->getSymRecordStreamIndex() < getNumStreams();
  387. }
  388. bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); }
  389. bool PDBFile::hasPDBStringTable() {
  390. auto IS = getPDBInfoStream();
  391. if (!IS)
  392. return false;
  393. Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names");
  394. if (!ExpectedNSI) {
  395. consumeError(ExpectedNSI.takeError());
  396. return false;
  397. }
  398. assert(*ExpectedNSI < getNumStreams());
  399. return true;
  400. }
  401. bool PDBFile::hasPDBInjectedSourceStream() {
  402. auto IS = getPDBInfoStream();
  403. if (!IS)
  404. return false;
  405. Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/src/headerblock");
  406. if (!ExpectedNSI) {
  407. consumeError(ExpectedNSI.takeError());
  408. return false;
  409. }
  410. assert(*ExpectedNSI < getNumStreams());
  411. return true;
  412. }
  413. /// Wrapper around MappedBlockStream::createIndexedStream() that checks if a
  414. /// stream with that index actually exists. If it does not, the return value
  415. /// will have an MSFError with code msf_error_code::no_stream. Else, the return
  416. /// value will contain the stream returned by createIndexedStream().
  417. Expected<std::unique_ptr<MappedBlockStream>>
  418. PDBFile::safelyCreateIndexedStream(uint32_t StreamIndex) const {
  419. if (StreamIndex >= getNumStreams())
  420. // This rejects kInvalidStreamIndex with an error as well.
  421. return make_error<RawError>(raw_error_code::no_stream);
  422. return createIndexedStream(StreamIndex);
  423. }
  424. Expected<std::unique_ptr<MappedBlockStream>>
  425. PDBFile::safelyCreateNamedStream(StringRef Name) {
  426. auto IS = getPDBInfoStream();
  427. if (!IS)
  428. return IS.takeError();
  429. Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex(Name);
  430. if (!ExpectedNSI)
  431. return ExpectedNSI.takeError();
  432. uint32_t NameStreamIndex = *ExpectedNSI;
  433. return safelyCreateIndexedStream(NameStreamIndex);
  434. }