InputFile.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587
  1. //===- InputFile.cpp ------------------------------------------ *- C++ --*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "llvm/DebugInfo/PDB/Native/InputFile.h"
  9. #include "llvm/BinaryFormat/Magic.h"
  10. #include "llvm/DebugInfo/CodeView/CodeView.h"
  11. #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
  12. #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
  13. #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
  14. #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
  15. #include "llvm/DebugInfo/PDB/Native/FormatUtil.h"
  16. #include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
  17. #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
  18. #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
  19. #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
  20. #include "llvm/DebugInfo/PDB/Native/RawError.h"
  21. #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
  22. #include "llvm/DebugInfo/PDB/PDB.h"
  23. #include "llvm/Object/COFF.h"
  24. #include "llvm/Support/FileSystem.h"
  25. #include "llvm/Support/FormatVariadic.h"
  26. using namespace llvm;
  27. using namespace llvm::codeview;
  28. using namespace llvm::object;
  29. using namespace llvm::pdb;
  30. InputFile::InputFile() = default;
  31. InputFile::~InputFile() = default;
  32. Expected<ModuleDebugStreamRef>
  33. llvm::pdb::getModuleDebugStream(PDBFile &File, StringRef &ModuleName,
  34. uint32_t Index) {
  35. Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();
  36. if (!DbiOrErr)
  37. return DbiOrErr.takeError();
  38. DbiStream &Dbi = *DbiOrErr;
  39. const auto &Modules = Dbi.modules();
  40. if (Index >= Modules.getModuleCount())
  41. return make_error<RawError>(raw_error_code::index_out_of_bounds,
  42. "Invalid module index");
  43. auto Modi = Modules.getModuleDescriptor(Index);
  44. ModuleName = Modi.getModuleName();
  45. uint16_t ModiStream = Modi.getModuleStreamIndex();
  46. if (ModiStream == kInvalidStreamIndex)
  47. return make_error<RawError>(raw_error_code::no_stream,
  48. "Module stream not present");
  49. auto ModStreamData = File.createIndexedStream(ModiStream);
  50. ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
  51. if (auto EC = ModS.reload())
  52. return make_error<RawError>(raw_error_code::corrupt_file,
  53. "Invalid module stream");
  54. return std::move(ModS);
  55. }
  56. Expected<ModuleDebugStreamRef> llvm::pdb::getModuleDebugStream(PDBFile &File,
  57. uint32_t Index) {
  58. Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();
  59. if (!DbiOrErr)
  60. return DbiOrErr.takeError();
  61. DbiStream &Dbi = *DbiOrErr;
  62. const auto &Modules = Dbi.modules();
  63. auto Modi = Modules.getModuleDescriptor(Index);
  64. uint16_t ModiStream = Modi.getModuleStreamIndex();
  65. if (ModiStream == kInvalidStreamIndex)
  66. return make_error<RawError>(raw_error_code::no_stream,
  67. "Module stream not present");
  68. auto ModStreamData = File.createIndexedStream(ModiStream);
  69. ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
  70. if (Error Err = ModS.reload())
  71. return make_error<RawError>(raw_error_code::corrupt_file,
  72. "Invalid module stream");
  73. return std::move(ModS);
  74. }
  75. static inline bool isCodeViewDebugSubsection(object::SectionRef Section,
  76. StringRef Name,
  77. BinaryStreamReader &Reader) {
  78. if (Expected<StringRef> NameOrErr = Section.getName()) {
  79. if (*NameOrErr != Name)
  80. return false;
  81. } else {
  82. consumeError(NameOrErr.takeError());
  83. return false;
  84. }
  85. Expected<StringRef> ContentsOrErr = Section.getContents();
  86. if (!ContentsOrErr) {
  87. consumeError(ContentsOrErr.takeError());
  88. return false;
  89. }
  90. Reader = BinaryStreamReader(*ContentsOrErr, support::little);
  91. uint32_t Magic;
  92. if (Reader.bytesRemaining() < sizeof(uint32_t))
  93. return false;
  94. cantFail(Reader.readInteger(Magic));
  95. if (Magic != COFF::DEBUG_SECTION_MAGIC)
  96. return false;
  97. return true;
  98. }
  99. static inline bool isDebugSSection(object::SectionRef Section,
  100. DebugSubsectionArray &Subsections) {
  101. BinaryStreamReader Reader;
  102. if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader))
  103. return false;
  104. cantFail(Reader.readArray(Subsections, Reader.bytesRemaining()));
  105. return true;
  106. }
  107. static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) {
  108. BinaryStreamReader Reader;
  109. if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) &&
  110. !isCodeViewDebugSubsection(Section, ".debug$P", Reader))
  111. return false;
  112. cantFail(Reader.readArray(Types, Reader.bytesRemaining()));
  113. return true;
  114. }
  115. static std::string formatChecksumKind(FileChecksumKind Kind) {
  116. switch (Kind) {
  117. RETURN_CASE(FileChecksumKind, None, "None");
  118. RETURN_CASE(FileChecksumKind, MD5, "MD5");
  119. RETURN_CASE(FileChecksumKind, SHA1, "SHA-1");
  120. RETURN_CASE(FileChecksumKind, SHA256, "SHA-256");
  121. }
  122. return formatUnknownEnum(Kind);
  123. }
  124. template <typename... Args>
  125. static void formatInternal(LinePrinter &Printer, bool Append, Args &&...args) {
  126. if (Append)
  127. Printer.format(std::forward<Args>(args)...);
  128. else
  129. Printer.formatLine(std::forward<Args>(args)...);
  130. }
  131. SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) {
  132. if (!File)
  133. return;
  134. if (File->isPdb())
  135. initializeForPdb(GroupIndex);
  136. else {
  137. Name = ".debug$S";
  138. uint32_t I = 0;
  139. for (const auto &S : File->obj().sections()) {
  140. DebugSubsectionArray SS;
  141. if (!isDebugSSection(S, SS))
  142. continue;
  143. if (!SC.hasChecksums() || !SC.hasStrings())
  144. SC.initialize(SS);
  145. if (I == GroupIndex)
  146. Subsections = SS;
  147. if (SC.hasChecksums() && SC.hasStrings())
  148. break;
  149. }
  150. rebuildChecksumMap();
  151. }
  152. }
  153. StringRef SymbolGroup::name() const { return Name; }
  154. void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) {
  155. Subsections = SS;
  156. }
  157. void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); }
  158. void SymbolGroup::initializeForPdb(uint32_t Modi) {
  159. assert(File && File->isPdb());
  160. // PDB always uses the same string table, but each module has its own
  161. // checksums. So we only set the strings if they're not already set.
  162. if (!SC.hasStrings()) {
  163. auto StringTable = File->pdb().getStringTable();
  164. if (StringTable)
  165. SC.setStrings(StringTable->getStringTable());
  166. else
  167. consumeError(StringTable.takeError());
  168. }
  169. SC.resetChecksums();
  170. auto MDS = getModuleDebugStream(File->pdb(), Name, Modi);
  171. if (!MDS) {
  172. consumeError(MDS.takeError());
  173. return;
  174. }
  175. DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS));
  176. Subsections = DebugStream->getSubsectionsArray();
  177. SC.initialize(Subsections);
  178. rebuildChecksumMap();
  179. }
  180. void SymbolGroup::rebuildChecksumMap() {
  181. if (!SC.hasChecksums())
  182. return;
  183. for (const auto &Entry : SC.checksums()) {
  184. auto S = SC.strings().getString(Entry.FileNameOffset);
  185. if (!S)
  186. continue;
  187. ChecksumsByFile[*S] = Entry;
  188. }
  189. }
  190. const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const {
  191. assert(File && File->isPdb() && DebugStream);
  192. return *DebugStream;
  193. }
  194. Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const {
  195. return SC.strings().getString(Offset);
  196. }
  197. Expected<StringRef> SymbolGroup::getNameFromChecksums(uint32_t Offset) const {
  198. StringRef Name;
  199. if (!SC.hasChecksums()) {
  200. return std::move(Name);
  201. }
  202. auto Iter = SC.checksums().getArray().at(Offset);
  203. if (Iter == SC.checksums().getArray().end()) {
  204. return std::move(Name);
  205. }
  206. uint32_t FO = Iter->FileNameOffset;
  207. auto ExpectedFile = getNameFromStringTable(FO);
  208. if (!ExpectedFile) {
  209. return std::move(Name);
  210. }
  211. return *ExpectedFile;
  212. }
  213. void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File,
  214. bool Append) const {
  215. auto FC = ChecksumsByFile.find(File);
  216. if (FC == ChecksumsByFile.end()) {
  217. formatInternal(Printer, Append, "- (no checksum) {0}", File);
  218. return;
  219. }
  220. formatInternal(Printer, Append, "- ({0}: {1}) {2}",
  221. formatChecksumKind(FC->getValue().Kind),
  222. toHex(FC->getValue().Checksum), File);
  223. }
  224. void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer,
  225. uint32_t Offset,
  226. bool Append) const {
  227. if (!SC.hasChecksums()) {
  228. formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
  229. return;
  230. }
  231. auto Iter = SC.checksums().getArray().at(Offset);
  232. if (Iter == SC.checksums().getArray().end()) {
  233. formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
  234. return;
  235. }
  236. uint32_t FO = Iter->FileNameOffset;
  237. auto ExpectedFile = getNameFromStringTable(FO);
  238. if (!ExpectedFile) {
  239. formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
  240. consumeError(ExpectedFile.takeError());
  241. return;
  242. }
  243. if (Iter->Kind == FileChecksumKind::None) {
  244. formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile);
  245. } else {
  246. formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile,
  247. formatChecksumKind(Iter->Kind), toHex(Iter->Checksum));
  248. }
  249. }
  250. Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) {
  251. InputFile IF;
  252. if (!llvm::sys::fs::exists(Path))
  253. return make_error<StringError>(formatv("File {0} not found", Path),
  254. inconvertibleErrorCode());
  255. file_magic Magic;
  256. if (auto EC = identify_magic(Path, Magic))
  257. return make_error<StringError>(
  258. formatv("Unable to identify file type for file {0}", Path), EC);
  259. if (Magic == file_magic::coff_object) {
  260. Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path);
  261. if (!BinaryOrErr)
  262. return BinaryOrErr.takeError();
  263. IF.CoffObject = std::move(*BinaryOrErr);
  264. IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary());
  265. return std::move(IF);
  266. }
  267. if (Magic == file_magic::pdb) {
  268. std::unique_ptr<IPDBSession> Session;
  269. if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session))
  270. return std::move(Err);
  271. IF.PdbSession.reset(static_cast<NativeSession *>(Session.release()));
  272. IF.PdbOrObj = &IF.PdbSession->getPDBFile();
  273. return std::move(IF);
  274. }
  275. if (!AllowUnknownFile)
  276. return make_error<StringError>(
  277. formatv("File {0} is not a supported file type", Path),
  278. inconvertibleErrorCode());
  279. auto Result = MemoryBuffer::getFile(Path, /*IsText=*/false,
  280. /*RequiresNullTerminator=*/false);
  281. if (!Result)
  282. return make_error<StringError>(
  283. formatv("File {0} could not be opened", Path), Result.getError());
  284. IF.UnknownFile = std::move(*Result);
  285. IF.PdbOrObj = IF.UnknownFile.get();
  286. return std::move(IF);
  287. }
  288. PDBFile &InputFile::pdb() {
  289. assert(isPdb());
  290. return *PdbOrObj.get<PDBFile *>();
  291. }
  292. const PDBFile &InputFile::pdb() const {
  293. assert(isPdb());
  294. return *PdbOrObj.get<PDBFile *>();
  295. }
  296. object::COFFObjectFile &InputFile::obj() {
  297. assert(isObj());
  298. return *PdbOrObj.get<object::COFFObjectFile *>();
  299. }
  300. const object::COFFObjectFile &InputFile::obj() const {
  301. assert(isObj());
  302. return *PdbOrObj.get<object::COFFObjectFile *>();
  303. }
  304. MemoryBuffer &InputFile::unknown() {
  305. assert(isUnknown());
  306. return *PdbOrObj.get<MemoryBuffer *>();
  307. }
  308. const MemoryBuffer &InputFile::unknown() const {
  309. assert(isUnknown());
  310. return *PdbOrObj.get<MemoryBuffer *>();
  311. }
  312. StringRef InputFile::getFilePath() const {
  313. if (isPdb())
  314. return pdb().getFilePath();
  315. if (isObj())
  316. return obj().getFileName();
  317. assert(isUnknown());
  318. return unknown().getBufferIdentifier();
  319. }
  320. bool InputFile::hasTypes() const {
  321. if (isPdb())
  322. return pdb().hasPDBTpiStream();
  323. for (const auto &Section : obj().sections()) {
  324. CVTypeArray Types;
  325. if (isDebugTSection(Section, Types))
  326. return true;
  327. }
  328. return false;
  329. }
  330. bool InputFile::hasIds() const {
  331. if (isObj())
  332. return false;
  333. return pdb().hasPDBIpiStream();
  334. }
  335. bool InputFile::isPdb() const { return PdbOrObj.is<PDBFile *>(); }
  336. bool InputFile::isObj() const {
  337. return PdbOrObj.is<object::COFFObjectFile *>();
  338. }
  339. bool InputFile::isUnknown() const { return PdbOrObj.is<MemoryBuffer *>(); }
  340. codeview::LazyRandomTypeCollection &
  341. InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) {
  342. if (Types && Kind == kTypes)
  343. return *Types;
  344. if (Ids && Kind == kIds)
  345. return *Ids;
  346. if (Kind == kIds) {
  347. assert(isPdb() && pdb().hasPDBIpiStream());
  348. }
  349. // If the collection was already initialized, we should have just returned it
  350. // in step 1.
  351. if (isPdb()) {
  352. TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types;
  353. auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream()
  354. : pdb().getPDBTpiStream());
  355. auto &Array = Stream.typeArray();
  356. uint32_t Count = Stream.getNumTypeRecords();
  357. auto Offsets = Stream.getTypeIndexOffsets();
  358. Collection =
  359. std::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets);
  360. return *Collection;
  361. }
  362. assert(isObj());
  363. assert(Kind == kTypes);
  364. assert(!Types);
  365. for (const auto &Section : obj().sections()) {
  366. CVTypeArray Records;
  367. if (!isDebugTSection(Section, Records))
  368. continue;
  369. Types = std::make_unique<LazyRandomTypeCollection>(Records, 100);
  370. return *Types;
  371. }
  372. Types = std::make_unique<LazyRandomTypeCollection>(100);
  373. return *Types;
  374. }
  375. codeview::LazyRandomTypeCollection &InputFile::types() {
  376. return getOrCreateTypeCollection(kTypes);
  377. }
  378. codeview::LazyRandomTypeCollection &InputFile::ids() {
  379. // Object files have only one type stream that contains both types and ids.
  380. // Similarly, some PDBs don't contain an IPI stream, and for those both types
  381. // and IDs are in the same stream.
  382. if (isObj() || !pdb().hasPDBIpiStream())
  383. return types();
  384. return getOrCreateTypeCollection(kIds);
  385. }
  386. iterator_range<SymbolGroupIterator> InputFile::symbol_groups() {
  387. return make_range<SymbolGroupIterator>(symbol_groups_begin(),
  388. symbol_groups_end());
  389. }
  390. SymbolGroupIterator InputFile::symbol_groups_begin() {
  391. return SymbolGroupIterator(*this);
  392. }
  393. SymbolGroupIterator InputFile::symbol_groups_end() {
  394. return SymbolGroupIterator();
  395. }
  396. SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {}
  397. SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) {
  398. if (File.isObj()) {
  399. SectionIter = File.obj().section_begin();
  400. scanToNextDebugS();
  401. }
  402. }
  403. bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const {
  404. bool E = isEnd();
  405. bool RE = R.isEnd();
  406. if (E || RE)
  407. return E == RE;
  408. if (Value.File != R.Value.File)
  409. return false;
  410. return Index == R.Index;
  411. }
  412. const SymbolGroup &SymbolGroupIterator::operator*() const {
  413. assert(!isEnd());
  414. return Value;
  415. }
  416. SymbolGroup &SymbolGroupIterator::operator*() {
  417. assert(!isEnd());
  418. return Value;
  419. }
  420. SymbolGroupIterator &SymbolGroupIterator::operator++() {
  421. assert(Value.File && !isEnd());
  422. ++Index;
  423. if (isEnd())
  424. return *this;
  425. if (Value.File->isPdb()) {
  426. Value.updatePdbModi(Index);
  427. return *this;
  428. }
  429. scanToNextDebugS();
  430. return *this;
  431. }
  432. void SymbolGroupIterator::scanToNextDebugS() {
  433. assert(SectionIter);
  434. auto End = Value.File->obj().section_end();
  435. auto &Iter = *SectionIter;
  436. assert(!isEnd());
  437. while (++Iter != End) {
  438. DebugSubsectionArray SS;
  439. SectionRef SR = *Iter;
  440. if (!isDebugSSection(SR, SS))
  441. continue;
  442. Value.updateDebugS(SS);
  443. return;
  444. }
  445. }
  446. bool SymbolGroupIterator::isEnd() const {
  447. if (!Value.File)
  448. return true;
  449. if (Value.File->isPdb()) {
  450. DbiStream &Dbi = cantFail(Value.File->pdb().getPDBDbiStream());
  451. uint32_t Count = Dbi.modules().getModuleCount();
  452. assert(Index <= Count);
  453. return Index == Count;
  454. }
  455. assert(SectionIter);
  456. return *SectionIter == Value.File->obj().section_end();
  457. }
  458. static bool isMyCode(const SymbolGroup &Group) {
  459. if (Group.getFile().isObj())
  460. return true;
  461. StringRef Name = Group.name();
  462. if (Name.startswith("Import:"))
  463. return false;
  464. if (Name.endswith_insensitive(".dll"))
  465. return false;
  466. if (Name.equals_insensitive("* linker *"))
  467. return false;
  468. if (Name.startswith_insensitive("f:\\binaries\\Intermediate\\vctools"))
  469. return false;
  470. if (Name.startswith_insensitive("f:\\dd\\vctools\\crt"))
  471. return false;
  472. return true;
  473. }
  474. bool llvm::pdb::shouldDumpSymbolGroup(uint32_t Idx, const SymbolGroup &Group,
  475. const FilterOptions &Filters) {
  476. if (Filters.JustMyCode && !isMyCode(Group))
  477. return false;
  478. // If the arg was not specified on the command line, always dump all modules.
  479. if (!Filters.DumpModi)
  480. return true;
  481. // Otherwise, only dump if this is the same module specified.
  482. return (Filters.DumpModi == Idx);
  483. }