InputFile.cpp 14 KB

  1. //===- InputFile.cpp ------------------------------------------ *- C++ --*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "InputFile.h"
  9. #include "FormatUtil.h"
  10. #include "LinePrinter.h"
  11. #include "llvm/BinaryFormat/Magic.h"
  12. #include "llvm/DebugInfo/CodeView/CodeView.h"
  13. #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
  14. #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
  15. #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
  16. #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
  17. #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
  18. #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
  19. #include "llvm/DebugInfo/PDB/Native/RawError.h"
  20. #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
  21. #include "llvm/DebugInfo/PDB/PDB.h"
  22. #include "llvm/Object/COFF.h"
  23. #include "llvm/Support/FileSystem.h"
  24. #include "llvm/Support/FormatVariadic.h"
  25. using namespace llvm;
  26. using namespace llvm::codeview;
  27. using namespace llvm::object;
  28. using namespace llvm::pdb;
  29. InputFile::InputFile() {}
  30. InputFile::~InputFile() {}
  31. static Expected<ModuleDebugStreamRef>
  32. getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index) {
  33. ExitOnError Err("Unexpected error: ");
  34. auto &Dbi = Err(File.getPDBDbiStream());
  35. const auto &Modules = Dbi.modules();
  36. if (Index >= Modules.getModuleCount())
  37. return make_error<RawError>(raw_error_code::index_out_of_bounds,
  38. "Invalid module index");
  39. auto Modi = Modules.getModuleDescriptor(Index);
  40. ModuleName = Modi.getModuleName();
  41. uint16_t ModiStream = Modi.getModuleStreamIndex();
  42. if (ModiStream == kInvalidStreamIndex)
  43. return make_error<RawError>(raw_error_code::no_stream,
  44. "Module stream not present");
  45. auto ModStreamData = File.createIndexedStream(ModiStream);
  46. ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
  47. if (auto EC = ModS.reload())
  48. return make_error<RawError>(raw_error_code::corrupt_file,
  49. "Invalid module stream");
  50. return std::move(ModS);
  51. }
  52. static inline bool isCodeViewDebugSubsection(object::SectionRef Section,
  53. StringRef Name,
  54. BinaryStreamReader &Reader) {
  55. if (Expected<StringRef> NameOrErr = Section.getName()) {
  56. if (*NameOrErr != Name)
  57. return false;
  58. } else {
  59. consumeError(NameOrErr.takeError());
  60. return false;
  61. }
  62. Expected<StringRef> ContentsOrErr = Section.getContents();
  63. if (!ContentsOrErr) {
  64. consumeError(ContentsOrErr.takeError());
  65. return false;
  66. }
  67. Reader = BinaryStreamReader(*ContentsOrErr, support::little);
  68. uint32_t Magic;
  69. if (Reader.bytesRemaining() < sizeof(uint32_t))
  70. return false;
  71. cantFail(Reader.readInteger(Magic));
  72. if (Magic != COFF::DEBUG_SECTION_MAGIC)
  73. return false;
  74. return true;
  75. }
  76. static inline bool isDebugSSection(object::SectionRef Section,
  77. DebugSubsectionArray &Subsections) {
  78. BinaryStreamReader Reader;
  79. if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader))
  80. return false;
  81. cantFail(Reader.readArray(Subsections, Reader.bytesRemaining()));
  82. return true;
  83. }
  84. static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) {
  85. BinaryStreamReader Reader;
  86. if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) &&
  87. !isCodeViewDebugSubsection(Section, ".debug$P", Reader))
  88. return false;
  89. cantFail(Reader.readArray(Types, Reader.bytesRemaining()));
  90. return true;
  91. }
  92. static std::string formatChecksumKind(FileChecksumKind Kind) {
  93. switch (Kind) {
  94. RETURN_CASE(FileChecksumKind, None, "None");
  95. RETURN_CASE(FileChecksumKind, MD5, "MD5");
  96. RETURN_CASE(FileChecksumKind, SHA1, "SHA-1");
  97. RETURN_CASE(FileChecksumKind, SHA256, "SHA-256");
  98. }
  99. return formatUnknownEnum(Kind);
  100. }
  101. template <typename... Args>
  102. static void formatInternal(LinePrinter &Printer, bool Append, Args &&... args) {
  103. if (Append)
  104. Printer.format(std::forward<Args>(args)...);
  105. else
  106. Printer.formatLine(std::forward<Args>(args)...);
  107. }
  108. SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) {
  109. if (!File)
  110. return;
  111. if (File->isPdb())
  112. initializeForPdb(GroupIndex);
  113. else {
  114. Name = ".debug$S";
  115. uint32_t I = 0;
  116. for (const auto &S : File->obj().sections()) {
  117. DebugSubsectionArray SS;
  118. if (!isDebugSSection(S, SS))
  119. continue;
  120. if (!SC.hasChecksums() || !SC.hasStrings())
  121. SC.initialize(SS);
  122. if (I == GroupIndex)
  123. Subsections = SS;
  124. if (SC.hasChecksums() && SC.hasStrings())
  125. break;
  126. }
  127. rebuildChecksumMap();
  128. }
  129. }
  130. StringRef SymbolGroup::name() const { return Name; }
  131. void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) {
  132. Subsections = SS;
  133. }
  134. void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); }
  135. void SymbolGroup::initializeForPdb(uint32_t Modi) {
  136. assert(File && File->isPdb());
  137. // PDB always uses the same string table, but each module has its own
  138. // checksums. So we only set the strings if they're not already set.
  139. if (!SC.hasStrings()) {
  140. auto StringTable = File->pdb().getStringTable();
  141. if (StringTable)
  142. SC.setStrings(StringTable->getStringTable());
  143. else
  144. consumeError(StringTable.takeError());
  145. }
  146. SC.resetChecksums();
  147. auto MDS = getModuleDebugStream(File->pdb(), Name, Modi);
  148. if (!MDS) {
  149. consumeError(MDS.takeError());
  150. return;
  151. }
  152. DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS));
  153. Subsections = DebugStream->getSubsectionsArray();
  154. SC.initialize(Subsections);
  155. rebuildChecksumMap();
  156. }
  157. void SymbolGroup::rebuildChecksumMap() {
  158. if (!SC.hasChecksums())
  159. return;
  160. for (const auto &Entry : SC.checksums()) {
  161. auto S = SC.strings().getString(Entry.FileNameOffset);
  162. if (!S)
  163. continue;
  164. ChecksumsByFile[*S] = Entry;
  165. }
  166. }
  167. const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const {
  168. assert(File && File->isPdb() && DebugStream);
  169. return *DebugStream;
  170. }
  171. Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const {
  172. return SC.strings().getString(Offset);
  173. }
  174. void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File,
  175. bool Append) const {
  176. auto FC = ChecksumsByFile.find(File);
  177. if (FC == ChecksumsByFile.end()) {
  178. formatInternal(Printer, Append, "- (no checksum) {0}", File);
  179. return;
  180. }
  181. formatInternal(Printer, Append, "- ({0}: {1}) {2}",
  182. formatChecksumKind(FC->getValue().Kind),
  183. toHex(FC->getValue().Checksum), File);
  184. }
  185. void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer,
  186. uint32_t Offset,
  187. bool Append) const {
  188. if (!SC.hasChecksums()) {
  189. formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
  190. return;
  191. }
  192. auto Iter = SC.checksums().getArray().at(Offset);
  193. if (Iter == SC.checksums().getArray().end()) {
  194. formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
  195. return;
  196. }
  197. uint32_t FO = Iter->FileNameOffset;
  198. auto ExpectedFile = getNameFromStringTable(FO);
  199. if (!ExpectedFile) {
  200. formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
  201. consumeError(ExpectedFile.takeError());
  202. return;
  203. }
  204. if (Iter->Kind == FileChecksumKind::None) {
  205. formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile);
  206. } else {
  207. formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile,
  208. formatChecksumKind(Iter->Kind), toHex(Iter->Checksum));
  209. }
  210. }
  211. Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) {
  212. InputFile IF;
  213. if (!llvm::sys::fs::exists(Path))
  214. return make_error<StringError>(formatv("File {0} not found", Path),
  215. inconvertibleErrorCode());
  216. file_magic Magic;
  217. if (auto EC = identify_magic(Path, Magic))
  218. return make_error<StringError>(
  219. formatv("Unable to identify file type for file {0}", Path), EC);
  220. if (Magic == file_magic::coff_object) {
  221. Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path);
  222. if (!BinaryOrErr)
  223. return BinaryOrErr.takeError();
  224. IF.CoffObject = std::move(*BinaryOrErr);
  225. IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary());
  226. return std::move(IF);
  227. }
  228. if (Magic == file_magic::pdb) {
  229. std::unique_ptr<IPDBSession> Session;
  230. if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session))
  231. return std::move(Err);
  232. IF.PdbSession.reset(static_cast<NativeSession *>(Session.release()));
  233. IF.PdbOrObj = &IF.PdbSession->getPDBFile();
  234. return std::move(IF);
  235. }
  236. if (!AllowUnknownFile)
  237. return make_error<StringError>(
  238. formatv("File {0} is not a supported file type", Path),
  239. inconvertibleErrorCode());
  240. auto Result = MemoryBuffer::getFile(Path, -1LL, false);
  241. if (!Result)
  242. return make_error<StringError>(
  243. formatv("File {0} could not be opened", Path), Result.getError());
  244. IF.UnknownFile = std::move(*Result);
  245. IF.PdbOrObj = IF.UnknownFile.get();
  246. return std::move(IF);
  247. }
  248. PDBFile &InputFile::pdb() {
  249. assert(isPdb());
  250. return *PdbOrObj.get<PDBFile *>();
  251. }
  252. const PDBFile &InputFile::pdb() const {
  253. assert(isPdb());
  254. return *PdbOrObj.get<PDBFile *>();
  255. }
  256. object::COFFObjectFile &InputFile::obj() {
  257. assert(isObj());
  258. return *PdbOrObj.get<object::COFFObjectFile *>();
  259. }
  260. const object::COFFObjectFile &InputFile::obj() const {
  261. assert(isObj());
  262. return *PdbOrObj.get<object::COFFObjectFile *>();
  263. }
  264. MemoryBuffer &InputFile::unknown() {
  265. assert(isUnknown());
  266. return *PdbOrObj.get<MemoryBuffer *>();
  267. }
  268. const MemoryBuffer &InputFile::unknown() const {
  269. assert(isUnknown());
  270. return *PdbOrObj.get<MemoryBuffer *>();
  271. }
  272. StringRef InputFile::getFilePath() const {
  273. if (isPdb())
  274. return pdb().getFilePath();
  275. if (isObj())
  276. return obj().getFileName();
  277. assert(isUnknown());
  278. return unknown().getBufferIdentifier();
  279. }
  280. bool InputFile::hasTypes() const {
  281. if (isPdb())
  282. return pdb().hasPDBTpiStream();
  283. for (const auto &Section : obj().sections()) {
  284. CVTypeArray Types;
  285. if (isDebugTSection(Section, Types))
  286. return true;
  287. }
  288. return false;
  289. }
  290. bool InputFile::hasIds() const {
  291. if (isObj())
  292. return false;
  293. return pdb().hasPDBIpiStream();
  294. }
  295. bool InputFile::isPdb() const { return<PDBFile *>(); }
  296. bool InputFile::isObj() const {
  297. return<object::COFFObjectFile *>();
  298. }
  299. bool InputFile::isUnknown() const { return<MemoryBuffer *>(); }
  300. codeview::LazyRandomTypeCollection &
  301. InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) {
  302. if (Types && Kind == kTypes)
  303. return *Types;
  304. if (Ids && Kind == kIds)
  305. return *Ids;
  306. if (Kind == kIds) {
  307. assert(isPdb() && pdb().hasPDBIpiStream());
  308. }
  309. // If the collection was already initialized, we should have just returned it
  310. // in step 1.
  311. if (isPdb()) {
  312. TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types;
  313. auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream()
  314. : pdb().getPDBTpiStream());
  315. auto &Array = Stream.typeArray();
  316. uint32_t Count = Stream.getNumTypeRecords();
  317. auto Offsets = Stream.getTypeIndexOffsets();
  318. Collection =
  319. std::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets);
  320. return *Collection;
  321. }
  322. assert(isObj());
  323. assert(Kind == kTypes);
  324. assert(!Types);
  325. for (const auto &Section : obj().sections()) {
  326. CVTypeArray Records;
  327. if (!isDebugTSection(Section, Records))
  328. continue;
  329. Types = std::make_unique<LazyRandomTypeCollection>(Records, 100);
  330. return *Types;
  331. }
  332. Types = std::make_unique<LazyRandomTypeCollection>(100);
  333. return *Types;
  334. }
  335. codeview::LazyRandomTypeCollection &InputFile::types() {
  336. return getOrCreateTypeCollection(kTypes);
  337. }
  338. codeview::LazyRandomTypeCollection &InputFile::ids() {
  339. // Object files have only one type stream that contains both types and ids.
  340. // Similarly, some PDBs don't contain an IPI stream, and for those both types
  341. // and IDs are in the same stream.
  342. if (isObj() || !pdb().hasPDBIpiStream())
  343. return types();
  344. return getOrCreateTypeCollection(kIds);
  345. }
  346. iterator_range<SymbolGroupIterator> InputFile::symbol_groups() {
  347. return make_range<SymbolGroupIterator>(symbol_groups_begin(),
  348. symbol_groups_end());
  349. }
  350. SymbolGroupIterator InputFile::symbol_groups_begin() {
  351. return SymbolGroupIterator(*this);
  352. }
  353. SymbolGroupIterator InputFile::symbol_groups_end() {
  354. return SymbolGroupIterator();
  355. }
  356. SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {}
  357. SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) {
  358. if (File.isObj()) {
  359. SectionIter = File.obj().section_begin();
  360. scanToNextDebugS();
  361. }
  362. }
  363. bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const {
  364. bool E = isEnd();
  365. bool RE = R.isEnd();
  366. if (E || RE)
  367. return E == RE;
  368. if (Value.File != R.Value.File)
  369. return false;
  370. return Index == R.Index;
  371. }
  372. const SymbolGroup &SymbolGroupIterator::operator*() const {
  373. assert(!isEnd());
  374. return Value;
  375. }
  376. SymbolGroup &SymbolGroupIterator::operator*() {
  377. assert(!isEnd());
  378. return Value;
  379. }
  380. SymbolGroupIterator &SymbolGroupIterator::operator++() {
  381. assert(Value.File && !isEnd());
  382. ++Index;
  383. if (isEnd())
  384. return *this;
  385. if (Value.File->isPdb()) {
  386. Value.updatePdbModi(Index);
  387. return *this;
  388. }
  389. scanToNextDebugS();
  390. return *this;
  391. }
  392. void SymbolGroupIterator::scanToNextDebugS() {
  393. assert(SectionIter.hasValue());
  394. auto End = Value.File->obj().section_end();
  395. auto &Iter = *SectionIter;
  396. assert(!isEnd());
  397. while (++Iter != End) {
  398. DebugSubsectionArray SS;
  399. SectionRef SR = *Iter;
  400. if (!isDebugSSection(SR, SS))
  401. continue;
  402. Value.updateDebugS(SS);
  403. return;
  404. }
  405. }
  406. bool SymbolGroupIterator::isEnd() const {
  407. if (!Value.File)
  408. return true;
  409. if (Value.File->isPdb()) {
  410. auto &Dbi = cantFail(Value.File->pdb().getPDBDbiStream());
  411. uint32_t Count = Dbi.modules().getModuleCount();
  412. assert(Index <= Count);
  413. return Index == Count;
  414. }
  415. assert(SectionIter.hasValue());
  416. return *SectionIter == Value.File->obj().section_end();
  417. }