llvm-gsymutil.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535
  1. //===-- gsymutil.cpp - GSYM dumping and creation utility for llvm ---------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "llvm/ADT/STLExtras.h"
  9. #include "llvm/ADT/StringSet.h"
  10. #include "llvm/ADT/Triple.h"
  11. #include "llvm/DebugInfo/DIContext.h"
  12. #include "llvm/DebugInfo/DWARF/DWARFContext.h"
  13. #include "llvm/Object/Archive.h"
  14. #include "llvm/Object/ELFObjectFile.h"
  15. #include "llvm/Object/MachOUniversal.h"
  16. #include "llvm/Object/ObjectFile.h"
  17. #include "llvm/Support/CommandLine.h"
  18. #include "llvm/Support/Debug.h"
  19. #include "llvm/Support/Format.h"
  20. #include "llvm/Support/ManagedStatic.h"
  21. #include "llvm/Support/MemoryBuffer.h"
  22. #include "llvm/Support/PrettyStackTrace.h"
  23. #include "llvm/Support/Regex.h"
  24. #include "llvm/Support/Signals.h"
  25. #include "llvm/Support/TargetSelect.h"
  26. #include "llvm/Support/raw_ostream.h"
  27. #include <algorithm>
  28. #include <cstring>
  29. #include <inttypes.h>
  30. #include <iostream>
  31. #include <map>
  32. #include <string>
  33. #include <system_error>
  34. #include <vector>
  35. #include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
  36. #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
  37. #include "llvm/DebugInfo/GSYM/GsymCreator.h"
  38. #include "llvm/DebugInfo/GSYM/GsymReader.h"
  39. #include "llvm/DebugInfo/GSYM/InlineInfo.h"
  40. #include "llvm/DebugInfo/GSYM/LookupResult.h"
  41. #include "llvm/DebugInfo/GSYM/ObjectFileTransformer.h"
  42. using namespace llvm;
  43. using namespace gsym;
  44. using namespace object;
  45. /// @}
  46. /// Command line options.
  47. /// @{
  48. namespace {
  49. using namespace cl;
  50. OptionCategory GeneralOptions("Options");
  51. OptionCategory ConversionOptions("Conversion Options");
  52. OptionCategory LookupOptions("Lookup Options");
  53. static opt<bool> Help("h", desc("Alias for -help"), Hidden,
  54. cat(GeneralOptions));
  55. static opt<bool> Verbose("verbose",
  56. desc("Enable verbose logging and encoding details."),
  57. cat(GeneralOptions));
  58. static list<std::string> InputFilenames(Positional, desc("<input GSYM files>"),
  59. ZeroOrMore, cat(GeneralOptions));
  60. static opt<std::string>
  61. ConvertFilename("convert", cl::init(""),
  62. cl::desc("Convert the specified file to the GSYM format.\n"
  63. "Supported files include ELF and mach-o files "
  64. "that will have their debug info (DWARF) and "
  65. "symbol table converted."),
  66. cl::value_desc("path"), cat(ConversionOptions));
  67. static list<std::string>
  68. ArchFilters("arch",
  69. desc("Process debug information for the specified CPU "
  70. "architecture only.\nArchitectures may be specified by "
  71. "name or by number.\nThis option can be specified "
  72. "multiple times, once for each desired architecture."),
  73. cl::value_desc("arch"), cat(ConversionOptions));
  74. static opt<std::string>
  75. OutputFilename("out-file", cl::init(""),
  76. cl::desc("Specify the path where the converted GSYM file "
  77. "will be saved.\nWhen not specified, a '.gsym' "
  78. "extension will be appended to the file name "
  79. "specified in the --convert option."),
  80. cl::value_desc("path"), cat(ConversionOptions));
  81. static alias OutputFilenameAlias("o", desc("Alias for -out-file."),
  82. aliasopt(OutputFilename),
  83. cat(ConversionOptions));
  84. static opt<bool> Verify("verify",
  85. desc("Verify the generated GSYM file against the "
  86. "information in the file that was converted."),
  87. cat(ConversionOptions));
  88. static opt<unsigned>
  89. NumThreads("num-threads",
  90. desc("Specify the maximum number (n) of simultaneous threads "
  91. "to use when converting files to GSYM.\nDefaults to the "
  92. "number of cores on the current machine."),
  93. cl::value_desc("n"), cat(ConversionOptions));
  94. static opt<bool>
  95. Quiet("quiet", desc("Do not output warnings about the debug information"),
  96. cat(ConversionOptions));
  97. static list<uint64_t> LookupAddresses("address",
  98. desc("Lookup an address in a GSYM file"),
  99. cl::value_desc("addr"),
  100. cat(LookupOptions));
  101. static opt<bool> LookupAddressesFromStdin(
  102. "addresses-from-stdin",
  103. desc("Lookup addresses in a GSYM file that are read from stdin\nEach input "
  104. "line is expected to be of the following format: <addr> <gsym-path>"),
  105. cat(LookupOptions));
  106. } // namespace
  107. /// @}
  108. //===----------------------------------------------------------------------===//
  109. static void error(Error Err) {
  110. if (!Err)
  111. return;
  112. WithColor::error() << toString(std::move(Err)) << "\n";
  113. exit(1);
  114. }
  115. static void error(StringRef Prefix, llvm::Error Err) {
  116. if (!Err)
  117. return;
  118. errs() << Prefix << ": " << Err << "\n";
  119. consumeError(std::move(Err));
  120. exit(1);
  121. }
  122. static void error(StringRef Prefix, std::error_code EC) {
  123. if (!EC)
  124. return;
  125. errs() << Prefix << ": " << EC.message() << "\n";
  126. exit(1);
  127. }
  128. static uint32_t getCPUType(MachOObjectFile &MachO) {
  129. if (MachO.is64Bit())
  130. return MachO.getHeader64().cputype;
  131. else
  132. return MachO.getHeader().cputype;
  133. }
  134. /// Return true if the object file has not been filtered by an --arch option.
  135. static bool filterArch(MachOObjectFile &Obj) {
  136. if (ArchFilters.empty())
  137. return true;
  138. Triple ObjTriple(Obj.getArchTriple());
  139. StringRef ObjArch = ObjTriple.getArchName();
  140. for (auto Arch : ArchFilters) {
  141. // Match name.
  142. if (Arch == ObjArch)
  143. return true;
  144. // Match architecture number.
  145. unsigned Value;
  146. if (!StringRef(Arch).getAsInteger(0, Value))
  147. if (Value == getCPUType(Obj))
  148. return true;
  149. }
  150. return false;
  151. }
  152. /// Determine the virtual address that is considered the base address of an ELF
  153. /// object file.
  154. ///
  155. /// The base address of an ELF file is the the "p_vaddr" of the first program
  156. /// header whose "p_type" is PT_LOAD.
  157. ///
  158. /// \param ELFFile An ELF object file we will search.
  159. ///
  160. /// \returns A valid image base address if we are able to extract one.
  161. template <class ELFT>
  162. static llvm::Optional<uint64_t>
  163. getImageBaseAddress(const object::ELFFile<ELFT> &ELFFile) {
  164. auto PhdrRangeOrErr = ELFFile.program_headers();
  165. if (!PhdrRangeOrErr) {
  166. consumeError(PhdrRangeOrErr.takeError());
  167. return llvm::None;
  168. }
  169. for (const typename ELFT::Phdr &Phdr : *PhdrRangeOrErr)
  170. if (Phdr.p_type == ELF::PT_LOAD)
  171. return (uint64_t)Phdr.p_vaddr;
  172. return llvm::None;
  173. }
  174. /// Determine the virtual address that is considered the base address of mach-o
  175. /// object file.
  176. ///
  177. /// The base address of a mach-o file is the vmaddr of the "__TEXT" segment.
  178. ///
  179. /// \param MachO A mach-o object file we will search.
  180. ///
  181. /// \returns A valid image base address if we are able to extract one.
  182. static llvm::Optional<uint64_t>
  183. getImageBaseAddress(const object::MachOObjectFile *MachO) {
  184. for (const auto &Command : MachO->load_commands()) {
  185. if (Command.C.cmd == MachO::LC_SEGMENT) {
  186. MachO::segment_command SLC = MachO->getSegmentLoadCommand(Command);
  187. StringRef SegName = SLC.segname;
  188. if (SegName == "__TEXT")
  189. return SLC.vmaddr;
  190. } else if (Command.C.cmd == MachO::LC_SEGMENT_64) {
  191. MachO::segment_command_64 SLC = MachO->getSegment64LoadCommand(Command);
  192. StringRef SegName = SLC.segname;
  193. if (SegName == "__TEXT")
  194. return SLC.vmaddr;
  195. }
  196. }
  197. return llvm::None;
  198. }
  199. /// Determine the virtual address that is considered the base address of an
  200. /// object file.
  201. ///
  202. /// Since GSYM files are used for symbolication, many clients will need to
  203. /// easily adjust addresses they find in stack traces so the lookups happen
  204. /// on unslid addresses from the original object file. If the base address of
  205. /// a GSYM file is set to the base address of the image, then this address
  206. /// adjusting is much easier.
  207. ///
  208. /// \param Obj An object file we will search.
  209. ///
  210. /// \returns A valid image base address if we are able to extract one.
  211. static llvm::Optional<uint64_t> getImageBaseAddress(object::ObjectFile &Obj) {
  212. if (const auto *MachO = dyn_cast<object::MachOObjectFile>(&Obj))
  213. return getImageBaseAddress(MachO);
  214. else if (const auto *ELFObj = dyn_cast<object::ELF32LEObjectFile>(&Obj))
  215. return getImageBaseAddress(ELFObj->getELFFile());
  216. else if (const auto *ELFObj = dyn_cast<object::ELF32BEObjectFile>(&Obj))
  217. return getImageBaseAddress(ELFObj->getELFFile());
  218. else if (const auto *ELFObj = dyn_cast<object::ELF64LEObjectFile>(&Obj))
  219. return getImageBaseAddress(ELFObj->getELFFile());
  220. else if (const auto *ELFObj = dyn_cast<object::ELF64BEObjectFile>(&Obj))
  221. return getImageBaseAddress(ELFObj->getELFFile());
  222. return llvm::None;
  223. }
  224. static llvm::Error handleObjectFile(ObjectFile &Obj,
  225. const std::string &OutFile) {
  226. auto ThreadCount =
  227. NumThreads > 0 ? NumThreads : std::thread::hardware_concurrency();
  228. auto &OS = outs();
  229. GsymCreator Gsym(Quiet);
  230. // See if we can figure out the base address for a given object file, and if
  231. // we can, then set the base address to use to this value. This will ease
  232. // symbolication since clients can slide the GSYM lookup addresses by using
  233. // the load bias of the shared library.
  234. if (auto ImageBaseAddr = getImageBaseAddress(Obj))
  235. Gsym.setBaseAddress(*ImageBaseAddr);
  236. // We need to know where the valid sections are that contain instructions.
  237. // See header documentation for DWARFTransformer::SetValidTextRanges() for
  238. // defails.
  239. AddressRanges TextRanges;
  240. for (const object::SectionRef &Sect : Obj.sections()) {
  241. if (!Sect.isText())
  242. continue;
  243. const uint64_t Size = Sect.getSize();
  244. if (Size == 0)
  245. continue;
  246. const uint64_t StartAddr = Sect.getAddress();
  247. TextRanges.insert(AddressRange(StartAddr, StartAddr + Size));
  248. }
  249. // Make sure there is DWARF to convert first.
  250. std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(Obj);
  251. if (!DICtx)
  252. return createStringError(std::errc::invalid_argument,
  253. "unable to create DWARF context");
  254. logAllUnhandledErrors(DICtx->loadRegisterInfo(Obj), OS, "DwarfTransformer: ");
  255. // Make a DWARF transformer object and populate the ranges of the code
  256. // so we don't end up adding invalid functions to GSYM data.
  257. DwarfTransformer DT(*DICtx, OS, Gsym);
  258. if (!TextRanges.empty())
  259. Gsym.SetValidTextRanges(TextRanges);
  260. // Convert all DWARF to GSYM.
  261. if (auto Err = DT.convert(ThreadCount))
  262. return Err;
  263. // Get the UUID and convert symbol table to GSYM.
  264. if (auto Err = ObjectFileTransformer::convert(Obj, OS, Gsym))
  265. return Err;
  266. // Finalize the GSYM to make it ready to save to disk. This will remove
  267. // duplicate FunctionInfo entries where we might have found an entry from
  268. // debug info and also a symbol table entry from the object file.
  269. if (auto Err = Gsym.finalize(OS))
  270. return Err;
  271. // Save the GSYM file to disk.
  272. support::endianness Endian =
  273. Obj.makeTriple().isLittleEndian() ? support::little : support::big;
  274. if (auto Err = Gsym.save(OutFile, Endian))
  275. return Err;
  276. // Verify the DWARF if requested. This will ensure all the info in the DWARF
  277. // can be looked up in the GSYM and that all lookups get matching data.
  278. if (Verify) {
  279. if (auto Err = DT.verify(OutFile))
  280. return Err;
  281. }
  282. return Error::success();
  283. }
  284. static llvm::Error handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
  285. const std::string &OutFile) {
  286. Expected<std::unique_ptr<Binary>> BinOrErr = object::createBinary(Buffer);
  287. error(Filename, errorToErrorCode(BinOrErr.takeError()));
  288. if (auto *Obj = dyn_cast<ObjectFile>(BinOrErr->get())) {
  289. Triple ObjTriple(Obj->makeTriple());
  290. auto ArchName = ObjTriple.getArchName();
  291. outs() << "Output file (" << ArchName << "): " << OutFile << "\n";
  292. if (auto Err = handleObjectFile(*Obj, OutFile))
  293. return Err;
  294. } else if (auto *Fat = dyn_cast<MachOUniversalBinary>(BinOrErr->get())) {
  295. // Iterate over all contained architectures and filter out any that were
  296. // not specified with the "--arch <arch>" option. If the --arch option was
  297. // not specified on the command line, we will process all architectures.
  298. std::vector<std::unique_ptr<MachOObjectFile>> FilterObjs;
  299. for (auto &ObjForArch : Fat->objects()) {
  300. if (auto MachOOrErr = ObjForArch.getAsObjectFile()) {
  301. auto &Obj = **MachOOrErr;
  302. if (filterArch(Obj))
  303. FilterObjs.emplace_back(MachOOrErr->release());
  304. } else {
  305. error(Filename, MachOOrErr.takeError());
  306. }
  307. }
  308. if (FilterObjs.empty())
  309. error(Filename, createStringError(std::errc::invalid_argument,
  310. "no matching architectures found"));
  311. // Now handle each architecture we need to convert.
  312. for (auto &Obj : FilterObjs) {
  313. Triple ObjTriple(Obj->getArchTriple());
  314. auto ArchName = ObjTriple.getArchName();
  315. std::string ArchOutFile(OutFile);
  316. // If we are only handling a single architecture, then we will use the
  317. // normal output file. If we are handling multiple architectures append
  318. // the architecture name to the end of the out file path so that we
  319. // don't overwrite the previous architecture's gsym file.
  320. if (FilterObjs.size() > 1) {
  321. ArchOutFile.append(1, '.');
  322. ArchOutFile.append(ArchName.str());
  323. }
  324. outs() << "Output file (" << ArchName << "): " << ArchOutFile << "\n";
  325. if (auto Err = handleObjectFile(*Obj, ArchOutFile))
  326. return Err;
  327. }
  328. }
  329. return Error::success();
  330. }
  331. static llvm::Error handleFileConversionToGSYM(StringRef Filename,
  332. const std::string &OutFile) {
  333. ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
  334. MemoryBuffer::getFileOrSTDIN(Filename);
  335. error(Filename, BuffOrErr.getError());
  336. std::unique_ptr<MemoryBuffer> Buffer = std::move(BuffOrErr.get());
  337. return handleBuffer(Filename, *Buffer, OutFile);
  338. }
  339. static llvm::Error convertFileToGSYM(raw_ostream &OS) {
  340. // Expand any .dSYM bundles to the individual object files contained therein.
  341. std::vector<std::string> Objects;
  342. std::string OutFile = OutputFilename;
  343. if (OutFile.empty()) {
  344. OutFile = ConvertFilename;
  345. OutFile += ".gsym";
  346. }
  347. OS << "Input file: " << ConvertFilename << "\n";
  348. if (auto DsymObjectsOrErr =
  349. MachOObjectFile::findDsymObjectMembers(ConvertFilename)) {
  350. if (DsymObjectsOrErr->empty())
  351. Objects.push_back(ConvertFilename);
  352. else
  353. llvm::append_range(Objects, *DsymObjectsOrErr);
  354. } else {
  355. error(DsymObjectsOrErr.takeError());
  356. }
  357. for (auto Object : Objects) {
  358. if (auto Err = handleFileConversionToGSYM(Object, OutFile))
  359. return Err;
  360. }
  361. return Error::success();
  362. }
  363. static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
  364. if (auto Result = Gsym.lookup(Addr)) {
  365. // If verbose is enabled dump the full function info for the address.
  366. if (Verbose) {
  367. if (auto FI = Gsym.getFunctionInfo(Addr)) {
  368. OS << "FunctionInfo for " << HEX64(Addr) << ":\n";
  369. Gsym.dump(OS, *FI);
  370. OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
  371. }
  372. }
  373. OS << Result.get();
  374. } else {
  375. if (Verbose)
  376. OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
  377. OS << HEX64(Addr) << ": ";
  378. logAllUnhandledErrors(Result.takeError(), OS, "error: ");
  379. }
  380. if (Verbose)
  381. OS << "\n";
  382. }
  383. int main(int argc, char const *argv[]) {
  384. // Print a stack trace if we signal out.
  385. sys::PrintStackTraceOnErrorSignal(argv[0]);
  386. PrettyStackTraceProgram X(argc, argv);
  387. llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
  388. llvm::InitializeAllTargets();
  389. const char *Overview =
  390. "A tool for dumping, searching and creating GSYM files.\n\n"
  391. "Specify one or more GSYM paths as arguments to dump all of the "
  392. "information in each GSYM file.\n"
  393. "Specify a single GSYM file along with one or more --lookup options to "
  394. "lookup addresses within that GSYM file.\n"
  395. "Use the --convert option to specify a file with option --out-file "
  396. "option to convert to GSYM format.\n";
  397. HideUnrelatedOptions({&GeneralOptions, &ConversionOptions, &LookupOptions});
  398. cl::ParseCommandLineOptions(argc, argv, Overview);
  399. if (Help) {
  400. PrintHelpMessage(/*Hidden =*/false, /*Categorized =*/true);
  401. return 0;
  402. }
  403. raw_ostream &OS = outs();
  404. if (!ConvertFilename.empty()) {
  405. // Convert DWARF to GSYM
  406. if (!InputFilenames.empty()) {
  407. OS << "error: no input files can be specified when using the --convert "
  408. "option.\n";
  409. return 1;
  410. }
  411. // Call error() if we have an error and it will exit with a status of 1
  412. if (auto Err = convertFileToGSYM(OS))
  413. error("DWARF conversion failed: ", std::move(Err));
  414. return 0;
  415. }
  416. if (LookupAddressesFromStdin) {
  417. if (!LookupAddresses.empty() || !InputFilenames.empty()) {
  418. OS << "error: no input files or addresses can be specified when using "
  419. "the --addresses-from-stdin "
  420. "option.\n";
  421. return 1;
  422. }
  423. std::string InputLine;
  424. std::string CurrentGSYMPath;
  425. llvm::Optional<Expected<GsymReader>> CurrentGsym;
  426. while (std::getline(std::cin, InputLine)) {
  427. // Strip newline characters.
  428. std::string StrippedInputLine(InputLine);
  429. llvm::erase_if(StrippedInputLine,
  430. [](char c) { return c == '\r' || c == '\n'; });
  431. StringRef AddrStr, GSYMPath;
  432. std::tie(AddrStr, GSYMPath) =
  433. llvm::StringRef{StrippedInputLine}.split(' ');
  434. if (GSYMPath != CurrentGSYMPath) {
  435. CurrentGsym = GsymReader::openFile(GSYMPath);
  436. if (!*CurrentGsym)
  437. error(GSYMPath, CurrentGsym->takeError());
  438. }
  439. uint64_t Addr;
  440. if (AddrStr.getAsInteger(0, Addr)) {
  441. OS << "error: invalid address " << AddrStr
  442. << ", expected: Address GsymFile.\n";
  443. return 1;
  444. }
  445. doLookup(**CurrentGsym, Addr, OS);
  446. OS << "\n";
  447. OS.flush();
  448. }
  449. return EXIT_SUCCESS;
  450. }
  451. // Dump or access data inside GSYM files
  452. for (const auto &GSYMPath : InputFilenames) {
  453. auto Gsym = GsymReader::openFile(GSYMPath);
  454. if (!Gsym)
  455. error(GSYMPath, Gsym.takeError());
  456. if (LookupAddresses.empty()) {
  457. Gsym->dump(outs());
  458. continue;
  459. }
  460. // Lookup an address in a GSYM file and print any matches.
  461. OS << "Looking up addresses in \"" << GSYMPath << "\":\n";
  462. for (auto Addr : LookupAddresses) {
  463. doLookup(*Gsym, Addr, OS);
  464. }
  465. }
  466. return EXIT_SUCCESS;
  467. }