llvm-gsymutil.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536
  1. //===-- gsymutil.cpp - GSYM dumping and creation utility for llvm ---------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "llvm/ADT/STLExtras.h"
  9. #include "llvm/ADT/StringSet.h"
  10. #include "llvm/ADT/Triple.h"
  11. #include "llvm/DebugInfo/DIContext.h"
  12. #include "llvm/DebugInfo/DWARF/DWARFContext.h"
  13. #include "llvm/Object/Archive.h"
  14. #include "llvm/Object/ELFObjectFile.h"
  15. #include "llvm/Object/MachOUniversal.h"
  16. #include "llvm/Object/ObjectFile.h"
  17. #include "llvm/Support/CommandLine.h"
  18. #include "llvm/Support/Debug.h"
  19. #include "llvm/Support/Format.h"
  20. #include "llvm/Support/ManagedStatic.h"
  21. #include "llvm/Support/MemoryBuffer.h"
  22. #include "llvm/Support/PrettyStackTrace.h"
  23. #include "llvm/Support/Regex.h"
  24. #include "llvm/Support/Signals.h"
  25. #include "llvm/Support/TargetSelect.h"
  26. #include "llvm/Support/raw_ostream.h"
  27. #include <algorithm>
  28. #include <cstring>
  29. #include <inttypes.h>
  30. #include <iostream>
  31. #include <map>
  32. #include <string>
  33. #include <system_error>
  34. #include <vector>
  35. #include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
  36. #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
  37. #include "llvm/DebugInfo/GSYM/GsymCreator.h"
  38. #include "llvm/DebugInfo/GSYM/GsymReader.h"
  39. #include "llvm/DebugInfo/GSYM/InlineInfo.h"
  40. #include "llvm/DebugInfo/GSYM/LookupResult.h"
  41. #include "llvm/DebugInfo/GSYM/ObjectFileTransformer.h"
  42. #include <optional>
  43. using namespace llvm;
  44. using namespace gsym;
  45. using namespace object;
  46. /// @}
  47. /// Command line options.
  48. /// @{
  49. namespace {
  50. using namespace cl;
  51. OptionCategory GeneralOptions("Options");
  52. OptionCategory ConversionOptions("Conversion Options");
  53. OptionCategory LookupOptions("Lookup Options");
  54. static opt<bool> Help("h", desc("Alias for -help"), Hidden,
  55. cat(GeneralOptions));
  56. static opt<bool> Verbose("verbose",
  57. desc("Enable verbose logging and encoding details."),
  58. cat(GeneralOptions));
  59. static list<std::string> InputFilenames(Positional, desc("<input GSYM files>"),
  60. cat(GeneralOptions));
  61. static opt<std::string>
  62. ConvertFilename("convert", cl::init(""),
  63. cl::desc("Convert the specified file to the GSYM format.\n"
  64. "Supported files include ELF and mach-o files "
  65. "that will have their debug info (DWARF) and "
  66. "symbol table converted."),
  67. cl::value_desc("path"), cat(ConversionOptions));
  68. static list<std::string>
  69. ArchFilters("arch",
  70. desc("Process debug information for the specified CPU "
  71. "architecture only.\nArchitectures may be specified by "
  72. "name or by number.\nThis option can be specified "
  73. "multiple times, once for each desired architecture."),
  74. cl::value_desc("arch"), cat(ConversionOptions));
  75. static opt<std::string>
  76. OutputFilename("out-file", cl::init(""),
  77. cl::desc("Specify the path where the converted GSYM file "
  78. "will be saved.\nWhen not specified, a '.gsym' "
  79. "extension will be appended to the file name "
  80. "specified in the --convert option."),
  81. cl::value_desc("path"), cat(ConversionOptions));
  82. static alias OutputFilenameAlias("o", desc("Alias for -out-file."),
  83. aliasopt(OutputFilename),
  84. cat(ConversionOptions));
  85. static opt<bool> Verify("verify",
  86. desc("Verify the generated GSYM file against the "
  87. "information in the file that was converted."),
  88. cat(ConversionOptions));
  89. static opt<unsigned>
  90. NumThreads("num-threads",
  91. desc("Specify the maximum number (n) of simultaneous threads "
  92. "to use when converting files to GSYM.\nDefaults to the "
  93. "number of cores on the current machine."),
  94. cl::value_desc("n"), cat(ConversionOptions));
  95. static opt<bool>
  96. Quiet("quiet", desc("Do not output warnings about the debug information"),
  97. cat(ConversionOptions));
  98. static list<uint64_t> LookupAddresses("address",
  99. desc("Lookup an address in a GSYM file"),
  100. cl::value_desc("addr"),
  101. cat(LookupOptions));
  102. static opt<bool> LookupAddressesFromStdin(
  103. "addresses-from-stdin",
  104. desc("Lookup addresses in a GSYM file that are read from stdin\nEach input "
  105. "line is expected to be of the following format: <addr> <gsym-path>"),
  106. cat(LookupOptions));
  107. } // namespace
  108. /// @}
  109. //===----------------------------------------------------------------------===//
  110. static void error(Error Err) {
  111. if (!Err)
  112. return;
  113. WithColor::error() << toString(std::move(Err)) << "\n";
  114. exit(1);
  115. }
  116. static void error(StringRef Prefix, llvm::Error Err) {
  117. if (!Err)
  118. return;
  119. errs() << Prefix << ": " << Err << "\n";
  120. consumeError(std::move(Err));
  121. exit(1);
  122. }
  123. static void error(StringRef Prefix, std::error_code EC) {
  124. if (!EC)
  125. return;
  126. errs() << Prefix << ": " << EC.message() << "\n";
  127. exit(1);
  128. }
  129. static uint32_t getCPUType(MachOObjectFile &MachO) {
  130. if (MachO.is64Bit())
  131. return MachO.getHeader64().cputype;
  132. else
  133. return MachO.getHeader().cputype;
  134. }
  135. /// Return true if the object file has not been filtered by an --arch option.
  136. static bool filterArch(MachOObjectFile &Obj) {
  137. if (ArchFilters.empty())
  138. return true;
  139. Triple ObjTriple(Obj.getArchTriple());
  140. StringRef ObjArch = ObjTriple.getArchName();
  141. for (auto Arch : ArchFilters) {
  142. // Match name.
  143. if (Arch == ObjArch)
  144. return true;
  145. // Match architecture number.
  146. unsigned Value;
  147. if (!StringRef(Arch).getAsInteger(0, Value))
  148. if (Value == getCPUType(Obj))
  149. return true;
  150. }
  151. return false;
  152. }
  153. /// Determine the virtual address that is considered the base address of an ELF
  154. /// object file.
  155. ///
  156. /// The base address of an ELF file is the the "p_vaddr" of the first program
  157. /// header whose "p_type" is PT_LOAD.
  158. ///
  159. /// \param ELFFile An ELF object file we will search.
  160. ///
  161. /// \returns A valid image base address if we are able to extract one.
  162. template <class ELFT>
  163. static std::optional<uint64_t>
  164. getImageBaseAddress(const object::ELFFile<ELFT> &ELFFile) {
  165. auto PhdrRangeOrErr = ELFFile.program_headers();
  166. if (!PhdrRangeOrErr) {
  167. consumeError(PhdrRangeOrErr.takeError());
  168. return std::nullopt;
  169. }
  170. for (const typename ELFT::Phdr &Phdr : *PhdrRangeOrErr)
  171. if (Phdr.p_type == ELF::PT_LOAD)
  172. return (uint64_t)Phdr.p_vaddr;
  173. return std::nullopt;
  174. }
  175. /// Determine the virtual address that is considered the base address of mach-o
  176. /// object file.
  177. ///
  178. /// The base address of a mach-o file is the vmaddr of the "__TEXT" segment.
  179. ///
  180. /// \param MachO A mach-o object file we will search.
  181. ///
  182. /// \returns A valid image base address if we are able to extract one.
  183. static std::optional<uint64_t>
  184. getImageBaseAddress(const object::MachOObjectFile *MachO) {
  185. for (const auto &Command : MachO->load_commands()) {
  186. if (Command.C.cmd == MachO::LC_SEGMENT) {
  187. MachO::segment_command SLC = MachO->getSegmentLoadCommand(Command);
  188. StringRef SegName = SLC.segname;
  189. if (SegName == "__TEXT")
  190. return SLC.vmaddr;
  191. } else if (Command.C.cmd == MachO::LC_SEGMENT_64) {
  192. MachO::segment_command_64 SLC = MachO->getSegment64LoadCommand(Command);
  193. StringRef SegName = SLC.segname;
  194. if (SegName == "__TEXT")
  195. return SLC.vmaddr;
  196. }
  197. }
  198. return std::nullopt;
  199. }
  200. /// Determine the virtual address that is considered the base address of an
  201. /// object file.
  202. ///
  203. /// Since GSYM files are used for symbolication, many clients will need to
  204. /// easily adjust addresses they find in stack traces so the lookups happen
  205. /// on unslid addresses from the original object file. If the base address of
  206. /// a GSYM file is set to the base address of the image, then this address
  207. /// adjusting is much easier.
  208. ///
  209. /// \param Obj An object file we will search.
  210. ///
  211. /// \returns A valid image base address if we are able to extract one.
  212. static std::optional<uint64_t> getImageBaseAddress(object::ObjectFile &Obj) {
  213. if (const auto *MachO = dyn_cast<object::MachOObjectFile>(&Obj))
  214. return getImageBaseAddress(MachO);
  215. else if (const auto *ELFObj = dyn_cast<object::ELF32LEObjectFile>(&Obj))
  216. return getImageBaseAddress(ELFObj->getELFFile());
  217. else if (const auto *ELFObj = dyn_cast<object::ELF32BEObjectFile>(&Obj))
  218. return getImageBaseAddress(ELFObj->getELFFile());
  219. else if (const auto *ELFObj = dyn_cast<object::ELF64LEObjectFile>(&Obj))
  220. return getImageBaseAddress(ELFObj->getELFFile());
  221. else if (const auto *ELFObj = dyn_cast<object::ELF64BEObjectFile>(&Obj))
  222. return getImageBaseAddress(ELFObj->getELFFile());
  223. return std::nullopt;
  224. }
  225. static llvm::Error handleObjectFile(ObjectFile &Obj,
  226. const std::string &OutFile) {
  227. auto ThreadCount =
  228. NumThreads > 0 ? NumThreads : std::thread::hardware_concurrency();
  229. auto &OS = outs();
  230. GsymCreator Gsym(Quiet);
  231. // See if we can figure out the base address for a given object file, and if
  232. // we can, then set the base address to use to this value. This will ease
  233. // symbolication since clients can slide the GSYM lookup addresses by using
  234. // the load bias of the shared library.
  235. if (auto ImageBaseAddr = getImageBaseAddress(Obj))
  236. Gsym.setBaseAddress(*ImageBaseAddr);
  237. // We need to know where the valid sections are that contain instructions.
  238. // See header documentation for DWARFTransformer::SetValidTextRanges() for
  239. // defails.
  240. AddressRanges TextRanges;
  241. for (const object::SectionRef &Sect : Obj.sections()) {
  242. if (!Sect.isText())
  243. continue;
  244. const uint64_t Size = Sect.getSize();
  245. if (Size == 0)
  246. continue;
  247. const uint64_t StartAddr = Sect.getAddress();
  248. TextRanges.insert(AddressRange(StartAddr, StartAddr + Size));
  249. }
  250. // Make sure there is DWARF to convert first.
  251. std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(Obj);
  252. if (!DICtx)
  253. return createStringError(std::errc::invalid_argument,
  254. "unable to create DWARF context");
  255. // Make a DWARF transformer object and populate the ranges of the code
  256. // so we don't end up adding invalid functions to GSYM data.
  257. DwarfTransformer DT(*DICtx, OS, Gsym);
  258. if (!TextRanges.empty())
  259. Gsym.SetValidTextRanges(TextRanges);
  260. // Convert all DWARF to GSYM.
  261. if (auto Err = DT.convert(ThreadCount))
  262. return Err;
  263. // Get the UUID and convert symbol table to GSYM.
  264. if (auto Err = ObjectFileTransformer::convert(Obj, OS, Gsym))
  265. return Err;
  266. // Finalize the GSYM to make it ready to save to disk. This will remove
  267. // duplicate FunctionInfo entries where we might have found an entry from
  268. // debug info and also a symbol table entry from the object file.
  269. if (auto Err = Gsym.finalize(OS))
  270. return Err;
  271. // Save the GSYM file to disk.
  272. support::endianness Endian =
  273. Obj.makeTriple().isLittleEndian() ? support::little : support::big;
  274. if (auto Err = Gsym.save(OutFile, Endian))
  275. return Err;
  276. // Verify the DWARF if requested. This will ensure all the info in the DWARF
  277. // can be looked up in the GSYM and that all lookups get matching data.
  278. if (Verify) {
  279. if (auto Err = DT.verify(OutFile))
  280. return Err;
  281. }
  282. return Error::success();
  283. }
  284. static llvm::Error handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
  285. const std::string &OutFile) {
  286. Expected<std::unique_ptr<Binary>> BinOrErr = object::createBinary(Buffer);
  287. error(Filename, errorToErrorCode(BinOrErr.takeError()));
  288. if (auto *Obj = dyn_cast<ObjectFile>(BinOrErr->get())) {
  289. Triple ObjTriple(Obj->makeTriple());
  290. auto ArchName = ObjTriple.getArchName();
  291. outs() << "Output file (" << ArchName << "): " << OutFile << "\n";
  292. if (auto Err = handleObjectFile(*Obj, OutFile))
  293. return Err;
  294. } else if (auto *Fat = dyn_cast<MachOUniversalBinary>(BinOrErr->get())) {
  295. // Iterate over all contained architectures and filter out any that were
  296. // not specified with the "--arch <arch>" option. If the --arch option was
  297. // not specified on the command line, we will process all architectures.
  298. std::vector<std::unique_ptr<MachOObjectFile>> FilterObjs;
  299. for (auto &ObjForArch : Fat->objects()) {
  300. if (auto MachOOrErr = ObjForArch.getAsObjectFile()) {
  301. auto &Obj = **MachOOrErr;
  302. if (filterArch(Obj))
  303. FilterObjs.emplace_back(MachOOrErr->release());
  304. } else {
  305. error(Filename, MachOOrErr.takeError());
  306. }
  307. }
  308. if (FilterObjs.empty())
  309. error(Filename, createStringError(std::errc::invalid_argument,
  310. "no matching architectures found"));
  311. // Now handle each architecture we need to convert.
  312. for (auto &Obj : FilterObjs) {
  313. Triple ObjTriple(Obj->getArchTriple());
  314. auto ArchName = ObjTriple.getArchName();
  315. std::string ArchOutFile(OutFile);
  316. // If we are only handling a single architecture, then we will use the
  317. // normal output file. If we are handling multiple architectures append
  318. // the architecture name to the end of the out file path so that we
  319. // don't overwrite the previous architecture's gsym file.
  320. if (FilterObjs.size() > 1) {
  321. ArchOutFile.append(1, '.');
  322. ArchOutFile.append(ArchName.str());
  323. }
  324. outs() << "Output file (" << ArchName << "): " << ArchOutFile << "\n";
  325. if (auto Err = handleObjectFile(*Obj, ArchOutFile))
  326. return Err;
  327. }
  328. }
  329. return Error::success();
  330. }
  331. static llvm::Error handleFileConversionToGSYM(StringRef Filename,
  332. const std::string &OutFile) {
  333. ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
  334. MemoryBuffer::getFileOrSTDIN(Filename);
  335. error(Filename, BuffOrErr.getError());
  336. std::unique_ptr<MemoryBuffer> Buffer = std::move(BuffOrErr.get());
  337. return handleBuffer(Filename, *Buffer, OutFile);
  338. }
  339. static llvm::Error convertFileToGSYM(raw_ostream &OS) {
  340. // Expand any .dSYM bundles to the individual object files contained therein.
  341. std::vector<std::string> Objects;
  342. std::string OutFile = OutputFilename;
  343. if (OutFile.empty()) {
  344. OutFile = ConvertFilename;
  345. OutFile += ".gsym";
  346. }
  347. OS << "Input file: " << ConvertFilename << "\n";
  348. if (auto DsymObjectsOrErr =
  349. MachOObjectFile::findDsymObjectMembers(ConvertFilename)) {
  350. if (DsymObjectsOrErr->empty())
  351. Objects.push_back(ConvertFilename);
  352. else
  353. llvm::append_range(Objects, *DsymObjectsOrErr);
  354. } else {
  355. error(DsymObjectsOrErr.takeError());
  356. }
  357. for (auto Object : Objects) {
  358. if (auto Err = handleFileConversionToGSYM(Object, OutFile))
  359. return Err;
  360. }
  361. return Error::success();
  362. }
  363. static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
  364. if (auto Result = Gsym.lookup(Addr)) {
  365. // If verbose is enabled dump the full function info for the address.
  366. if (Verbose) {
  367. if (auto FI = Gsym.getFunctionInfo(Addr)) {
  368. OS << "FunctionInfo for " << HEX64(Addr) << ":\n";
  369. Gsym.dump(OS, *FI);
  370. OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
  371. }
  372. }
  373. OS << Result.get();
  374. } else {
  375. if (Verbose)
  376. OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
  377. OS << HEX64(Addr) << ": ";
  378. logAllUnhandledErrors(Result.takeError(), OS, "error: ");
  379. }
  380. if (Verbose)
  381. OS << "\n";
  382. }
  383. int main(int argc, char const *argv[]) {
  384. // Print a stack trace if we signal out.
  385. sys::PrintStackTraceOnErrorSignal(argv[0]);
  386. PrettyStackTraceProgram X(argc, argv);
  387. llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
  388. llvm::InitializeAllTargets();
  389. const char *Overview =
  390. "A tool for dumping, searching and creating GSYM files.\n\n"
  391. "Specify one or more GSYM paths as arguments to dump all of the "
  392. "information in each GSYM file.\n"
  393. "Specify a single GSYM file along with one or more --lookup options to "
  394. "lookup addresses within that GSYM file.\n"
  395. "Use the --convert option to specify a file with option --out-file "
  396. "option to convert to GSYM format.\n";
  397. HideUnrelatedOptions({&GeneralOptions, &ConversionOptions, &LookupOptions});
  398. cl::ParseCommandLineOptions(argc, argv, Overview);
  399. if (Help) {
  400. PrintHelpMessage(/*Hidden =*/false, /*Categorized =*/true);
  401. return 0;
  402. }
  403. raw_ostream &OS = outs();
  404. if (!ConvertFilename.empty()) {
  405. // Convert DWARF to GSYM
  406. if (!InputFilenames.empty()) {
  407. OS << "error: no input files can be specified when using the --convert "
  408. "option.\n";
  409. return 1;
  410. }
  411. // Call error() if we have an error and it will exit with a status of 1
  412. if (auto Err = convertFileToGSYM(OS))
  413. error("DWARF conversion failed: ", std::move(Err));
  414. return 0;
  415. }
  416. if (LookupAddressesFromStdin) {
  417. if (!LookupAddresses.empty() || !InputFilenames.empty()) {
  418. OS << "error: no input files or addresses can be specified when using "
  419. "the --addresses-from-stdin "
  420. "option.\n";
  421. return 1;
  422. }
  423. std::string InputLine;
  424. std::string CurrentGSYMPath;
  425. std::optional<Expected<GsymReader>> CurrentGsym;
  426. while (std::getline(std::cin, InputLine)) {
  427. // Strip newline characters.
  428. std::string StrippedInputLine(InputLine);
  429. llvm::erase_if(StrippedInputLine,
  430. [](char c) { return c == '\r' || c == '\n'; });
  431. StringRef AddrStr, GSYMPath;
  432. std::tie(AddrStr, GSYMPath) =
  433. llvm::StringRef{StrippedInputLine}.split(' ');
  434. if (GSYMPath != CurrentGSYMPath) {
  435. CurrentGsym = GsymReader::openFile(GSYMPath);
  436. if (!*CurrentGsym)
  437. error(GSYMPath, CurrentGsym->takeError());
  438. CurrentGSYMPath = GSYMPath;
  439. }
  440. uint64_t Addr;
  441. if (AddrStr.getAsInteger(0, Addr)) {
  442. OS << "error: invalid address " << AddrStr
  443. << ", expected: Address GsymFile.\n";
  444. return 1;
  445. }
  446. doLookup(**CurrentGsym, Addr, OS);
  447. OS << "\n";
  448. OS.flush();
  449. }
  450. return EXIT_SUCCESS;
  451. }
  452. // Dump or access data inside GSYM files
  453. for (const auto &GSYMPath : InputFilenames) {
  454. auto Gsym = GsymReader::openFile(GSYMPath);
  455. if (!Gsym)
  456. error(GSYMPath, Gsym.takeError());
  457. if (LookupAddresses.empty()) {
  458. Gsym->dump(outs());
  459. continue;
  460. }
  461. // Lookup an address in a GSYM file and print any matches.
  462. OS << "Looking up addresses in \"" << GSYMPath << "\":\n";
  463. for (auto Addr : LookupAddresses) {
  464. doLookup(*Gsym, Addr, OS);
  465. }
  466. }
  467. return EXIT_SUCCESS;
  468. }