llvm-gsymutil.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503
  1. //===-- gsymutil.cpp - GSYM dumping and creation utility for llvm ---------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. #include "llvm/ADT/STLExtras.h"
  10. #include "llvm/ADT/StringSet.h"
  11. #include "llvm/ADT/Triple.h"
  12. #include "llvm/DebugInfo/DIContext.h"
  13. #include "llvm/DebugInfo/DWARF/DWARFContext.h"
  14. #include "llvm/Object/Archive.h"
  15. #include "llvm/Object/ELFObjectFile.h"
  16. #include "llvm/Object/MachOUniversal.h"
  17. #include "llvm/Object/ObjectFile.h"
  18. #include "llvm/Support/CommandLine.h"
  19. #include "llvm/Support/Debug.h"
  20. #include "llvm/Support/Format.h"
  21. #include "llvm/Support/ManagedStatic.h"
  22. #include "llvm/Support/MemoryBuffer.h"
  23. #include "llvm/Support/Path.h"
  24. #include "llvm/Support/PrettyStackTrace.h"
  25. #include "llvm/Support/Regex.h"
  26. #include "llvm/Support/Signals.h"
  27. #include "llvm/Support/TargetSelect.h"
  28. #include "llvm/Support/raw_ostream.h"
  29. #include <algorithm>
  30. #include <cstring>
  31. #include <inttypes.h>
  32. #include <map>
  33. #include <string>
  34. #include <system_error>
  35. #include <vector>
  36. #include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
  37. #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
  38. #include "llvm/DebugInfo/GSYM/GsymCreator.h"
  39. #include "llvm/DebugInfo/GSYM/GsymReader.h"
  40. #include "llvm/DebugInfo/GSYM/InlineInfo.h"
  41. #include "llvm/DebugInfo/GSYM/LookupResult.h"
  42. #include "llvm/DebugInfo/GSYM/ObjectFileTransformer.h"
  43. using namespace llvm;
  44. using namespace gsym;
  45. using namespace object;
  46. /// @}
  47. /// Command line options.
  48. /// @{
  49. namespace {
  50. using namespace cl;
  51. OptionCategory GeneralOptions("Options");
  52. OptionCategory ConversionOptions("Conversion Options");
  53. OptionCategory LookupOptions("Lookup Options");
  54. static opt<bool> Help("h", desc("Alias for -help"), Hidden,
  55. cat(GeneralOptions));
  56. static opt<bool> Verbose("verbose",
  57. desc("Enable verbose logging and encoding details."),
  58. cat(GeneralOptions));
  59. static list<std::string> InputFilenames(Positional, desc("<input GSYM files>"),
  60. ZeroOrMore, cat(GeneralOptions));
  61. static opt<std::string>
  62. ConvertFilename("convert", cl::init(""),
  63. cl::desc("Convert the specified file to the GSYM format.\n"
  64. "Supported files include ELF and mach-o files "
  65. "that will have their debug info (DWARF) and "
  66. "symbol table converted."),
  67. cl::value_desc("path"), cat(ConversionOptions));
  68. static list<std::string>
  69. ArchFilters("arch",
  70. desc("Process debug information for the specified CPU "
  71. "architecture only.\nArchitectures may be specified by "
  72. "name or by number.\nThis option can be specified "
  73. "multiple times, once for each desired architecture."),
  74. cl::value_desc("arch"), cat(ConversionOptions));
  75. static opt<std::string>
  76. OutputFilename("out-file", cl::init(""),
  77. cl::desc("Specify the path where the converted GSYM file "
  78. "will be saved.\nWhen not specified, a '.gsym' "
  79. "extension will be appended to the file name "
  80. "specified in the --convert option."),
  81. cl::value_desc("path"), cat(ConversionOptions));
  82. static alias OutputFilenameAlias("o", desc("Alias for -out-file."),
  83. aliasopt(OutputFilename),
  84. cat(ConversionOptions));
  85. static opt<bool> Verify("verify",
  86. desc("Verify the generated GSYM file against the "
  87. "information in the file that was converted."),
  88. cat(ConversionOptions));
  89. static opt<unsigned>
  90. NumThreads("num-threads",
  91. desc("Specify the maximum number (n) of simultaneous threads "
  92. "to use when converting files to GSYM.\nDefaults to the "
  93. "number of cores on the current machine."),
  94. cl::value_desc("n"), cat(ConversionOptions));
  95. static list<uint64_t> LookupAddresses("address",
  96. desc("Lookup an address in a GSYM file"),
  97. cl::value_desc("addr"),
  98. cat(LookupOptions));
  99. } // namespace
  100. /// @}
  101. //===----------------------------------------------------------------------===//
  102. static void error(StringRef Prefix, llvm::Error Err) {
  103. if (!Err)
  104. return;
  105. errs() << Prefix << ": " << Err << "\n";
  106. consumeError(std::move(Err));
  107. exit(1);
  108. }
  109. static void error(StringRef Prefix, std::error_code EC) {
  110. if (!EC)
  111. return;
  112. errs() << Prefix << ": " << EC.message() << "\n";
  113. exit(1);
  114. }
  115. /// If the input path is a .dSYM bundle (as created by the dsymutil tool),
  116. /// replace it with individual entries for each of the object files inside the
  117. /// bundle otherwise return the input path.
  118. static std::vector<std::string> expandBundle(const std::string &InputPath) {
  119. std::vector<std::string> BundlePaths;
  120. SmallString<256> BundlePath(InputPath);
  121. // Manually open up the bundle to avoid introducing additional dependencies.
  122. if (sys::fs::is_directory(BundlePath) &&
  123. sys::path::extension(BundlePath) == ".dSYM") {
  124. std::error_code EC;
  125. sys::path::append(BundlePath, "Contents", "Resources", "DWARF");
  126. for (sys::fs::directory_iterator Dir(BundlePath, EC), DirEnd;
  127. Dir != DirEnd && !EC; Dir.increment(EC)) {
  128. const std::string &Path = Dir->path();
  129. sys::fs::file_status Status;
  130. EC = sys::fs::status(Path, Status);
  131. error(Path, EC);
  132. switch (Status.type()) {
  133. case sys::fs::file_type::regular_file:
  134. case sys::fs::file_type::symlink_file:
  135. case sys::fs::file_type::type_unknown:
  136. BundlePaths.push_back(Path);
  137. break;
  138. default: /*ignore*/;
  139. }
  140. }
  141. error(BundlePath, EC);
  142. }
  143. if (!BundlePaths.size())
  144. BundlePaths.push_back(InputPath);
  145. return BundlePaths;
  146. }
  147. static uint32_t getCPUType(MachOObjectFile &MachO) {
  148. if (MachO.is64Bit())
  149. return MachO.getHeader64().cputype;
  150. else
  151. return MachO.getHeader().cputype;
  152. }
  153. /// Return true if the object file has not been filtered by an --arch option.
  154. static bool filterArch(MachOObjectFile &Obj) {
  155. if (ArchFilters.empty())
  156. return true;
  157. Triple ObjTriple(Obj.getArchTriple());
  158. StringRef ObjArch = ObjTriple.getArchName();
  159. for (auto Arch : ArchFilters) {
  160. // Match name.
  161. if (Arch == ObjArch)
  162. return true;
  163. // Match architecture number.
  164. unsigned Value;
  165. if (!StringRef(Arch).getAsInteger(0, Value))
  166. if (Value == getCPUType(Obj))
  167. return true;
  168. }
  169. return false;
  170. }
  171. /// Determine the virtual address that is considered the base address of an ELF
  172. /// object file.
  173. ///
  174. /// The base address of an ELF file is the the "p_vaddr" of the first program
  175. /// header whose "p_type" is PT_LOAD.
  176. ///
  177. /// \param ELFFile An ELF object file we will search.
  178. ///
  179. /// \returns A valid image base address if we are able to extract one.
  180. template <class ELFT>
  181. static llvm::Optional<uint64_t>
  182. getImageBaseAddress(const object::ELFFile<ELFT> &ELFFile) {
  183. auto PhdrRangeOrErr = ELFFile.program_headers();
  184. if (!PhdrRangeOrErr) {
  185. consumeError(PhdrRangeOrErr.takeError());
  186. return llvm::None;
  187. }
  188. for (const typename ELFT::Phdr &Phdr : *PhdrRangeOrErr)
  189. if (Phdr.p_type == ELF::PT_LOAD)
  190. return (uint64_t)Phdr.p_vaddr;
  191. return llvm::None;
  192. }
  193. /// Determine the virtual address that is considered the base address of mach-o
  194. /// object file.
  195. ///
  196. /// The base address of a mach-o file is the vmaddr of the "__TEXT" segment.
  197. ///
  198. /// \param MachO A mach-o object file we will search.
  199. ///
  200. /// \returns A valid image base address if we are able to extract one.
  201. static llvm::Optional<uint64_t>
  202. getImageBaseAddress(const object::MachOObjectFile *MachO) {
  203. for (const auto &Command : MachO->load_commands()) {
  204. if (Command.C.cmd == MachO::LC_SEGMENT) {
  205. MachO::segment_command SLC = MachO->getSegmentLoadCommand(Command);
  206. StringRef SegName = SLC.segname;
  207. if (SegName == "__TEXT")
  208. return SLC.vmaddr;
  209. } else if (Command.C.cmd == MachO::LC_SEGMENT_64) {
  210. MachO::segment_command_64 SLC = MachO->getSegment64LoadCommand(Command);
  211. StringRef SegName = SLC.segname;
  212. if (SegName == "__TEXT")
  213. return SLC.vmaddr;
  214. }
  215. }
  216. return llvm::None;
  217. }
  218. /// Determine the virtual address that is considered the base address of an
  219. /// object file.
  220. ///
  221. /// Since GSYM files are used for symbolication, many clients will need to
  222. /// easily adjust addresses they find in stack traces so the lookups happen
  223. /// on unslid addresses from the original object file. If the base address of
  224. /// a GSYM file is set to the base address of the image, then this address
  225. /// adjusting is much easier.
  226. ///
  227. /// \param Obj An object file we will search.
  228. ///
  229. /// \returns A valid image base address if we are able to extract one.
  230. static llvm::Optional<uint64_t> getImageBaseAddress(object::ObjectFile &Obj) {
  231. if (const auto *MachO = dyn_cast<object::MachOObjectFile>(&Obj))
  232. return getImageBaseAddress(MachO);
  233. else if (const auto *ELFObj = dyn_cast<object::ELF32LEObjectFile>(&Obj))
  234. return getImageBaseAddress(ELFObj->getELFFile());
  235. else if (const auto *ELFObj = dyn_cast<object::ELF32BEObjectFile>(&Obj))
  236. return getImageBaseAddress(ELFObj->getELFFile());
  237. else if (const auto *ELFObj = dyn_cast<object::ELF64LEObjectFile>(&Obj))
  238. return getImageBaseAddress(ELFObj->getELFFile());
  239. else if (const auto *ELFObj = dyn_cast<object::ELF64BEObjectFile>(&Obj))
  240. return getImageBaseAddress(ELFObj->getELFFile());
  241. return llvm::None;
  242. }
  243. static llvm::Error handleObjectFile(ObjectFile &Obj,
  244. const std::string &OutFile) {
  245. auto ThreadCount =
  246. NumThreads > 0 ? NumThreads : std::thread::hardware_concurrency();
  247. auto &OS = outs();
  248. GsymCreator Gsym;
  249. // See if we can figure out the base address for a given object file, and if
  250. // we can, then set the base address to use to this value. This will ease
  251. // symbolication since clients can slide the GSYM lookup addresses by using
  252. // the load bias of the shared library.
  253. if (auto ImageBaseAddr = getImageBaseAddress(Obj))
  254. Gsym.setBaseAddress(*ImageBaseAddr);
  255. // We need to know where the valid sections are that contain instructions.
  256. // See header documentation for DWARFTransformer::SetValidTextRanges() for
  257. // defails.
  258. AddressRanges TextRanges;
  259. for (const object::SectionRef &Sect : Obj.sections()) {
  260. if (!Sect.isText())
  261. continue;
  262. const uint64_t Size = Sect.getSize();
  263. if (Size == 0)
  264. continue;
  265. const uint64_t StartAddr = Sect.getAddress();
  266. TextRanges.insert(AddressRange(StartAddr, StartAddr + Size));
  267. }
  268. // Make sure there is DWARF to convert first.
  269. std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(Obj);
  270. if (!DICtx)
  271. return createStringError(std::errc::invalid_argument,
  272. "unable to create DWARF context");
  273. logAllUnhandledErrors(DICtx->loadRegisterInfo(Obj), OS,
  274. "DwarfTransformer: ");
  275. // Make a DWARF transformer object and populate the ranges of the code
  276. // so we don't end up adding invalid functions to GSYM data.
  277. DwarfTransformer DT(*DICtx, OS, Gsym);
  278. if (!TextRanges.empty())
  279. Gsym.SetValidTextRanges(TextRanges);
  280. // Convert all DWARF to GSYM.
  281. if (auto Err = DT.convert(ThreadCount))
  282. return Err;
  283. // Get the UUID and convert symbol table to GSYM.
  284. if (auto Err = ObjectFileTransformer::convert(Obj, OS, Gsym))
  285. return Err;
  286. // Finalize the GSYM to make it ready to save to disk. This will remove
  287. // duplicate FunctionInfo entries where we might have found an entry from
  288. // debug info and also a symbol table entry from the object file.
  289. if (auto Err = Gsym.finalize(OS))
  290. return Err;
  291. // Save the GSYM file to disk.
  292. support::endianness Endian = Obj.makeTriple().isLittleEndian() ?
  293. support::little : support::big;
  294. if (auto Err = Gsym.save(OutFile.c_str(), Endian))
  295. return Err;
  296. // Verify the DWARF if requested. This will ensure all the info in the DWARF
  297. // can be looked up in the GSYM and that all lookups get matching data.
  298. if (Verify) {
  299. if (auto Err = DT.verify(OutFile))
  300. return Err;
  301. }
  302. return Error::success();
  303. }
  304. static llvm::Error handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
  305. const std::string &OutFile) {
  306. Expected<std::unique_ptr<Binary>> BinOrErr = object::createBinary(Buffer);
  307. error(Filename, errorToErrorCode(BinOrErr.takeError()));
  308. if (auto *Obj = dyn_cast<ObjectFile>(BinOrErr->get())) {
  309. Triple ObjTriple(Obj->makeTriple());
  310. auto ArchName = ObjTriple.getArchName();
  311. outs() << "Output file (" << ArchName << "): " << OutFile << "\n";
  312. if (auto Err = handleObjectFile(*Obj, OutFile.c_str()))
  313. return Err;
  314. } else if (auto *Fat = dyn_cast<MachOUniversalBinary>(BinOrErr->get())) {
  315. // Iterate over all contained architectures and filter out any that were
  316. // not specified with the "--arch <arch>" option. If the --arch option was
  317. // not specified on the command line, we will process all architectures.
  318. std::vector< std::unique_ptr<MachOObjectFile> > FilterObjs;
  319. for (auto &ObjForArch : Fat->objects()) {
  320. if (auto MachOOrErr = ObjForArch.getAsObjectFile()) {
  321. auto &Obj = **MachOOrErr;
  322. if (filterArch(Obj))
  323. FilterObjs.emplace_back(MachOOrErr->release());
  324. } else {
  325. error(Filename, MachOOrErr.takeError());
  326. }
  327. }
  328. if (FilterObjs.empty())
  329. error(Filename, createStringError(std::errc::invalid_argument,
  330. "no matching architectures found"));
  331. // Now handle each architecture we need to convert.
  332. for (auto &Obj: FilterObjs) {
  333. Triple ObjTriple(Obj->getArchTriple());
  334. auto ArchName = ObjTriple.getArchName();
  335. std::string ArchOutFile(OutFile);
  336. // If we are only handling a single architecture, then we will use the
  337. // normal output file. If we are handling multiple architectures append
  338. // the architecture name to the end of the out file path so that we
  339. // don't overwrite the previous architecture's gsym file.
  340. if (FilterObjs.size() > 1) {
  341. ArchOutFile.append(1, '.');
  342. ArchOutFile.append(ArchName.str());
  343. }
  344. outs() << "Output file (" << ArchName << "): " << ArchOutFile << "\n";
  345. if (auto Err = handleObjectFile(*Obj, ArchOutFile))
  346. return Err;
  347. }
  348. }
  349. return Error::success();
  350. }
  351. static llvm::Error handleFileConversionToGSYM(StringRef Filename,
  352. const std::string &OutFile) {
  353. ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
  354. MemoryBuffer::getFileOrSTDIN(Filename);
  355. error(Filename, BuffOrErr.getError());
  356. std::unique_ptr<MemoryBuffer> Buffer = std::move(BuffOrErr.get());
  357. return handleBuffer(Filename, *Buffer, OutFile);
  358. }
  359. static llvm::Error convertFileToGSYM(raw_ostream &OS) {
  360. // Expand any .dSYM bundles to the individual object files contained therein.
  361. std::vector<std::string> Objects;
  362. std::string OutFile = OutputFilename;
  363. if (OutFile.empty()) {
  364. OutFile = ConvertFilename;
  365. OutFile += ".gsym";
  366. }
  367. OS << "Input file: " << ConvertFilename << "\n";
  368. auto Objs = expandBundle(ConvertFilename);
  369. llvm::append_range(Objects, Objs);
  370. for (auto Object : Objects) {
  371. if (auto Err = handleFileConversionToGSYM(Object, OutFile))
  372. return Err;
  373. }
  374. return Error::success();
  375. }
  376. int main(int argc, char const *argv[]) {
  377. // Print a stack trace if we signal out.
  378. sys::PrintStackTraceOnErrorSignal(argv[0]);
  379. PrettyStackTraceProgram X(argc, argv);
  380. llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
  381. llvm::InitializeAllTargets();
  382. const char *Overview =
  383. "A tool for dumping, searching and creating GSYM files.\n\n"
  384. "Specify one or more GSYM paths as arguments to dump all of the "
  385. "information in each GSYM file.\n"
  386. "Specify a single GSYM file along with one or more --lookup options to "
  387. "lookup addresses within that GSYM file.\n"
  388. "Use the --convert option to specify a file with option --out-file "
  389. "option to convert to GSYM format.\n";
  390. HideUnrelatedOptions(
  391. {&GeneralOptions, &ConversionOptions, &LookupOptions});
  392. cl::ParseCommandLineOptions(argc, argv, Overview);
  393. if (Help) {
  394. PrintHelpMessage(/*Hidden =*/false, /*Categorized =*/true);
  395. return 0;
  396. }
  397. raw_ostream &OS = outs();
  398. if (!ConvertFilename.empty()) {
  399. // Convert DWARF to GSYM
  400. if (!InputFilenames.empty()) {
  401. OS << "error: no input files can be specified when using the --convert "
  402. "option.\n";
  403. return 1;
  404. }
  405. // Call error() if we have an error and it will exit with a status of 1
  406. if (auto Err = convertFileToGSYM(OS))
  407. error("DWARF conversion failed: ", std::move(Err));
  408. return 0;
  409. }
  410. // Dump or access data inside GSYM files
  411. for (const auto &GSYMPath : InputFilenames) {
  412. auto Gsym = GsymReader::openFile(GSYMPath);
  413. if (!Gsym)
  414. error(GSYMPath, Gsym.takeError());
  415. if (LookupAddresses.empty()) {
  416. Gsym->dump(outs());
  417. continue;
  418. }
  419. // Lookup an address in a GSYM file and print any matches.
  420. OS << "Looking up addresses in \"" << GSYMPath << "\":\n";
  421. for (auto Addr: LookupAddresses) {
  422. if (auto Result = Gsym->lookup(Addr)) {
  423. // If verbose is enabled dump the full function info for the address.
  424. if (Verbose) {
  425. if (auto FI = Gsym->getFunctionInfo(Addr)) {
  426. OS << "FunctionInfo for " << HEX64(Addr) << ":\n";
  427. Gsym->dump(OS, *FI);
  428. OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
  429. }
  430. }
  431. OS << Result.get();
  432. } else {
  433. if (Verbose)
  434. OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
  435. OS << HEX64(Addr) << ": ";
  436. logAllUnhandledErrors(Result.takeError(), OS, "error: ");
  437. }
  438. if (Verbose)
  439. OS << "\n";
  440. }
  441. }
  442. return EXIT_SUCCESS;
  443. }