123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536 |
- //===-- gsymutil.cpp - GSYM dumping and creation utility for llvm ---------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- #include "llvm/ADT/STLExtras.h"
- #include "llvm/ADT/StringSet.h"
- #include "llvm/ADT/Triple.h"
- #include "llvm/DebugInfo/DIContext.h"
- #include "llvm/DebugInfo/DWARF/DWARFContext.h"
- #include "llvm/Object/Archive.h"
- #include "llvm/Object/ELFObjectFile.h"
- #include "llvm/Object/MachOUniversal.h"
- #include "llvm/Object/ObjectFile.h"
- #include "llvm/Support/CommandLine.h"
- #include "llvm/Support/Debug.h"
- #include "llvm/Support/Format.h"
- #include "llvm/Support/ManagedStatic.h"
- #include "llvm/Support/MemoryBuffer.h"
- #include "llvm/Support/PrettyStackTrace.h"
- #include "llvm/Support/Regex.h"
- #include "llvm/Support/Signals.h"
- #include "llvm/Support/TargetSelect.h"
- #include "llvm/Support/raw_ostream.h"
- #include <algorithm>
- #include <cstring>
- #include <inttypes.h>
- #include <iostream>
- #include <map>
- #include <string>
- #include <system_error>
- #include <vector>
- #include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
- #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
- #include "llvm/DebugInfo/GSYM/GsymCreator.h"
- #include "llvm/DebugInfo/GSYM/GsymReader.h"
- #include "llvm/DebugInfo/GSYM/InlineInfo.h"
- #include "llvm/DebugInfo/GSYM/LookupResult.h"
- #include "llvm/DebugInfo/GSYM/ObjectFileTransformer.h"
- #include <optional>
- using namespace llvm;
- using namespace gsym;
- using namespace object;
- /// @}
- /// Command line options.
- /// @{
- namespace {
- using namespace cl;
- OptionCategory GeneralOptions("Options");
- OptionCategory ConversionOptions("Conversion Options");
- OptionCategory LookupOptions("Lookup Options");
- static opt<bool> Help("h", desc("Alias for -help"), Hidden,
- cat(GeneralOptions));
- static opt<bool> Verbose("verbose",
- desc("Enable verbose logging and encoding details."),
- cat(GeneralOptions));
- static list<std::string> InputFilenames(Positional, desc("<input GSYM files>"),
- cat(GeneralOptions));
- static opt<std::string>
- ConvertFilename("convert", cl::init(""),
- cl::desc("Convert the specified file to the GSYM format.\n"
- "Supported files include ELF and mach-o files "
- "that will have their debug info (DWARF) and "
- "symbol table converted."),
- cl::value_desc("path"), cat(ConversionOptions));
- static list<std::string>
- ArchFilters("arch",
- desc("Process debug information for the specified CPU "
- "architecture only.\nArchitectures may be specified by "
- "name or by number.\nThis option can be specified "
- "multiple times, once for each desired architecture."),
- cl::value_desc("arch"), cat(ConversionOptions));
- static opt<std::string>
- OutputFilename("out-file", cl::init(""),
- cl::desc("Specify the path where the converted GSYM file "
- "will be saved.\nWhen not specified, a '.gsym' "
- "extension will be appended to the file name "
- "specified in the --convert option."),
- cl::value_desc("path"), cat(ConversionOptions));
- static alias OutputFilenameAlias("o", desc("Alias for -out-file."),
- aliasopt(OutputFilename),
- cat(ConversionOptions));
- static opt<bool> Verify("verify",
- desc("Verify the generated GSYM file against the "
- "information in the file that was converted."),
- cat(ConversionOptions));
- static opt<unsigned>
- NumThreads("num-threads",
- desc("Specify the maximum number (n) of simultaneous threads "
- "to use when converting files to GSYM.\nDefaults to the "
- "number of cores on the current machine."),
- cl::value_desc("n"), cat(ConversionOptions));
- static opt<bool>
- Quiet("quiet", desc("Do not output warnings about the debug information"),
- cat(ConversionOptions));
- static list<uint64_t> LookupAddresses("address",
- desc("Lookup an address in a GSYM file"),
- cl::value_desc("addr"),
- cat(LookupOptions));
- static opt<bool> LookupAddressesFromStdin(
- "addresses-from-stdin",
- desc("Lookup addresses in a GSYM file that are read from stdin\nEach input "
- "line is expected to be of the following format: <addr> <gsym-path>"),
- cat(LookupOptions));
- } // namespace
- /// @}
- //===----------------------------------------------------------------------===//
- static void error(Error Err) {
- if (!Err)
- return;
- WithColor::error() << toString(std::move(Err)) << "\n";
- exit(1);
- }
- static void error(StringRef Prefix, llvm::Error Err) {
- if (!Err)
- return;
- errs() << Prefix << ": " << Err << "\n";
- consumeError(std::move(Err));
- exit(1);
- }
- static void error(StringRef Prefix, std::error_code EC) {
- if (!EC)
- return;
- errs() << Prefix << ": " << EC.message() << "\n";
- exit(1);
- }
- static uint32_t getCPUType(MachOObjectFile &MachO) {
- if (MachO.is64Bit())
- return MachO.getHeader64().cputype;
- else
- return MachO.getHeader().cputype;
- }
- /// Return true if the object file has not been filtered by an --arch option.
- static bool filterArch(MachOObjectFile &Obj) {
- if (ArchFilters.empty())
- return true;
- Triple ObjTriple(Obj.getArchTriple());
- StringRef ObjArch = ObjTriple.getArchName();
- for (auto Arch : ArchFilters) {
- // Match name.
- if (Arch == ObjArch)
- return true;
- // Match architecture number.
- unsigned Value;
- if (!StringRef(Arch).getAsInteger(0, Value))
- if (Value == getCPUType(Obj))
- return true;
- }
- return false;
- }
- /// Determine the virtual address that is considered the base address of an ELF
- /// object file.
- ///
- /// The base address of an ELF file is the the "p_vaddr" of the first program
- /// header whose "p_type" is PT_LOAD.
- ///
- /// \param ELFFile An ELF object file we will search.
- ///
- /// \returns A valid image base address if we are able to extract one.
- template <class ELFT>
- static std::optional<uint64_t>
- getImageBaseAddress(const object::ELFFile<ELFT> &ELFFile) {
- auto PhdrRangeOrErr = ELFFile.program_headers();
- if (!PhdrRangeOrErr) {
- consumeError(PhdrRangeOrErr.takeError());
- return std::nullopt;
- }
- for (const typename ELFT::Phdr &Phdr : *PhdrRangeOrErr)
- if (Phdr.p_type == ELF::PT_LOAD)
- return (uint64_t)Phdr.p_vaddr;
- return std::nullopt;
- }
- /// Determine the virtual address that is considered the base address of mach-o
- /// object file.
- ///
- /// The base address of a mach-o file is the vmaddr of the "__TEXT" segment.
- ///
- /// \param MachO A mach-o object file we will search.
- ///
- /// \returns A valid image base address if we are able to extract one.
- static std::optional<uint64_t>
- getImageBaseAddress(const object::MachOObjectFile *MachO) {
- for (const auto &Command : MachO->load_commands()) {
- if (Command.C.cmd == MachO::LC_SEGMENT) {
- MachO::segment_command SLC = MachO->getSegmentLoadCommand(Command);
- StringRef SegName = SLC.segname;
- if (SegName == "__TEXT")
- return SLC.vmaddr;
- } else if (Command.C.cmd == MachO::LC_SEGMENT_64) {
- MachO::segment_command_64 SLC = MachO->getSegment64LoadCommand(Command);
- StringRef SegName = SLC.segname;
- if (SegName == "__TEXT")
- return SLC.vmaddr;
- }
- }
- return std::nullopt;
- }
- /// Determine the virtual address that is considered the base address of an
- /// object file.
- ///
- /// Since GSYM files are used for symbolication, many clients will need to
- /// easily adjust addresses they find in stack traces so the lookups happen
- /// on unslid addresses from the original object file. If the base address of
- /// a GSYM file is set to the base address of the image, then this address
- /// adjusting is much easier.
- ///
- /// \param Obj An object file we will search.
- ///
- /// \returns A valid image base address if we are able to extract one.
- static std::optional<uint64_t> getImageBaseAddress(object::ObjectFile &Obj) {
- if (const auto *MachO = dyn_cast<object::MachOObjectFile>(&Obj))
- return getImageBaseAddress(MachO);
- else if (const auto *ELFObj = dyn_cast<object::ELF32LEObjectFile>(&Obj))
- return getImageBaseAddress(ELFObj->getELFFile());
- else if (const auto *ELFObj = dyn_cast<object::ELF32BEObjectFile>(&Obj))
- return getImageBaseAddress(ELFObj->getELFFile());
- else if (const auto *ELFObj = dyn_cast<object::ELF64LEObjectFile>(&Obj))
- return getImageBaseAddress(ELFObj->getELFFile());
- else if (const auto *ELFObj = dyn_cast<object::ELF64BEObjectFile>(&Obj))
- return getImageBaseAddress(ELFObj->getELFFile());
- return std::nullopt;
- }
- static llvm::Error handleObjectFile(ObjectFile &Obj,
- const std::string &OutFile) {
- auto ThreadCount =
- NumThreads > 0 ? NumThreads : std::thread::hardware_concurrency();
- auto &OS = outs();
- GsymCreator Gsym(Quiet);
- // See if we can figure out the base address for a given object file, and if
- // we can, then set the base address to use to this value. This will ease
- // symbolication since clients can slide the GSYM lookup addresses by using
- // the load bias of the shared library.
- if (auto ImageBaseAddr = getImageBaseAddress(Obj))
- Gsym.setBaseAddress(*ImageBaseAddr);
- // We need to know where the valid sections are that contain instructions.
- // See header documentation for DWARFTransformer::SetValidTextRanges() for
- // defails.
- AddressRanges TextRanges;
- for (const object::SectionRef &Sect : Obj.sections()) {
- if (!Sect.isText())
- continue;
- const uint64_t Size = Sect.getSize();
- if (Size == 0)
- continue;
- const uint64_t StartAddr = Sect.getAddress();
- TextRanges.insert(AddressRange(StartAddr, StartAddr + Size));
- }
- // Make sure there is DWARF to convert first.
- std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(Obj);
- if (!DICtx)
- return createStringError(std::errc::invalid_argument,
- "unable to create DWARF context");
- // Make a DWARF transformer object and populate the ranges of the code
- // so we don't end up adding invalid functions to GSYM data.
- DwarfTransformer DT(*DICtx, OS, Gsym);
- if (!TextRanges.empty())
- Gsym.SetValidTextRanges(TextRanges);
- // Convert all DWARF to GSYM.
- if (auto Err = DT.convert(ThreadCount))
- return Err;
- // Get the UUID and convert symbol table to GSYM.
- if (auto Err = ObjectFileTransformer::convert(Obj, OS, Gsym))
- return Err;
- // Finalize the GSYM to make it ready to save to disk. This will remove
- // duplicate FunctionInfo entries where we might have found an entry from
- // debug info and also a symbol table entry from the object file.
- if (auto Err = Gsym.finalize(OS))
- return Err;
- // Save the GSYM file to disk.
- support::endianness Endian =
- Obj.makeTriple().isLittleEndian() ? support::little : support::big;
- if (auto Err = Gsym.save(OutFile, Endian))
- return Err;
- // Verify the DWARF if requested. This will ensure all the info in the DWARF
- // can be looked up in the GSYM and that all lookups get matching data.
- if (Verify) {
- if (auto Err = DT.verify(OutFile))
- return Err;
- }
- return Error::success();
- }
- static llvm::Error handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
- const std::string &OutFile) {
- Expected<std::unique_ptr<Binary>> BinOrErr = object::createBinary(Buffer);
- error(Filename, errorToErrorCode(BinOrErr.takeError()));
- if (auto *Obj = dyn_cast<ObjectFile>(BinOrErr->get())) {
- Triple ObjTriple(Obj->makeTriple());
- auto ArchName = ObjTriple.getArchName();
- outs() << "Output file (" << ArchName << "): " << OutFile << "\n";
- if (auto Err = handleObjectFile(*Obj, OutFile))
- return Err;
- } else if (auto *Fat = dyn_cast<MachOUniversalBinary>(BinOrErr->get())) {
- // Iterate over all contained architectures and filter out any that were
- // not specified with the "--arch <arch>" option. If the --arch option was
- // not specified on the command line, we will process all architectures.
- std::vector<std::unique_ptr<MachOObjectFile>> FilterObjs;
- for (auto &ObjForArch : Fat->objects()) {
- if (auto MachOOrErr = ObjForArch.getAsObjectFile()) {
- auto &Obj = **MachOOrErr;
- if (filterArch(Obj))
- FilterObjs.emplace_back(MachOOrErr->release());
- } else {
- error(Filename, MachOOrErr.takeError());
- }
- }
- if (FilterObjs.empty())
- error(Filename, createStringError(std::errc::invalid_argument,
- "no matching architectures found"));
- // Now handle each architecture we need to convert.
- for (auto &Obj : FilterObjs) {
- Triple ObjTriple(Obj->getArchTriple());
- auto ArchName = ObjTriple.getArchName();
- std::string ArchOutFile(OutFile);
- // If we are only handling a single architecture, then we will use the
- // normal output file. If we are handling multiple architectures append
- // the architecture name to the end of the out file path so that we
- // don't overwrite the previous architecture's gsym file.
- if (FilterObjs.size() > 1) {
- ArchOutFile.append(1, '.');
- ArchOutFile.append(ArchName.str());
- }
- outs() << "Output file (" << ArchName << "): " << ArchOutFile << "\n";
- if (auto Err = handleObjectFile(*Obj, ArchOutFile))
- return Err;
- }
- }
- return Error::success();
- }
- static llvm::Error handleFileConversionToGSYM(StringRef Filename,
- const std::string &OutFile) {
- ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
- MemoryBuffer::getFileOrSTDIN(Filename);
- error(Filename, BuffOrErr.getError());
- std::unique_ptr<MemoryBuffer> Buffer = std::move(BuffOrErr.get());
- return handleBuffer(Filename, *Buffer, OutFile);
- }
- static llvm::Error convertFileToGSYM(raw_ostream &OS) {
- // Expand any .dSYM bundles to the individual object files contained therein.
- std::vector<std::string> Objects;
- std::string OutFile = OutputFilename;
- if (OutFile.empty()) {
- OutFile = ConvertFilename;
- OutFile += ".gsym";
- }
- OS << "Input file: " << ConvertFilename << "\n";
- if (auto DsymObjectsOrErr =
- MachOObjectFile::findDsymObjectMembers(ConvertFilename)) {
- if (DsymObjectsOrErr->empty())
- Objects.push_back(ConvertFilename);
- else
- llvm::append_range(Objects, *DsymObjectsOrErr);
- } else {
- error(DsymObjectsOrErr.takeError());
- }
- for (auto Object : Objects) {
- if (auto Err = handleFileConversionToGSYM(Object, OutFile))
- return Err;
- }
- return Error::success();
- }
- static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
- if (auto Result = Gsym.lookup(Addr)) {
- // If verbose is enabled dump the full function info for the address.
- if (Verbose) {
- if (auto FI = Gsym.getFunctionInfo(Addr)) {
- OS << "FunctionInfo for " << HEX64(Addr) << ":\n";
- Gsym.dump(OS, *FI);
- OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
- }
- }
- OS << Result.get();
- } else {
- if (Verbose)
- OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
- OS << HEX64(Addr) << ": ";
- logAllUnhandledErrors(Result.takeError(), OS, "error: ");
- }
- if (Verbose)
- OS << "\n";
- }
- int main(int argc, char const *argv[]) {
- // Print a stack trace if we signal out.
- sys::PrintStackTraceOnErrorSignal(argv[0]);
- PrettyStackTraceProgram X(argc, argv);
- llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
- llvm::InitializeAllTargets();
- const char *Overview =
- "A tool for dumping, searching and creating GSYM files.\n\n"
- "Specify one or more GSYM paths as arguments to dump all of the "
- "information in each GSYM file.\n"
- "Specify a single GSYM file along with one or more --lookup options to "
- "lookup addresses within that GSYM file.\n"
- "Use the --convert option to specify a file with option --out-file "
- "option to convert to GSYM format.\n";
- HideUnrelatedOptions({&GeneralOptions, &ConversionOptions, &LookupOptions});
- cl::ParseCommandLineOptions(argc, argv, Overview);
- if (Help) {
- PrintHelpMessage(/*Hidden =*/false, /*Categorized =*/true);
- return 0;
- }
- raw_ostream &OS = outs();
- if (!ConvertFilename.empty()) {
- // Convert DWARF to GSYM
- if (!InputFilenames.empty()) {
- OS << "error: no input files can be specified when using the --convert "
- "option.\n";
- return 1;
- }
- // Call error() if we have an error and it will exit with a status of 1
- if (auto Err = convertFileToGSYM(OS))
- error("DWARF conversion failed: ", std::move(Err));
- return 0;
- }
- if (LookupAddressesFromStdin) {
- if (!LookupAddresses.empty() || !InputFilenames.empty()) {
- OS << "error: no input files or addresses can be specified when using "
- "the --addresses-from-stdin "
- "option.\n";
- return 1;
- }
- std::string InputLine;
- std::string CurrentGSYMPath;
- std::optional<Expected<GsymReader>> CurrentGsym;
- while (std::getline(std::cin, InputLine)) {
- // Strip newline characters.
- std::string StrippedInputLine(InputLine);
- llvm::erase_if(StrippedInputLine,
- [](char c) { return c == '\r' || c == '\n'; });
- StringRef AddrStr, GSYMPath;
- std::tie(AddrStr, GSYMPath) =
- llvm::StringRef{StrippedInputLine}.split(' ');
- if (GSYMPath != CurrentGSYMPath) {
- CurrentGsym = GsymReader::openFile(GSYMPath);
- if (!*CurrentGsym)
- error(GSYMPath, CurrentGsym->takeError());
- CurrentGSYMPath = GSYMPath;
- }
- uint64_t Addr;
- if (AddrStr.getAsInteger(0, Addr)) {
- OS << "error: invalid address " << AddrStr
- << ", expected: Address GsymFile.\n";
- return 1;
- }
- doLookup(**CurrentGsym, Addr, OS);
- OS << "\n";
- OS.flush();
- }
- return EXIT_SUCCESS;
- }
- // Dump or access data inside GSYM files
- for (const auto &GSYMPath : InputFilenames) {
- auto Gsym = GsymReader::openFile(GSYMPath);
- if (!Gsym)
- error(GSYMPath, Gsym.takeError());
- if (LookupAddresses.empty()) {
- Gsym->dump(outs());
- continue;
- }
- // Lookup an address in a GSYM file and print any matches.
- OS << "Looking up addresses in \"" << GSYMPath << "\":\n";
- for (auto Addr : LookupAddresses) {
- doLookup(*Gsym, Addr, OS);
- }
- }
- return EXIT_SUCCESS;
- }
|