llvm-symbolizer.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501
  1. //===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This utility works much like "addr2line". It is capable of transforming
  10. // tuples (module name, module offset) to code locations (function name,
  11. // file, line number, column number). It is targeted for compiler-rt tools
  12. // (especially AddressSanitizer and ThreadSanitizer) that can use it
  13. // to symbolize stack traces in their error reports.
  14. //
  15. //===----------------------------------------------------------------------===//
  16. #include "Opts.inc"
  17. #include "llvm/ADT/StringExtras.h"
  18. #include "llvm/ADT/StringRef.h"
  19. #include "llvm/Config/config.h"
  20. #include "llvm/DebugInfo/Symbolize/DIPrinter.h"
  21. #include "llvm/DebugInfo/Symbolize/Markup.h"
  22. #include "llvm/DebugInfo/Symbolize/MarkupFilter.h"
  23. #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
  24. #include "llvm/DebugInfo/Symbolize/Symbolize.h"
  25. #include "llvm/Debuginfod/BuildIDFetcher.h"
  26. #include "llvm/Debuginfod/Debuginfod.h"
  27. #include "llvm/Debuginfod/HTTPClient.h"
  28. #include "llvm/Option/Arg.h"
  29. #include "llvm/Option/ArgList.h"
  30. #include "llvm/Option/Option.h"
  31. #include "llvm/Support/COM.h"
  32. #include "llvm/Support/CommandLine.h"
  33. #include "llvm/Support/Debug.h"
  34. #include "llvm/Support/FileSystem.h"
  35. #include "llvm/Support/InitLLVM.h"
  36. #include "llvm/Support/Path.h"
  37. #include "llvm/Support/StringSaver.h"
  38. #include "llvm/Support/raw_ostream.h"
  39. #include <algorithm>
  40. #include <cstdio>
  41. #include <cstring>
  42. #include <iostream>
  43. #include <string>
  44. using namespace llvm;
  45. using namespace symbolize;
  46. namespace {
  47. enum ID {
  48. OPT_INVALID = 0, // This is not an option ID.
  49. #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
  50. HELPTEXT, METAVAR, VALUES) \
  51. OPT_##ID,
  52. #include "Opts.inc"
  53. #undef OPTION
  54. };
  55. #define PREFIX(NAME, VALUE) \
  56. static constexpr StringLiteral NAME##_init[] = VALUE; \
  57. static constexpr ArrayRef<StringLiteral> NAME(NAME##_init, \
  58. std::size(NAME##_init) - 1);
  59. #include "Opts.inc"
  60. #undef PREFIX
  61. static constexpr opt::OptTable::Info InfoTable[] = {
  62. #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
  63. HELPTEXT, METAVAR, VALUES) \
  64. { \
  65. PREFIX, NAME, HELPTEXT, \
  66. METAVAR, OPT_##ID, opt::Option::KIND##Class, \
  67. PARAM, FLAGS, OPT_##GROUP, \
  68. OPT_##ALIAS, ALIASARGS, VALUES},
  69. #include "Opts.inc"
  70. #undef OPTION
  71. };
  72. class SymbolizerOptTable : public opt::GenericOptTable {
  73. public:
  74. SymbolizerOptTable() : GenericOptTable(InfoTable) {
  75. setGroupedShortOptions(true);
  76. }
  77. };
  78. } // namespace
  79. template <typename T>
  80. static void print(const Request &Request, Expected<T> &ResOrErr,
  81. DIPrinter &Printer) {
  82. if (ResOrErr) {
  83. // No error, print the result.
  84. Printer.print(Request, *ResOrErr);
  85. return;
  86. }
  87. // Handle the error.
  88. bool PrintEmpty = true;
  89. handleAllErrors(std::move(ResOrErr.takeError()),
  90. [&](const ErrorInfoBase &EI) {
  91. PrintEmpty = Printer.printError(
  92. Request, EI, "LLVMSymbolizer: error reading file: ");
  93. });
  94. if (PrintEmpty)
  95. Printer.print(Request, T());
  96. }
  97. enum class OutputStyle { LLVM, GNU, JSON };
  98. enum class Command {
  99. Code,
  100. Data,
  101. Frame,
  102. };
  103. static void enableDebuginfod(LLVMSymbolizer &Symbolizer,
  104. const opt::ArgList &Args) {
  105. static bool IsEnabled = false;
  106. if (IsEnabled)
  107. return;
  108. IsEnabled = true;
  109. // Look up symbols using the debuginfod client.
  110. Symbolizer.setBuildIDFetcher(std::make_unique<DebuginfodFetcher>(
  111. Args.getAllArgValues(OPT_debug_file_directory_EQ)));
  112. // The HTTPClient must be initialized for use by the debuginfod client.
  113. HTTPClient::initialize();
  114. }
  115. static object::BuildID parseBuildID(StringRef Str) {
  116. std::string Bytes;
  117. if (!tryGetFromHex(Str, Bytes))
  118. return {};
  119. ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()),
  120. Bytes.size());
  121. return object::BuildID(BuildID.begin(), BuildID.end());
  122. }
  123. static bool parseCommand(StringRef BinaryName, bool IsAddr2Line,
  124. StringRef InputString, Command &Cmd,
  125. std::string &ModuleName, object::BuildID &BuildID,
  126. uint64_t &ModuleOffset) {
  127. const char kDelimiters[] = " \n\r";
  128. ModuleName = "";
  129. if (InputString.consume_front("CODE ")) {
  130. Cmd = Command::Code;
  131. } else if (InputString.consume_front("DATA ")) {
  132. Cmd = Command::Data;
  133. } else if (InputString.consume_front("FRAME ")) {
  134. Cmd = Command::Frame;
  135. } else {
  136. // If no cmd, assume it's CODE.
  137. Cmd = Command::Code;
  138. }
  139. const char *Pos;
  140. // Skip delimiters and parse input filename (if needed).
  141. if (BinaryName.empty() && BuildID.empty()) {
  142. bool HasFilePrefix = false;
  143. bool HasBuildIDPrefix = false;
  144. while (true) {
  145. if (InputString.consume_front("FILE:")) {
  146. if (HasFilePrefix)
  147. return false;
  148. HasFilePrefix = true;
  149. continue;
  150. }
  151. if (InputString.consume_front("BUILDID:")) {
  152. if (HasBuildIDPrefix)
  153. return false;
  154. HasBuildIDPrefix = true;
  155. continue;
  156. }
  157. break;
  158. }
  159. if (HasFilePrefix && HasBuildIDPrefix)
  160. return false;
  161. Pos = InputString.data();
  162. Pos += strspn(Pos, kDelimiters);
  163. if (*Pos == '"' || *Pos == '\'') {
  164. char Quote = *Pos;
  165. Pos++;
  166. const char *End = strchr(Pos, Quote);
  167. if (!End)
  168. return false;
  169. ModuleName = std::string(Pos, End - Pos);
  170. Pos = End + 1;
  171. } else {
  172. int NameLength = strcspn(Pos, kDelimiters);
  173. ModuleName = std::string(Pos, NameLength);
  174. Pos += NameLength;
  175. }
  176. if (HasBuildIDPrefix) {
  177. BuildID = parseBuildID(ModuleName);
  178. if (BuildID.empty())
  179. return false;
  180. ModuleName.clear();
  181. }
  182. } else {
  183. Pos = InputString.data();
  184. ModuleName = BinaryName.str();
  185. }
  186. // Skip delimiters and parse module offset.
  187. Pos += strspn(Pos, kDelimiters);
  188. int OffsetLength = strcspn(Pos, kDelimiters);
  189. StringRef Offset(Pos, OffsetLength);
  190. // GNU addr2line assumes the offset is hexadecimal and allows a redundant
  191. // "0x" or "0X" prefix; do the same for compatibility.
  192. if (IsAddr2Line)
  193. Offset.consume_front("0x") || Offset.consume_front("0X");
  194. return !Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset);
  195. }
  196. template <typename T>
  197. void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,
  198. uint64_t Offset, uint64_t AdjustVMA, bool ShouldInline,
  199. OutputStyle Style, LLVMSymbolizer &Symbolizer,
  200. DIPrinter &Printer) {
  201. uint64_t AdjustedOffset = Offset - AdjustVMA;
  202. object::SectionedAddress Address = {AdjustedOffset,
  203. object::SectionedAddress::UndefSection};
  204. if (Cmd == Command::Data) {
  205. Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(ModuleSpec, Address);
  206. print({ModuleName, Offset}, ResOrErr, Printer);
  207. } else if (Cmd == Command::Frame) {
  208. Expected<std::vector<DILocal>> ResOrErr =
  209. Symbolizer.symbolizeFrame(ModuleSpec, Address);
  210. print({ModuleName, Offset}, ResOrErr, Printer);
  211. } else if (ShouldInline) {
  212. Expected<DIInliningInfo> ResOrErr =
  213. Symbolizer.symbolizeInlinedCode(ModuleSpec, Address);
  214. print({ModuleName, Offset}, ResOrErr, Printer);
  215. } else if (Style == OutputStyle::GNU) {
  216. // With PrintFunctions == FunctionNameKind::LinkageName (default)
  217. // and UseSymbolTable == true (also default), Symbolizer.symbolizeCode()
  218. // may override the name of an inlined function with the name of the topmost
  219. // caller function in the inlining chain. This contradicts the existing
  220. // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only
  221. // the topmost function, which suits our needs better.
  222. Expected<DIInliningInfo> ResOrErr =
  223. Symbolizer.symbolizeInlinedCode(ModuleSpec, Address);
  224. Expected<DILineInfo> Res0OrErr =
  225. !ResOrErr
  226. ? Expected<DILineInfo>(ResOrErr.takeError())
  227. : ((ResOrErr->getNumberOfFrames() == 0) ? DILineInfo()
  228. : ResOrErr->getFrame(0));
  229. print({ModuleName, Offset}, Res0OrErr, Printer);
  230. } else {
  231. Expected<DILineInfo> ResOrErr =
  232. Symbolizer.symbolizeCode(ModuleSpec, Address);
  233. print({ModuleName, Offset}, ResOrErr, Printer);
  234. }
  235. Symbolizer.pruneCache();
  236. }
  237. static void symbolizeInput(const opt::InputArgList &Args,
  238. object::BuildIDRef IncomingBuildID,
  239. uint64_t AdjustVMA, bool IsAddr2Line,
  240. OutputStyle Style, StringRef InputString,
  241. LLVMSymbolizer &Symbolizer, DIPrinter &Printer) {
  242. Command Cmd;
  243. std::string ModuleName;
  244. object::BuildID BuildID(IncomingBuildID.begin(), IncomingBuildID.end());
  245. uint64_t Offset = 0;
  246. if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line,
  247. StringRef(InputString), Cmd, ModuleName, BuildID, Offset)) {
  248. Printer.printInvalidCommand({ModuleName, std::nullopt}, InputString);
  249. return;
  250. }
  251. bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line);
  252. if (!BuildID.empty()) {
  253. assert(ModuleName.empty());
  254. if (!Args.hasArg(OPT_no_debuginfod))
  255. enableDebuginfod(Symbolizer, Args);
  256. std::string BuildIDStr = toHex(BuildID);
  257. executeCommand(BuildIDStr, BuildID, Cmd, Offset, AdjustVMA, ShouldInline,
  258. Style, Symbolizer, Printer);
  259. } else {
  260. executeCommand(ModuleName, ModuleName, Cmd, Offset, AdjustVMA, ShouldInline,
  261. Style, Symbolizer, Printer);
  262. }
  263. }
  264. static void printHelp(StringRef ToolName, const SymbolizerOptTable &Tbl,
  265. raw_ostream &OS) {
  266. const char HelpText[] = " [options] addresses...";
  267. Tbl.printHelp(OS, (ToolName + HelpText).str().c_str(),
  268. ToolName.str().c_str());
  269. // TODO Replace this with OptTable API once it adds extrahelp support.
  270. OS << "\nPass @FILE as argument to read options from FILE.\n";
  271. }
  272. static opt::InputArgList parseOptions(int Argc, char *Argv[], bool IsAddr2Line,
  273. StringSaver &Saver,
  274. SymbolizerOptTable &Tbl) {
  275. StringRef ToolName = IsAddr2Line ? "llvm-addr2line" : "llvm-symbolizer";
  276. // The environment variable specifies initial options which can be overridden
  277. // by commnad line options.
  278. Tbl.setInitialOptionsFromEnvironment(IsAddr2Line ? "LLVM_ADDR2LINE_OPTS"
  279. : "LLVM_SYMBOLIZER_OPTS");
  280. bool HasError = false;
  281. opt::InputArgList Args =
  282. Tbl.parseArgs(Argc, Argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
  283. errs() << ("error: " + Msg + "\n");
  284. HasError = true;
  285. });
  286. if (HasError)
  287. exit(1);
  288. if (Args.hasArg(OPT_help)) {
  289. printHelp(ToolName, Tbl, outs());
  290. exit(0);
  291. }
  292. if (Args.hasArg(OPT_version)) {
  293. outs() << ToolName << '\n';
  294. cl::PrintVersionMessage();
  295. exit(0);
  296. }
  297. return Args;
  298. }
  299. template <typename T>
  300. static void parseIntArg(const opt::InputArgList &Args, int ID, T &Value) {
  301. if (const opt::Arg *A = Args.getLastArg(ID)) {
  302. StringRef V(A->getValue());
  303. if (!llvm::to_integer(V, Value, 0)) {
  304. errs() << A->getSpelling() +
  305. ": expected a non-negative integer, but got '" + V + "'";
  306. exit(1);
  307. }
  308. } else {
  309. Value = 0;
  310. }
  311. }
  312. static FunctionNameKind decideHowToPrintFunctions(const opt::InputArgList &Args,
  313. bool IsAddr2Line) {
  314. if (Args.hasArg(OPT_functions))
  315. return FunctionNameKind::LinkageName;
  316. if (const opt::Arg *A = Args.getLastArg(OPT_functions_EQ))
  317. return StringSwitch<FunctionNameKind>(A->getValue())
  318. .Case("none", FunctionNameKind::None)
  319. .Case("short", FunctionNameKind::ShortName)
  320. .Default(FunctionNameKind::LinkageName);
  321. return IsAddr2Line ? FunctionNameKind::None : FunctionNameKind::LinkageName;
  322. }
  323. static std::optional<bool> parseColorArg(const opt::InputArgList &Args) {
  324. if (Args.hasArg(OPT_color))
  325. return true;
  326. if (const opt::Arg *A = Args.getLastArg(OPT_color_EQ))
  327. return StringSwitch<std::optional<bool>>(A->getValue())
  328. .Case("always", true)
  329. .Case("never", false)
  330. .Case("auto", std::nullopt);
  331. return std::nullopt;
  332. }
  333. static object::BuildID parseBuildIDArg(const opt::InputArgList &Args, int ID) {
  334. const opt::Arg *A = Args.getLastArg(ID);
  335. if (!A)
  336. return {};
  337. StringRef V(A->getValue());
  338. object::BuildID BuildID = parseBuildID(V);
  339. if (BuildID.empty()) {
  340. errs() << A->getSpelling() + ": expected a build ID, but got '" + V + "'\n";
  341. exit(1);
  342. }
  343. return BuildID;
  344. }
  345. // Symbolize markup from stdin and write the result to stdout.
  346. static void filterMarkup(const opt::InputArgList &Args, LLVMSymbolizer &Symbolizer) {
  347. MarkupFilter Filter(outs(), Symbolizer, parseColorArg(Args));
  348. std::string InputString;
  349. while (std::getline(std::cin, InputString)) {
  350. InputString += '\n';
  351. Filter.filter(InputString);
  352. }
  353. Filter.finish();
  354. }
  355. ExitOnError ExitOnErr;
  356. int main(int argc, char **argv) {
  357. InitLLVM X(argc, argv);
  358. sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded);
  359. bool IsAddr2Line = sys::path::stem(argv[0]).contains("addr2line");
  360. BumpPtrAllocator A;
  361. StringSaver Saver(A);
  362. SymbolizerOptTable Tbl;
  363. opt::InputArgList Args = parseOptions(argc, argv, IsAddr2Line, Saver, Tbl);
  364. LLVMSymbolizer::Options Opts;
  365. uint64_t AdjustVMA;
  366. PrinterConfig Config;
  367. parseIntArg(Args, OPT_adjust_vma_EQ, AdjustVMA);
  368. if (const opt::Arg *A = Args.getLastArg(OPT_basenames, OPT_relativenames)) {
  369. Opts.PathStyle =
  370. A->getOption().matches(OPT_basenames)
  371. ? DILineInfoSpecifier::FileLineInfoKind::BaseNameOnly
  372. : DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath;
  373. } else {
  374. Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath;
  375. }
  376. Opts.DebugFileDirectory = Args.getAllArgValues(OPT_debug_file_directory_EQ);
  377. Opts.DefaultArch = Args.getLastArgValue(OPT_default_arch_EQ).str();
  378. Opts.Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, !IsAddr2Line);
  379. Opts.DWPName = Args.getLastArgValue(OPT_dwp_EQ).str();
  380. Opts.FallbackDebugPath =
  381. Args.getLastArgValue(OPT_fallback_debug_path_EQ).str();
  382. Opts.PrintFunctions = decideHowToPrintFunctions(Args, IsAddr2Line);
  383. parseIntArg(Args, OPT_print_source_context_lines_EQ,
  384. Config.SourceContextLines);
  385. Opts.RelativeAddresses = Args.hasArg(OPT_relative_address);
  386. Opts.UntagAddresses =
  387. Args.hasFlag(OPT_untag_addresses, OPT_no_untag_addresses, !IsAddr2Line);
  388. Opts.UseDIA = Args.hasArg(OPT_use_dia);
  389. #if !defined(LLVM_ENABLE_DIA_SDK)
  390. if (Opts.UseDIA) {
  391. WithColor::warning() << "DIA not available; using native PDB reader\n";
  392. Opts.UseDIA = false;
  393. }
  394. #endif
  395. Opts.UseSymbolTable = true;
  396. if (Args.hasArg(OPT_cache_size_EQ))
  397. parseIntArg(Args, OPT_cache_size_EQ, Opts.MaxCacheSize);
  398. Config.PrintAddress = Args.hasArg(OPT_addresses);
  399. Config.PrintFunctions = Opts.PrintFunctions != FunctionNameKind::None;
  400. Config.Pretty = Args.hasArg(OPT_pretty_print);
  401. Config.Verbose = Args.hasArg(OPT_verbose);
  402. for (const opt::Arg *A : Args.filtered(OPT_dsym_hint_EQ)) {
  403. StringRef Hint(A->getValue());
  404. if (sys::path::extension(Hint) == ".dSYM") {
  405. Opts.DsymHints.emplace_back(Hint);
  406. } else {
  407. errs() << "Warning: invalid dSYM hint: \"" << Hint
  408. << "\" (must have the '.dSYM' extension).\n";
  409. }
  410. }
  411. LLVMSymbolizer Symbolizer(Opts);
  412. if (Args.hasFlag(OPT_debuginfod, OPT_no_debuginfod, canUseDebuginfod()))
  413. enableDebuginfod(Symbolizer, Args);
  414. if (Args.hasArg(OPT_filter_markup)) {
  415. filterMarkup(Args, Symbolizer);
  416. return 0;
  417. }
  418. auto Style = IsAddr2Line ? OutputStyle::GNU : OutputStyle::LLVM;
  419. if (const opt::Arg *A = Args.getLastArg(OPT_output_style_EQ)) {
  420. if (strcmp(A->getValue(), "GNU") == 0)
  421. Style = OutputStyle::GNU;
  422. else if (strcmp(A->getValue(), "JSON") == 0)
  423. Style = OutputStyle::JSON;
  424. else
  425. Style = OutputStyle::LLVM;
  426. }
  427. if (Args.hasArg(OPT_build_id_EQ) && Args.hasArg(OPT_obj_EQ)) {
  428. errs() << "error: cannot specify both --build-id and --obj\n";
  429. return EXIT_FAILURE;
  430. }
  431. object::BuildID BuildID = parseBuildIDArg(Args, OPT_build_id_EQ);
  432. std::unique_ptr<DIPrinter> Printer;
  433. if (Style == OutputStyle::GNU)
  434. Printer = std::make_unique<GNUPrinter>(outs(), errs(), Config);
  435. else if (Style == OutputStyle::JSON)
  436. Printer = std::make_unique<JSONPrinter>(outs(), Config);
  437. else
  438. Printer = std::make_unique<LLVMPrinter>(outs(), errs(), Config);
  439. std::vector<std::string> InputAddresses = Args.getAllArgValues(OPT_INPUT);
  440. if (InputAddresses.empty()) {
  441. const int kMaxInputStringLength = 1024;
  442. char InputString[kMaxInputStringLength];
  443. while (fgets(InputString, sizeof(InputString), stdin)) {
  444. // Strip newline characters.
  445. std::string StrippedInputString(InputString);
  446. llvm::erase_if(StrippedInputString,
  447. [](char c) { return c == '\r' || c == '\n'; });
  448. symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style,
  449. StrippedInputString, Symbolizer, *Printer);
  450. outs().flush();
  451. }
  452. } else {
  453. Printer->listBegin();
  454. for (StringRef Address : InputAddresses)
  455. symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style, Address,
  456. Symbolizer, *Printer);
  457. Printer->listEnd();
  458. }
  459. return 0;
  460. }