llvm-symbolizer.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
  1. //===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This utility works much like "addr2line". It is capable of transforming
  10. // tuples (module name, module offset) to code locations (function name,
  11. // file, line number, column number). It is targeted for compiler-rt tools
  12. // (especially AddressSanitizer and ThreadSanitizer) that can use it
  13. // to symbolize stack traces in their error reports.
  14. //
  15. //===----------------------------------------------------------------------===//
  16. #include "Opts.inc"
  17. #include "llvm/ADT/StringRef.h"
  18. #include "llvm/Config/config.h"
  19. #include "llvm/DebugInfo/Symbolize/DIPrinter.h"
  20. #include "llvm/DebugInfo/Symbolize/Symbolize.h"
  21. #include "llvm/Option/Arg.h"
  22. #include "llvm/Option/ArgList.h"
  23. #include "llvm/Option/Option.h"
  24. #include "llvm/Support/COM.h"
  25. #include "llvm/Support/CommandLine.h"
  26. #include "llvm/Support/Debug.h"
  27. #include "llvm/Support/FileSystem.h"
  28. #include "llvm/Support/InitLLVM.h"
  29. #include "llvm/Support/Path.h"
  30. #include "llvm/Support/StringSaver.h"
  31. #include "llvm/Support/raw_ostream.h"
  32. #include <algorithm>
  33. #include <cstdio>
  34. #include <cstring>
  35. #include <string>
  36. using namespace llvm;
  37. using namespace symbolize;
  38. namespace {
  39. enum ID {
  40. OPT_INVALID = 0, // This is not an option ID.
  41. #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
  42. HELPTEXT, METAVAR, VALUES) \
  43. OPT_##ID,
  44. #include "Opts.inc"
  45. #undef OPTION
  46. };
  47. #define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
  48. #include "Opts.inc"
  49. #undef PREFIX
  50. static const opt::OptTable::Info InfoTable[] = {
  51. #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
  52. HELPTEXT, METAVAR, VALUES) \
  53. { \
  54. PREFIX, NAME, HELPTEXT, \
  55. METAVAR, OPT_##ID, opt::Option::KIND##Class, \
  56. PARAM, FLAGS, OPT_##GROUP, \
  57. OPT_##ALIAS, ALIASARGS, VALUES},
  58. #include "Opts.inc"
  59. #undef OPTION
  60. };
  61. class SymbolizerOptTable : public opt::OptTable {
  62. public:
  63. SymbolizerOptTable() : OptTable(InfoTable, true) {}
  64. };
  65. } // namespace
  66. static cl::list<std::string> ClInputAddresses(cl::Positional,
  67. cl::desc("<input addresses>..."),
  68. cl::ZeroOrMore);
  69. template<typename T>
  70. static bool error(Expected<T> &ResOrErr) {
  71. if (ResOrErr)
  72. return false;
  73. logAllUnhandledErrors(ResOrErr.takeError(), errs(),
  74. "LLVMSymbolizer: error reading file: ");
  75. return true;
  76. }
  /// Kind of request carried by one input line, selected by an optional
  /// leading keyword in the input.
  enum class Command {
    Code,  ///< "CODE " prefix, and the default when no keyword is present.
    Data,  ///< "DATA " prefix.
    Frame, ///< "FRAME " prefix.
  };
  82. static bool parseCommand(StringRef BinaryName, bool IsAddr2Line,
  83. StringRef InputString, Command &Cmd,
  84. std::string &ModuleName, uint64_t &ModuleOffset) {
  85. const char kDelimiters[] = " \n\r";
  86. ModuleName = "";
  87. if (InputString.consume_front("CODE ")) {
  88. Cmd = Command::Code;
  89. } else if (InputString.consume_front("DATA ")) {
  90. Cmd = Command::Data;
  91. } else if (InputString.consume_front("FRAME ")) {
  92. Cmd = Command::Frame;
  93. } else {
  94. // If no cmd, assume it's CODE.
  95. Cmd = Command::Code;
  96. }
  97. const char *Pos = InputString.data();
  98. // Skip delimiters and parse input filename (if needed).
  99. if (BinaryName.empty()) {
  100. Pos += strspn(Pos, kDelimiters);
  101. if (*Pos == '"' || *Pos == '\'') {
  102. char Quote = *Pos;
  103. Pos++;
  104. const char *End = strchr(Pos, Quote);
  105. if (!End)
  106. return false;
  107. ModuleName = std::string(Pos, End - Pos);
  108. Pos = End + 1;
  109. } else {
  110. int NameLength = strcspn(Pos, kDelimiters);
  111. ModuleName = std::string(Pos, NameLength);
  112. Pos += NameLength;
  113. }
  114. } else {
  115. ModuleName = BinaryName.str();
  116. }
  117. // Skip delimiters and parse module offset.
  118. Pos += strspn(Pos, kDelimiters);
  119. int OffsetLength = strcspn(Pos, kDelimiters);
  120. StringRef Offset(Pos, OffsetLength);
  121. // GNU addr2line assumes the offset is hexadecimal and allows a redundant
  122. // "0x" or "0X" prefix; do the same for compatibility.
  123. if (IsAddr2Line)
  124. Offset.consume_front("0x") || Offset.consume_front("0X");
  125. return !Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset);
  126. }
  127. static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA,
  128. bool IsAddr2Line, DIPrinter::OutputStyle OutputStyle,
  129. StringRef InputString, LLVMSymbolizer &Symbolizer,
  130. DIPrinter &Printer) {
  131. Command Cmd;
  132. std::string ModuleName;
  133. uint64_t Offset = 0;
  134. if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line,
  135. StringRef(InputString), Cmd, ModuleName, Offset)) {
  136. outs() << InputString << "\n";
  137. return;
  138. }
  139. if (Args.hasArg(OPT_addresses)) {
  140. outs() << "0x";
  141. outs().write_hex(Offset);
  142. StringRef Delimiter = Args.hasArg(OPT_pretty_print) ? ": " : "\n";
  143. outs() << Delimiter;
  144. }
  145. Offset -= AdjustVMA;
  146. if (Cmd == Command::Data) {
  147. auto ResOrErr = Symbolizer.symbolizeData(
  148. ModuleName, {Offset, object::SectionedAddress::UndefSection});
  149. Printer << (error(ResOrErr) ? DIGlobal() : ResOrErr.get());
  150. } else if (Cmd == Command::Frame) {
  151. auto ResOrErr = Symbolizer.symbolizeFrame(
  152. ModuleName, {Offset, object::SectionedAddress::UndefSection});
  153. if (!error(ResOrErr)) {
  154. for (DILocal Local : *ResOrErr)
  155. Printer << Local;
  156. if (ResOrErr->empty())
  157. outs() << "??\n";
  158. }
  159. } else if (Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line)) {
  160. auto ResOrErr = Symbolizer.symbolizeInlinedCode(
  161. ModuleName, {Offset, object::SectionedAddress::UndefSection});
  162. Printer << (error(ResOrErr) ? DIInliningInfo() : ResOrErr.get());
  163. } else if (OutputStyle == DIPrinter::OutputStyle::GNU) {
  164. // With PrintFunctions == FunctionNameKind::LinkageName (default)
  165. // and UseSymbolTable == true (also default), Symbolizer.symbolizeCode()
  166. // may override the name of an inlined function with the name of the topmost
  167. // caller function in the inlining chain. This contradicts the existing
  168. // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only
  169. // the topmost function, which suits our needs better.
  170. auto ResOrErr = Symbolizer.symbolizeInlinedCode(
  171. ModuleName, {Offset, object::SectionedAddress::UndefSection});
  172. if (!ResOrErr || ResOrErr->getNumberOfFrames() == 0) {
  173. error(ResOrErr);
  174. Printer << DILineInfo();
  175. } else {
  176. Printer << ResOrErr->getFrame(0);
  177. }
  178. } else {
  179. auto ResOrErr = Symbolizer.symbolizeCode(
  180. ModuleName, {Offset, object::SectionedAddress::UndefSection});
  181. Printer << (error(ResOrErr) ? DILineInfo() : ResOrErr.get());
  182. }
  183. if (OutputStyle == DIPrinter::OutputStyle::LLVM)
  184. outs() << "\n";
  185. }
  186. static void printHelp(StringRef ToolName, const SymbolizerOptTable &Tbl,
  187. raw_ostream &OS) {
  188. const char HelpText[] = " [options] addresses...";
  189. Tbl.PrintHelp(OS, (ToolName + HelpText).str().c_str(),
  190. ToolName.str().c_str());
  191. // TODO Replace this with OptTable API once it adds extrahelp support.
  192. OS << "\nPass @FILE as argument to read options from FILE.\n";
  193. }
  194. static opt::InputArgList parseOptions(int Argc, char *Argv[], bool IsAddr2Line,
  195. StringSaver &Saver,
  196. SymbolizerOptTable &Tbl) {
  197. StringRef ToolName = IsAddr2Line ? "llvm-addr2line" : "llvm-symbolizer";
  198. Tbl.setGroupedShortOptions(true);
  199. // The environment variable specifies initial options which can be overridden
  200. // by commnad line options.
  201. Tbl.setInitialOptionsFromEnvironment(IsAddr2Line ? "LLVM_ADDR2LINE_OPTS"
  202. : "LLVM_SYMBOLIZER_OPTS");
  203. bool HasError = false;
  204. opt::InputArgList Args =
  205. Tbl.parseArgs(Argc, Argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
  206. errs() << ("error: " + Msg + "\n");
  207. HasError = true;
  208. });
  209. if (HasError)
  210. exit(1);
  211. if (Args.hasArg(OPT_help)) {
  212. printHelp(ToolName, Tbl, outs());
  213. exit(0);
  214. }
  215. if (Args.hasArg(OPT_version)) {
  216. outs() << ToolName << '\n';
  217. cl::PrintVersionMessage();
  218. exit(0);
  219. }
  220. return Args;
  221. }
  222. template <typename T>
  223. static void parseIntArg(const opt::InputArgList &Args, int ID, T &Value) {
  224. if (const opt::Arg *A = Args.getLastArg(ID)) {
  225. StringRef V(A->getValue());
  226. if (!llvm::to_integer(V, Value, 0)) {
  227. errs() << A->getSpelling() +
  228. ": expected a non-negative integer, but got '" + V + "'";
  229. exit(1);
  230. }
  231. } else {
  232. Value = 0;
  233. }
  234. }
  235. static FunctionNameKind decideHowToPrintFunctions(const opt::InputArgList &Args,
  236. bool IsAddr2Line) {
  237. if (Args.hasArg(OPT_functions))
  238. return FunctionNameKind::LinkageName;
  239. if (const opt::Arg *A = Args.getLastArg(OPT_functions_EQ))
  240. return StringSwitch<FunctionNameKind>(A->getValue())
  241. .Case("none", FunctionNameKind::None)
  242. .Case("short", FunctionNameKind::ShortName)
  243. .Default(FunctionNameKind::LinkageName);
  244. return IsAddr2Line ? FunctionNameKind::None : FunctionNameKind::LinkageName;
  245. }
  246. int main(int argc, char **argv) {
  247. InitLLVM X(argc, argv);
  248. sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded);
  249. bool IsAddr2Line = sys::path::stem(argv[0]).contains("addr2line");
  250. BumpPtrAllocator A;
  251. StringSaver Saver(A);
  252. SymbolizerOptTable Tbl;
  253. opt::InputArgList Args = parseOptions(argc, argv, IsAddr2Line, Saver, Tbl);
  254. LLVMSymbolizer::Options Opts;
  255. uint64_t AdjustVMA;
  256. unsigned SourceContextLines;
  257. parseIntArg(Args, OPT_adjust_vma_EQ, AdjustVMA);
  258. if (const opt::Arg *A = Args.getLastArg(OPT_basenames, OPT_relativenames)) {
  259. Opts.PathStyle =
  260. A->getOption().matches(OPT_basenames)
  261. ? DILineInfoSpecifier::FileLineInfoKind::BaseNameOnly
  262. : DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath;
  263. } else {
  264. Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath;
  265. }
  266. Opts.DebugFileDirectory = Args.getAllArgValues(OPT_debug_file_directory_EQ);
  267. Opts.DefaultArch = Args.getLastArgValue(OPT_default_arch_EQ).str();
  268. Opts.Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, !IsAddr2Line);
  269. Opts.DWPName = Args.getLastArgValue(OPT_dwp_EQ).str();
  270. Opts.FallbackDebugPath =
  271. Args.getLastArgValue(OPT_fallback_debug_path_EQ).str();
  272. Opts.PrintFunctions = decideHowToPrintFunctions(Args, IsAddr2Line);
  273. parseIntArg(Args, OPT_print_source_context_lines_EQ, SourceContextLines);
  274. Opts.RelativeAddresses = Args.hasArg(OPT_relative_address);
  275. Opts.UntagAddresses =
  276. Args.hasFlag(OPT_untag_addresses, OPT_no_untag_addresses, !IsAddr2Line);
  277. Opts.UseDIA = Args.hasArg(OPT_use_dia);
  278. #if !defined(LLVM_ENABLE_DIA_SDK)
  279. if (Opts.UseDIA) {
  280. WithColor::warning() << "DIA not available; using native PDB reader\n";
  281. Opts.UseDIA = false;
  282. }
  283. #endif
  284. Opts.UseSymbolTable = true;
  285. for (const opt::Arg *A : Args.filtered(OPT_dsym_hint_EQ)) {
  286. StringRef Hint(A->getValue());
  287. if (sys::path::extension(Hint) == ".dSYM") {
  288. Opts.DsymHints.emplace_back(Hint);
  289. } else {
  290. errs() << "Warning: invalid dSYM hint: \"" << Hint
  291. << "\" (must have the '.dSYM' extension).\n";
  292. }
  293. }
  294. auto OutputStyle =
  295. IsAddr2Line ? DIPrinter::OutputStyle::GNU : DIPrinter::OutputStyle::LLVM;
  296. if (const opt::Arg *A = Args.getLastArg(OPT_output_style_EQ)) {
  297. OutputStyle = strcmp(A->getValue(), "GNU") == 0
  298. ? DIPrinter::OutputStyle::GNU
  299. : DIPrinter::OutputStyle::LLVM;
  300. }
  301. LLVMSymbolizer Symbolizer(Opts);
  302. DIPrinter Printer(outs(), Opts.PrintFunctions != FunctionNameKind::None,
  303. Args.hasArg(OPT_pretty_print), SourceContextLines,
  304. Args.hasArg(OPT_verbose), OutputStyle);
  305. std::vector<std::string> InputAddresses = Args.getAllArgValues(OPT_INPUT);
  306. if (InputAddresses.empty()) {
  307. const int kMaxInputStringLength = 1024;
  308. char InputString[kMaxInputStringLength];
  309. while (fgets(InputString, sizeof(InputString), stdin)) {
  310. // Strip newline characters.
  311. std::string StrippedInputString(InputString);
  312. llvm::erase_if(StrippedInputString,
  313. [](char c) { return c == '\r' || c == '\n'; });
  314. symbolizeInput(Args, AdjustVMA, IsAddr2Line, OutputStyle,
  315. StrippedInputString, Symbolizer, Printer);
  316. outs().flush();
  317. }
  318. } else {
  319. for (StringRef Address : InputAddresses)
  320. symbolizeInput(Args, AdjustVMA, IsAddr2Line, OutputStyle, Address,
  321. Symbolizer, Printer);
  322. }
  323. return 0;
  324. }