sanitizer_symbolizer_libcdep.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578
  1. //===-- sanitizer_symbolizer_libcdep.cpp ----------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file is shared between AddressSanitizer and ThreadSanitizer
  10. // run-time libraries.
  11. //===----------------------------------------------------------------------===//
  12. #include "sanitizer_allocator_internal.h"
  13. #include "sanitizer_internal_defs.h"
  14. #include "sanitizer_platform.h"
  15. #include "sanitizer_symbolizer_internal.h"
  16. namespace __sanitizer {
  17. Symbolizer *Symbolizer::GetOrInit() {
  18. SpinMutexLock l(&init_mu_);
  19. if (symbolizer_)
  20. return symbolizer_;
  21. symbolizer_ = PlatformInit();
  22. CHECK(symbolizer_);
  23. return symbolizer_;
  24. }
  25. // See sanitizer_symbolizer_markup.cpp.
  26. #if !SANITIZER_SYMBOLIZER_MARKUP
  27. const char *ExtractToken(const char *str, const char *delims, char **result) {
  28. uptr prefix_len = internal_strcspn(str, delims);
  29. *result = (char*)InternalAlloc(prefix_len + 1);
  30. internal_memcpy(*result, str, prefix_len);
  31. (*result)[prefix_len] = '\0';
  32. const char *prefix_end = str + prefix_len;
  33. if (*prefix_end != '\0') prefix_end++;
  34. return prefix_end;
  35. }
  36. const char *ExtractInt(const char *str, const char *delims, int *result) {
  37. char *buff = nullptr;
  38. const char *ret = ExtractToken(str, delims, &buff);
  39. if (buff) {
  40. *result = (int)internal_atoll(buff);
  41. }
  42. InternalFree(buff);
  43. return ret;
  44. }
  45. const char *ExtractUptr(const char *str, const char *delims, uptr *result) {
  46. char *buff = nullptr;
  47. const char *ret = ExtractToken(str, delims, &buff);
  48. if (buff) {
  49. *result = (uptr)internal_atoll(buff);
  50. }
  51. InternalFree(buff);
  52. return ret;
  53. }
  54. const char *ExtractSptr(const char *str, const char *delims, sptr *result) {
  55. char *buff = nullptr;
  56. const char *ret = ExtractToken(str, delims, &buff);
  57. if (buff) {
  58. *result = (sptr)internal_atoll(buff);
  59. }
  60. InternalFree(buff);
  61. return ret;
  62. }
  63. const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter,
  64. char **result) {
  65. const char *found_delimiter = internal_strstr(str, delimiter);
  66. uptr prefix_len =
  67. found_delimiter ? found_delimiter - str : internal_strlen(str);
  68. *result = (char *)InternalAlloc(prefix_len + 1);
  69. internal_memcpy(*result, str, prefix_len);
  70. (*result)[prefix_len] = '\0';
  71. const char *prefix_end = str + prefix_len;
  72. if (*prefix_end != '\0') prefix_end += internal_strlen(delimiter);
  73. return prefix_end;
  74. }
  75. SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) {
  76. Lock l(&mu_);
  77. SymbolizedStack *res = SymbolizedStack::New(addr);
  78. auto *mod = FindModuleForAddress(addr);
  79. if (!mod)
  80. return res;
  81. // Always fill data about module name and offset.
  82. res->info.FillModuleInfo(*mod);
  83. for (auto &tool : tools_) {
  84. SymbolizerScope sym_scope(this);
  85. if (tool.SymbolizePC(addr, res)) {
  86. return res;
  87. }
  88. }
  89. return res;
  90. }
  91. bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) {
  92. Lock l(&mu_);
  93. const char *module_name = nullptr;
  94. uptr module_offset;
  95. ModuleArch arch;
  96. if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset,
  97. &arch))
  98. return false;
  99. info->Clear();
  100. info->module = internal_strdup(module_name);
  101. info->module_offset = module_offset;
  102. info->module_arch = arch;
  103. for (auto &tool : tools_) {
  104. SymbolizerScope sym_scope(this);
  105. if (tool.SymbolizeData(addr, info)) {
  106. return true;
  107. }
  108. }
  109. return false;
  110. }
  111. bool Symbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) {
  112. Lock l(&mu_);
  113. const char *module_name = nullptr;
  114. if (!FindModuleNameAndOffsetForAddress(
  115. addr, &module_name, &info->module_offset, &info->module_arch))
  116. return false;
  117. info->module = internal_strdup(module_name);
  118. for (auto &tool : tools_) {
  119. SymbolizerScope sym_scope(this);
  120. if (tool.SymbolizeFrame(addr, info)) {
  121. return true;
  122. }
  123. }
  124. return false;
  125. }
  126. bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
  127. uptr *module_address) {
  128. Lock l(&mu_);
  129. const char *internal_module_name = nullptr;
  130. ModuleArch arch;
  131. if (!FindModuleNameAndOffsetForAddress(pc, &internal_module_name,
  132. module_address, &arch))
  133. return false;
  134. if (module_name)
  135. *module_name = module_names_.GetOwnedCopy(internal_module_name);
  136. return true;
  137. }
  138. void Symbolizer::Flush() {
  139. Lock l(&mu_);
  140. for (auto &tool : tools_) {
  141. SymbolizerScope sym_scope(this);
  142. tool.Flush();
  143. }
  144. }
  145. const char *Symbolizer::Demangle(const char *name) {
  146. CHECK(name);
  147. Lock l(&mu_);
  148. for (auto &tool : tools_) {
  149. SymbolizerScope sym_scope(this);
  150. if (const char *demangled = tool.Demangle(name))
  151. return demangled;
  152. }
  153. if (const char *demangled = PlatformDemangle(name))
  154. return demangled;
  155. return name;
  156. }
  157. bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address,
  158. const char **module_name,
  159. uptr *module_offset,
  160. ModuleArch *module_arch) {
  161. const LoadedModule *module = FindModuleForAddress(address);
  162. if (!module)
  163. return false;
  164. *module_name = module->full_name();
  165. *module_offset = address - module->base_address();
  166. *module_arch = module->arch();
  167. return true;
  168. }
  169. void Symbolizer::RefreshModules() {
  170. modules_.init();
  171. fallback_modules_.fallbackInit();
  172. RAW_CHECK(modules_.size() > 0);
  173. modules_fresh_ = true;
  174. }
  175. const ListOfModules &Symbolizer::GetRefreshedListOfModules() {
  176. if (!modules_fresh_)
  177. RefreshModules();
  178. return modules_;
  179. }
  180. static const LoadedModule *SearchForModule(const ListOfModules &modules,
  181. uptr address) {
  182. for (uptr i = 0; i < modules.size(); i++) {
  183. if (modules[i].containsAddress(address)) {
  184. return &modules[i];
  185. }
  186. }
  187. return nullptr;
  188. }
  189. const LoadedModule *Symbolizer::FindModuleForAddress(uptr address) {
  190. bool modules_were_reloaded = false;
  191. if (!modules_fresh_) {
  192. RefreshModules();
  193. modules_were_reloaded = true;
  194. }
  195. const LoadedModule *module = SearchForModule(modules_, address);
  196. if (module) return module;
  197. // dlopen/dlclose interceptors invalidate the module list, but when
  198. // interception is disabled, we need to retry if the lookup fails in
  199. // case the module list changed.
  200. #if !SANITIZER_INTERCEPT_DLOPEN_DLCLOSE
  201. if (!modules_were_reloaded) {
  202. RefreshModules();
  203. module = SearchForModule(modules_, address);
  204. if (module) return module;
  205. }
  206. #endif
  207. if (fallback_modules_.size()) {
  208. module = SearchForModule(fallback_modules_, address);
  209. }
  210. return module;
  211. }
  212. // For now we assume the following protocol:
  213. // For each request of the form
  214. // <module_name> <module_offset>
  215. // passed to STDIN, external symbolizer prints to STDOUT response:
  216. // <function_name>
  217. // <file_name>:<line_number>:<column_number>
  218. // <function_name>
  219. // <file_name>:<line_number>:<column_number>
  220. // ...
  221. // <empty line>
  222. class LLVMSymbolizerProcess final : public SymbolizerProcess {
  223. public:
  224. explicit LLVMSymbolizerProcess(const char *path)
  225. : SymbolizerProcess(path, /*use_posix_spawn=*/SANITIZER_APPLE) {}
  226. private:
  227. bool ReachedEndOfOutput(const char *buffer, uptr length) const override {
  228. // Empty line marks the end of llvm-symbolizer output.
  229. return length >= 2 && buffer[length - 1] == '\n' &&
  230. buffer[length - 2] == '\n';
  231. }
  232. // When adding a new architecture, don't forget to also update
  233. // script/asan_symbolize.py and sanitizer_common.h.
  234. void GetArgV(const char *path_to_binary,
  235. const char *(&argv)[kArgVMax]) const override {
  236. #if defined(__x86_64h__)
  237. const char* const kSymbolizerArch = "--default-arch=x86_64h";
  238. #elif defined(__x86_64__)
  239. const char* const kSymbolizerArch = "--default-arch=x86_64";
  240. #elif defined(__i386__)
  241. const char* const kSymbolizerArch = "--default-arch=i386";
  242. #elif SANITIZER_LOONGARCH64
  243. const char *const kSymbolizerArch = "--default-arch=loongarch64";
  244. #elif SANITIZER_RISCV64
  245. const char *const kSymbolizerArch = "--default-arch=riscv64";
  246. #elif defined(__aarch64__)
  247. const char* const kSymbolizerArch = "--default-arch=arm64";
  248. #elif defined(__arm__)
  249. const char* const kSymbolizerArch = "--default-arch=arm";
  250. #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  251. const char* const kSymbolizerArch = "--default-arch=powerpc64";
  252. #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  253. const char* const kSymbolizerArch = "--default-arch=powerpc64le";
  254. #elif defined(__s390x__)
  255. const char* const kSymbolizerArch = "--default-arch=s390x";
  256. #elif defined(__s390__)
  257. const char* const kSymbolizerArch = "--default-arch=s390";
  258. #else
  259. const char* const kSymbolizerArch = "--default-arch=unknown";
  260. #endif
  261. const char *const demangle_flag =
  262. common_flags()->demangle ? "--demangle" : "--no-demangle";
  263. const char *const inline_flag =
  264. common_flags()->symbolize_inline_frames ? "--inlines" : "--no-inlines";
  265. int i = 0;
  266. argv[i++] = path_to_binary;
  267. argv[i++] = demangle_flag;
  268. argv[i++] = inline_flag;
  269. argv[i++] = kSymbolizerArch;
  270. argv[i++] = nullptr;
  271. CHECK_LE(i, kArgVMax);
  272. }
  273. };
  274. LLVMSymbolizer::LLVMSymbolizer(const char *path, LowLevelAllocator *allocator)
  275. : symbolizer_process_(new(*allocator) LLVMSymbolizerProcess(path)) {}
  276. // Parse a <file>:<line>[:<column>] buffer. The file path may contain colons on
  277. // Windows, so extract tokens from the right hand side first. The column info is
  278. // also optional.
  279. static const char *ParseFileLineInfo(AddressInfo *info, const char *str) {
  280. char *file_line_info = nullptr;
  281. str = ExtractToken(str, "\n", &file_line_info);
  282. CHECK(file_line_info);
  283. if (uptr size = internal_strlen(file_line_info)) {
  284. char *back = file_line_info + size - 1;
  285. for (int i = 0; i < 2; ++i) {
  286. while (back > file_line_info && IsDigit(*back)) --back;
  287. if (*back != ':' || !IsDigit(back[1])) break;
  288. info->column = info->line;
  289. info->line = internal_atoll(back + 1);
  290. // Truncate the string at the colon to keep only filename.
  291. *back = '\0';
  292. --back;
  293. }
  294. ExtractToken(file_line_info, "", &info->file);
  295. }
  296. InternalFree(file_line_info);
  297. return str;
  298. }
  299. // Parses one or more two-line strings in the following format:
  300. // <function_name>
  301. // <file_name>:<line_number>[:<column_number>]
  302. // Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
  303. // them use the same output format.
  304. void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) {
  305. bool top_frame = true;
  306. SymbolizedStack *last = res;
  307. while (true) {
  308. char *function_name = nullptr;
  309. str = ExtractToken(str, "\n", &function_name);
  310. CHECK(function_name);
  311. if (function_name[0] == '\0') {
  312. // There are no more frames.
  313. InternalFree(function_name);
  314. break;
  315. }
  316. SymbolizedStack *cur;
  317. if (top_frame) {
  318. cur = res;
  319. top_frame = false;
  320. } else {
  321. cur = SymbolizedStack::New(res->info.address);
  322. cur->info.FillModuleInfo(res->info.module, res->info.module_offset,
  323. res->info.module_arch);
  324. last->next = cur;
  325. last = cur;
  326. }
  327. AddressInfo *info = &cur->info;
  328. info->function = function_name;
  329. str = ParseFileLineInfo(info, str);
  330. // Functions and filenames can be "??", in which case we write 0
  331. // to address info to mark that names are unknown.
  332. if (0 == internal_strcmp(info->function, "??")) {
  333. InternalFree(info->function);
  334. info->function = 0;
  335. }
  336. if (info->file && 0 == internal_strcmp(info->file, "??")) {
  337. InternalFree(info->file);
  338. info->file = 0;
  339. }
  340. }
  341. }
  342. // Parses a two- or three-line string in the following format:
  343. // <symbol_name>
  344. // <start_address> <size>
  345. // <filename>:<column>
  346. // Used by LLVMSymbolizer and InternalSymbolizer. LLVMSymbolizer added support
  347. // for symbolizing the third line in D123538, but we support the older two-line
  348. // information as well.
  349. void ParseSymbolizeDataOutput(const char *str, DataInfo *info) {
  350. str = ExtractToken(str, "\n", &info->name);
  351. str = ExtractUptr(str, " ", &info->start);
  352. str = ExtractUptr(str, "\n", &info->size);
  353. // Note: If the third line isn't present, these calls will set info.{file,
  354. // line} to empty strings.
  355. str = ExtractToken(str, ":", &info->file);
  356. str = ExtractUptr(str, "\n", &info->line);
  357. }
  358. void ParseSymbolizeFrameOutput(const char *str,
  359. InternalMmapVector<LocalInfo> *locals) {
  360. if (internal_strncmp(str, "??", 2) == 0)
  361. return;
  362. while (*str) {
  363. LocalInfo local;
  364. str = ExtractToken(str, "\n", &local.function_name);
  365. str = ExtractToken(str, "\n", &local.name);
  366. AddressInfo addr;
  367. str = ParseFileLineInfo(&addr, str);
  368. local.decl_file = addr.file;
  369. local.decl_line = addr.line;
  370. local.has_frame_offset = internal_strncmp(str, "??", 2) != 0;
  371. str = ExtractSptr(str, " ", &local.frame_offset);
  372. local.has_size = internal_strncmp(str, "??", 2) != 0;
  373. str = ExtractUptr(str, " ", &local.size);
  374. local.has_tag_offset = internal_strncmp(str, "??", 2) != 0;
  375. str = ExtractUptr(str, "\n", &local.tag_offset);
  376. locals->push_back(local);
  377. }
  378. }
  379. bool LLVMSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
  380. AddressInfo *info = &stack->info;
  381. const char *buf = FormatAndSendCommand(
  382. "CODE", info->module, info->module_offset, info->module_arch);
  383. if (!buf)
  384. return false;
  385. ParseSymbolizePCOutput(buf, stack);
  386. return true;
  387. }
  388. bool LLVMSymbolizer::SymbolizeData(uptr addr, DataInfo *info) {
  389. const char *buf = FormatAndSendCommand(
  390. "DATA", info->module, info->module_offset, info->module_arch);
  391. if (!buf)
  392. return false;
  393. ParseSymbolizeDataOutput(buf, info);
  394. info->start += (addr - info->module_offset); // Add the base address.
  395. return true;
  396. }
  397. bool LLVMSymbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) {
  398. const char *buf = FormatAndSendCommand(
  399. "FRAME", info->module, info->module_offset, info->module_arch);
  400. if (!buf)
  401. return false;
  402. ParseSymbolizeFrameOutput(buf, &info->locals);
  403. return true;
  404. }
  405. const char *LLVMSymbolizer::FormatAndSendCommand(const char *command_prefix,
  406. const char *module_name,
  407. uptr module_offset,
  408. ModuleArch arch) {
  409. CHECK(module_name);
  410. int size_needed = 0;
  411. if (arch == kModuleArchUnknown)
  412. size_needed = internal_snprintf(buffer_, kBufferSize, "%s \"%s\" 0x%zx\n",
  413. command_prefix, module_name, module_offset);
  414. else
  415. size_needed = internal_snprintf(buffer_, kBufferSize,
  416. "%s \"%s:%s\" 0x%zx\n", command_prefix,
  417. module_name, ModuleArchToString(arch),
  418. module_offset);
  419. if (size_needed >= static_cast<int>(kBufferSize)) {
  420. Report("WARNING: Command buffer too small");
  421. return nullptr;
  422. }
  423. return symbolizer_process_->SendCommand(buffer_);
  424. }
  425. SymbolizerProcess::SymbolizerProcess(const char *path, bool use_posix_spawn)
  426. : path_(path),
  427. input_fd_(kInvalidFd),
  428. output_fd_(kInvalidFd),
  429. times_restarted_(0),
  430. failed_to_start_(false),
  431. reported_invalid_path_(false),
  432. use_posix_spawn_(use_posix_spawn) {
  433. CHECK(path_);
  434. CHECK_NE(path_[0], '\0');
  435. }
  436. static bool IsSameModule(const char* path) {
  437. if (const char* ProcessName = GetProcessName()) {
  438. if (const char* SymbolizerName = StripModuleName(path)) {
  439. return !internal_strcmp(ProcessName, SymbolizerName);
  440. }
  441. }
  442. return false;
  443. }
  444. const char *SymbolizerProcess::SendCommand(const char *command) {
  445. if (failed_to_start_)
  446. return nullptr;
  447. if (IsSameModule(path_)) {
  448. Report("WARNING: Symbolizer was blocked from starting itself!\n");
  449. failed_to_start_ = true;
  450. return nullptr;
  451. }
  452. for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) {
  453. // Start or restart symbolizer if we failed to send command to it.
  454. if (const char *res = SendCommandImpl(command))
  455. return res;
  456. Restart();
  457. }
  458. if (!failed_to_start_) {
  459. Report("WARNING: Failed to use and restart external symbolizer!\n");
  460. failed_to_start_ = true;
  461. }
  462. return nullptr;
  463. }
  464. const char *SymbolizerProcess::SendCommandImpl(const char *command) {
  465. if (input_fd_ == kInvalidFd || output_fd_ == kInvalidFd)
  466. return nullptr;
  467. if (!WriteToSymbolizer(command, internal_strlen(command)))
  468. return nullptr;
  469. if (!ReadFromSymbolizer())
  470. return nullptr;
  471. return buffer_.data();
  472. }
  473. bool SymbolizerProcess::Restart() {
  474. if (input_fd_ != kInvalidFd)
  475. CloseFile(input_fd_);
  476. if (output_fd_ != kInvalidFd)
  477. CloseFile(output_fd_);
  478. return StartSymbolizerSubprocess();
  479. }
  480. bool SymbolizerProcess::ReadFromSymbolizer() {
  481. buffer_.clear();
  482. constexpr uptr max_length = 1024;
  483. bool ret = true;
  484. do {
  485. uptr just_read = 0;
  486. uptr size_before = buffer_.size();
  487. buffer_.resize(size_before + max_length);
  488. buffer_.resize(buffer_.capacity());
  489. bool ret = ReadFromFile(input_fd_, &buffer_[size_before],
  490. buffer_.size() - size_before, &just_read);
  491. if (!ret)
  492. just_read = 0;
  493. buffer_.resize(size_before + just_read);
  494. // We can't read 0 bytes, as we don't expect external symbolizer to close
  495. // its stdout.
  496. if (just_read == 0) {
  497. Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
  498. ret = false;
  499. break;
  500. }
  501. } while (!ReachedEndOfOutput(buffer_.data(), buffer_.size()));
  502. buffer_.push_back('\0');
  503. return ret;
  504. }
  505. bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) {
  506. if (length == 0)
  507. return true;
  508. uptr write_len = 0;
  509. bool success = WriteToFile(output_fd_, buffer, length, &write_len);
  510. if (!success || write_len != length) {
  511. Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
  512. return false;
  513. }
  514. return true;
  515. }
  516. #endif // !SANITIZER_SYMBOLIZER_MARKUP
  517. } // namespace __sanitizer