sanitizer_symbolizer_libcdep.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568
  1. //===-- sanitizer_symbolizer_libcdep.cpp ----------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file is shared between AddressSanitizer and ThreadSanitizer
  10. // run-time libraries.
  11. //===----------------------------------------------------------------------===//
  12. #include "sanitizer_allocator_internal.h"
  13. #include "sanitizer_internal_defs.h"
  14. #include "sanitizer_platform.h"
  15. #include "sanitizer_symbolizer_internal.h"
  16. namespace __sanitizer {
  17. Symbolizer *Symbolizer::GetOrInit() {
  18. SpinMutexLock l(&init_mu_);
  19. if (symbolizer_)
  20. return symbolizer_;
  21. symbolizer_ = PlatformInit();
  22. CHECK(symbolizer_);
  23. return symbolizer_;
  24. }
  25. // See sanitizer_symbolizer_markup.cpp.
  26. #if !SANITIZER_SYMBOLIZER_MARKUP
  27. const char *ExtractToken(const char *str, const char *delims, char **result) {
  28. uptr prefix_len = internal_strcspn(str, delims);
  29. *result = (char*)InternalAlloc(prefix_len + 1);
  30. internal_memcpy(*result, str, prefix_len);
  31. (*result)[prefix_len] = '\0';
  32. const char *prefix_end = str + prefix_len;
  33. if (*prefix_end != '\0') prefix_end++;
  34. return prefix_end;
  35. }
  36. const char *ExtractInt(const char *str, const char *delims, int *result) {
  37. char *buff = nullptr;
  38. const char *ret = ExtractToken(str, delims, &buff);
  39. if (buff) {
  40. *result = (int)internal_atoll(buff);
  41. }
  42. InternalFree(buff);
  43. return ret;
  44. }
  45. const char *ExtractUptr(const char *str, const char *delims, uptr *result) {
  46. char *buff = nullptr;
  47. const char *ret = ExtractToken(str, delims, &buff);
  48. if (buff) {
  49. *result = (uptr)internal_atoll(buff);
  50. }
  51. InternalFree(buff);
  52. return ret;
  53. }
  54. const char *ExtractSptr(const char *str, const char *delims, sptr *result) {
  55. char *buff = nullptr;
  56. const char *ret = ExtractToken(str, delims, &buff);
  57. if (buff) {
  58. *result = (sptr)internal_atoll(buff);
  59. }
  60. InternalFree(buff);
  61. return ret;
  62. }
  63. const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter,
  64. char **result) {
  65. const char *found_delimiter = internal_strstr(str, delimiter);
  66. uptr prefix_len =
  67. found_delimiter ? found_delimiter - str : internal_strlen(str);
  68. *result = (char *)InternalAlloc(prefix_len + 1);
  69. internal_memcpy(*result, str, prefix_len);
  70. (*result)[prefix_len] = '\0';
  71. const char *prefix_end = str + prefix_len;
  72. if (*prefix_end != '\0') prefix_end += internal_strlen(delimiter);
  73. return prefix_end;
  74. }
  75. SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) {
  76. Lock l(&mu_);
  77. SymbolizedStack *res = SymbolizedStack::New(addr);
  78. auto *mod = FindModuleForAddress(addr);
  79. if (!mod)
  80. return res;
  81. // Always fill data about module name and offset.
  82. res->info.FillModuleInfo(*mod);
  83. for (auto &tool : tools_) {
  84. SymbolizerScope sym_scope(this);
  85. if (tool.SymbolizePC(addr, res)) {
  86. return res;
  87. }
  88. }
  89. return res;
  90. }
  91. bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) {
  92. Lock l(&mu_);
  93. const char *module_name = nullptr;
  94. uptr module_offset;
  95. ModuleArch arch;
  96. if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset,
  97. &arch))
  98. return false;
  99. info->Clear();
  100. info->module = internal_strdup(module_name);
  101. info->module_offset = module_offset;
  102. info->module_arch = arch;
  103. for (auto &tool : tools_) {
  104. SymbolizerScope sym_scope(this);
  105. if (tool.SymbolizeData(addr, info)) {
  106. return true;
  107. }
  108. }
  109. return true;
  110. }
  111. bool Symbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) {
  112. Lock l(&mu_);
  113. const char *module_name = nullptr;
  114. if (!FindModuleNameAndOffsetForAddress(
  115. addr, &module_name, &info->module_offset, &info->module_arch))
  116. return false;
  117. info->module = internal_strdup(module_name);
  118. for (auto &tool : tools_) {
  119. SymbolizerScope sym_scope(this);
  120. if (tool.SymbolizeFrame(addr, info)) {
  121. return true;
  122. }
  123. }
  124. return true;
  125. }
  126. bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
  127. uptr *module_address) {
  128. Lock l(&mu_);
  129. const char *internal_module_name = nullptr;
  130. ModuleArch arch;
  131. if (!FindModuleNameAndOffsetForAddress(pc, &internal_module_name,
  132. module_address, &arch))
  133. return false;
  134. if (module_name)
  135. *module_name = module_names_.GetOwnedCopy(internal_module_name);
  136. return true;
  137. }
  138. void Symbolizer::Flush() {
  139. Lock l(&mu_);
  140. for (auto &tool : tools_) {
  141. SymbolizerScope sym_scope(this);
  142. tool.Flush();
  143. }
  144. }
  145. const char *Symbolizer::Demangle(const char *name) {
  146. Lock l(&mu_);
  147. for (auto &tool : tools_) {
  148. SymbolizerScope sym_scope(this);
  149. if (const char *demangled = tool.Demangle(name))
  150. return demangled;
  151. }
  152. return PlatformDemangle(name);
  153. }
  154. bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address,
  155. const char **module_name,
  156. uptr *module_offset,
  157. ModuleArch *module_arch) {
  158. const LoadedModule *module = FindModuleForAddress(address);
  159. if (!module)
  160. return false;
  161. *module_name = module->full_name();
  162. *module_offset = address - module->base_address();
  163. *module_arch = module->arch();
  164. return true;
  165. }
  166. void Symbolizer::RefreshModules() {
  167. modules_.init();
  168. fallback_modules_.fallbackInit();
  169. RAW_CHECK(modules_.size() > 0);
  170. modules_fresh_ = true;
  171. }
  172. static const LoadedModule *SearchForModule(const ListOfModules &modules,
  173. uptr address) {
  174. for (uptr i = 0; i < modules.size(); i++) {
  175. if (modules[i].containsAddress(address)) {
  176. return &modules[i];
  177. }
  178. }
  179. return nullptr;
  180. }
  181. const LoadedModule *Symbolizer::FindModuleForAddress(uptr address) {
  182. bool modules_were_reloaded = false;
  183. if (!modules_fresh_) {
  184. RefreshModules();
  185. modules_were_reloaded = true;
  186. }
  187. const LoadedModule *module = SearchForModule(modules_, address);
  188. if (module) return module;
  189. // dlopen/dlclose interceptors invalidate the module list, but when
  190. // interception is disabled, we need to retry if the lookup fails in
  191. // case the module list changed.
  192. #if !SANITIZER_INTERCEPT_DLOPEN_DLCLOSE
  193. if (!modules_were_reloaded) {
  194. RefreshModules();
  195. module = SearchForModule(modules_, address);
  196. if (module) return module;
  197. }
  198. #endif
  199. if (fallback_modules_.size()) {
  200. module = SearchForModule(fallback_modules_, address);
  201. }
  202. return module;
  203. }
  204. // For now we assume the following protocol:
  205. // For each request of the form
  206. // <module_name> <module_offset>
  207. // passed to STDIN, external symbolizer prints to STDOUT response:
  208. // <function_name>
  209. // <file_name>:<line_number>:<column_number>
  210. // <function_name>
  211. // <file_name>:<line_number>:<column_number>
  212. // ...
  213. // <empty line>
  214. class LLVMSymbolizerProcess final : public SymbolizerProcess {
  215. public:
  216. explicit LLVMSymbolizerProcess(const char *path)
  217. : SymbolizerProcess(path, /*use_posix_spawn=*/SANITIZER_APPLE) {}
  218. private:
  219. bool ReachedEndOfOutput(const char *buffer, uptr length) const override {
  220. // Empty line marks the end of llvm-symbolizer output.
  221. return length >= 2 && buffer[length - 1] == '\n' &&
  222. buffer[length - 2] == '\n';
  223. }
  224. // When adding a new architecture, don't forget to also update
  225. // script/asan_symbolize.py and sanitizer_common.h.
  226. void GetArgV(const char *path_to_binary,
  227. const char *(&argv)[kArgVMax]) const override {
  228. #if defined(__x86_64h__)
  229. const char* const kSymbolizerArch = "--default-arch=x86_64h";
  230. #elif defined(__x86_64__)
  231. const char* const kSymbolizerArch = "--default-arch=x86_64";
  232. #elif defined(__i386__)
  233. const char* const kSymbolizerArch = "--default-arch=i386";
  234. #elif SANITIZER_LOONGARCH64
  235. const char *const kSymbolizerArch = "--default-arch=loongarch64";
  236. #elif SANITIZER_RISCV64
  237. const char *const kSymbolizerArch = "--default-arch=riscv64";
  238. #elif defined(__aarch64__)
  239. const char* const kSymbolizerArch = "--default-arch=arm64";
  240. #elif defined(__arm__)
  241. const char* const kSymbolizerArch = "--default-arch=arm";
  242. #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  243. const char* const kSymbolizerArch = "--default-arch=powerpc64";
  244. #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  245. const char* const kSymbolizerArch = "--default-arch=powerpc64le";
  246. #elif defined(__s390x__)
  247. const char* const kSymbolizerArch = "--default-arch=s390x";
  248. #elif defined(__s390__)
  249. const char* const kSymbolizerArch = "--default-arch=s390";
  250. #else
  251. const char* const kSymbolizerArch = "--default-arch=unknown";
  252. #endif
  253. const char *const demangle_flag =
  254. common_flags()->demangle ? "--demangle" : "--no-demangle";
  255. const char *const inline_flag =
  256. common_flags()->symbolize_inline_frames ? "--inlines" : "--no-inlines";
  257. int i = 0;
  258. argv[i++] = path_to_binary;
  259. argv[i++] = demangle_flag;
  260. argv[i++] = inline_flag;
  261. argv[i++] = kSymbolizerArch;
  262. argv[i++] = nullptr;
  263. CHECK_LE(i, kArgVMax);
  264. }
  265. };
  266. LLVMSymbolizer::LLVMSymbolizer(const char *path, LowLevelAllocator *allocator)
  267. : symbolizer_process_(new(*allocator) LLVMSymbolizerProcess(path)) {}
  268. // Parse a <file>:<line>[:<column>] buffer. The file path may contain colons on
  269. // Windows, so extract tokens from the right hand side first. The column info is
  270. // also optional.
  271. static const char *ParseFileLineInfo(AddressInfo *info, const char *str) {
  272. char *file_line_info = nullptr;
  273. str = ExtractToken(str, "\n", &file_line_info);
  274. CHECK(file_line_info);
  275. if (uptr size = internal_strlen(file_line_info)) {
  276. char *back = file_line_info + size - 1;
  277. for (int i = 0; i < 2; ++i) {
  278. while (back > file_line_info && IsDigit(*back)) --back;
  279. if (*back != ':' || !IsDigit(back[1])) break;
  280. info->column = info->line;
  281. info->line = internal_atoll(back + 1);
  282. // Truncate the string at the colon to keep only filename.
  283. *back = '\0';
  284. --back;
  285. }
  286. ExtractToken(file_line_info, "", &info->file);
  287. }
  288. InternalFree(file_line_info);
  289. return str;
  290. }
  291. // Parses one or more two-line strings in the following format:
  292. // <function_name>
  293. // <file_name>:<line_number>[:<column_number>]
  294. // Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
  295. // them use the same output format.
  296. void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) {
  297. bool top_frame = true;
  298. SymbolizedStack *last = res;
  299. while (true) {
  300. char *function_name = nullptr;
  301. str = ExtractToken(str, "\n", &function_name);
  302. CHECK(function_name);
  303. if (function_name[0] == '\0') {
  304. // There are no more frames.
  305. InternalFree(function_name);
  306. break;
  307. }
  308. SymbolizedStack *cur;
  309. if (top_frame) {
  310. cur = res;
  311. top_frame = false;
  312. } else {
  313. cur = SymbolizedStack::New(res->info.address);
  314. cur->info.FillModuleInfo(res->info.module, res->info.module_offset,
  315. res->info.module_arch);
  316. last->next = cur;
  317. last = cur;
  318. }
  319. AddressInfo *info = &cur->info;
  320. info->function = function_name;
  321. str = ParseFileLineInfo(info, str);
  322. // Functions and filenames can be "??", in which case we write 0
  323. // to address info to mark that names are unknown.
  324. if (0 == internal_strcmp(info->function, "??")) {
  325. InternalFree(info->function);
  326. info->function = 0;
  327. }
  328. if (info->file && 0 == internal_strcmp(info->file, "??")) {
  329. InternalFree(info->file);
  330. info->file = 0;
  331. }
  332. }
  333. }
  334. // Parses a two- or three-line string in the following format:
  335. // <symbol_name>
  336. // <start_address> <size>
  337. // <filename>:<column>
  338. // Used by LLVMSymbolizer and InternalSymbolizer. LLVMSymbolizer added support
  339. // for symbolizing the third line in D123538, but we support the older two-line
  340. // information as well.
  341. void ParseSymbolizeDataOutput(const char *str, DataInfo *info) {
  342. str = ExtractToken(str, "\n", &info->name);
  343. str = ExtractUptr(str, " ", &info->start);
  344. str = ExtractUptr(str, "\n", &info->size);
  345. // Note: If the third line isn't present, these calls will set info.{file,
  346. // line} to empty strings.
  347. str = ExtractToken(str, ":", &info->file);
  348. str = ExtractUptr(str, "\n", &info->line);
  349. }
  350. static void ParseSymbolizeFrameOutput(const char *str,
  351. InternalMmapVector<LocalInfo> *locals) {
  352. if (internal_strncmp(str, "??", 2) == 0)
  353. return;
  354. while (*str) {
  355. LocalInfo local;
  356. str = ExtractToken(str, "\n", &local.function_name);
  357. str = ExtractToken(str, "\n", &local.name);
  358. AddressInfo addr;
  359. str = ParseFileLineInfo(&addr, str);
  360. local.decl_file = addr.file;
  361. local.decl_line = addr.line;
  362. local.has_frame_offset = internal_strncmp(str, "??", 2) != 0;
  363. str = ExtractSptr(str, " ", &local.frame_offset);
  364. local.has_size = internal_strncmp(str, "??", 2) != 0;
  365. str = ExtractUptr(str, " ", &local.size);
  366. local.has_tag_offset = internal_strncmp(str, "??", 2) != 0;
  367. str = ExtractUptr(str, "\n", &local.tag_offset);
  368. locals->push_back(local);
  369. }
  370. }
  371. bool LLVMSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
  372. AddressInfo *info = &stack->info;
  373. const char *buf = FormatAndSendCommand(
  374. "CODE", info->module, info->module_offset, info->module_arch);
  375. if (!buf)
  376. return false;
  377. ParseSymbolizePCOutput(buf, stack);
  378. return true;
  379. }
  380. bool LLVMSymbolizer::SymbolizeData(uptr addr, DataInfo *info) {
  381. const char *buf = FormatAndSendCommand(
  382. "DATA", info->module, info->module_offset, info->module_arch);
  383. if (!buf)
  384. return false;
  385. ParseSymbolizeDataOutput(buf, info);
  386. info->start += (addr - info->module_offset); // Add the base address.
  387. return true;
  388. }
  389. bool LLVMSymbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) {
  390. const char *buf = FormatAndSendCommand(
  391. "FRAME", info->module, info->module_offset, info->module_arch);
  392. if (!buf)
  393. return false;
  394. ParseSymbolizeFrameOutput(buf, &info->locals);
  395. return true;
  396. }
  397. const char *LLVMSymbolizer::FormatAndSendCommand(const char *command_prefix,
  398. const char *module_name,
  399. uptr module_offset,
  400. ModuleArch arch) {
  401. CHECK(module_name);
  402. int size_needed = 0;
  403. if (arch == kModuleArchUnknown)
  404. size_needed = internal_snprintf(buffer_, kBufferSize, "%s \"%s\" 0x%zx\n",
  405. command_prefix, module_name, module_offset);
  406. else
  407. size_needed = internal_snprintf(buffer_, kBufferSize,
  408. "%s \"%s:%s\" 0x%zx\n", command_prefix,
  409. module_name, ModuleArchToString(arch),
  410. module_offset);
  411. if (size_needed >= static_cast<int>(kBufferSize)) {
  412. Report("WARNING: Command buffer too small");
  413. return nullptr;
  414. }
  415. return symbolizer_process_->SendCommand(buffer_);
  416. }
  417. SymbolizerProcess::SymbolizerProcess(const char *path, bool use_posix_spawn)
  418. : path_(path),
  419. input_fd_(kInvalidFd),
  420. output_fd_(kInvalidFd),
  421. times_restarted_(0),
  422. failed_to_start_(false),
  423. reported_invalid_path_(false),
  424. use_posix_spawn_(use_posix_spawn) {
  425. CHECK(path_);
  426. CHECK_NE(path_[0], '\0');
  427. }
  428. static bool IsSameModule(const char* path) {
  429. if (const char* ProcessName = GetProcessName()) {
  430. if (const char* SymbolizerName = StripModuleName(path)) {
  431. return !internal_strcmp(ProcessName, SymbolizerName);
  432. }
  433. }
  434. return false;
  435. }
  436. const char *SymbolizerProcess::SendCommand(const char *command) {
  437. if (failed_to_start_)
  438. return nullptr;
  439. if (IsSameModule(path_)) {
  440. Report("WARNING: Symbolizer was blocked from starting itself!\n");
  441. failed_to_start_ = true;
  442. return nullptr;
  443. }
  444. for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) {
  445. // Start or restart symbolizer if we failed to send command to it.
  446. if (const char *res = SendCommandImpl(command))
  447. return res;
  448. Restart();
  449. }
  450. if (!failed_to_start_) {
  451. Report("WARNING: Failed to use and restart external symbolizer!\n");
  452. failed_to_start_ = true;
  453. }
  454. return nullptr;
  455. }
  456. const char *SymbolizerProcess::SendCommandImpl(const char *command) {
  457. if (input_fd_ == kInvalidFd || output_fd_ == kInvalidFd)
  458. return nullptr;
  459. if (!WriteToSymbolizer(command, internal_strlen(command)))
  460. return nullptr;
  461. if (!ReadFromSymbolizer())
  462. return nullptr;
  463. return buffer_.data();
  464. }
  465. bool SymbolizerProcess::Restart() {
  466. if (input_fd_ != kInvalidFd)
  467. CloseFile(input_fd_);
  468. if (output_fd_ != kInvalidFd)
  469. CloseFile(output_fd_);
  470. return StartSymbolizerSubprocess();
  471. }
  472. bool SymbolizerProcess::ReadFromSymbolizer() {
  473. buffer_.clear();
  474. constexpr uptr max_length = 1024;
  475. bool ret = true;
  476. do {
  477. uptr just_read = 0;
  478. uptr size_before = buffer_.size();
  479. buffer_.resize(size_before + max_length);
  480. buffer_.resize(buffer_.capacity());
  481. bool ret = ReadFromFile(input_fd_, &buffer_[size_before],
  482. buffer_.size() - size_before, &just_read);
  483. if (!ret)
  484. just_read = 0;
  485. buffer_.resize(size_before + just_read);
  486. // We can't read 0 bytes, as we don't expect external symbolizer to close
  487. // its stdout.
  488. if (just_read == 0) {
  489. Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
  490. ret = false;
  491. break;
  492. }
  493. } while (!ReachedEndOfOutput(buffer_.data(), buffer_.size()));
  494. buffer_.push_back('\0');
  495. return ret;
  496. }
  497. bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) {
  498. if (length == 0)
  499. return true;
  500. uptr write_len = 0;
  501. bool success = WriteToFile(output_fd_, buffer, length, &write_len);
  502. if (!success || write_len != length) {
  503. Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
  504. return false;
  505. }
  506. return true;
  507. }
  508. #endif // !SANITIZER_SYMBOLIZER_MARKUP
  509. } // namespace __sanitizer