__init__.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. # coding: utf-8
  2. import os
  3. import re
  4. import glob
  5. import socket
  6. import logging
  7. import platform
  8. import subprocess
  9. import six
  10. from library.python.reservoir_sampling import reservoir_sampling
  11. logger = logging.getLogger(__name__)
  12. def _read_file(filename):
  13. with open(filename) as afile:
  14. return afile.read().strip("\n")
  15. def recover_core_dump_file(binary_path, cwd, pid, core_pattern=None):
  16. class CoreFilePattern(object):
  17. def __init__(self, path, mask):
  18. self.path = path
  19. self.mask = mask
  20. cwd = cwd or os.getcwd()
  21. system = platform.system().lower()
  22. if system.startswith("linux"):
  23. import stat
  24. import resource
  25. logger.debug("hostname = '%s'", socket.gethostname())
  26. logger.debug("rlimit_core = '%s'", str(resource.getrlimit(resource.RLIMIT_CORE)))
  27. if core_pattern is None:
  28. core_pattern = _read_file("/proc/sys/kernel/core_pattern")
  29. logger.debug("core_pattern = '%s'", core_pattern)
  30. if core_pattern.startswith("/"):
  31. default_pattern = CoreFilePattern(os.path.dirname(core_pattern), '*')
  32. else:
  33. default_pattern = CoreFilePattern(cwd, '*')
  34. def resolve_core_mask(core_mask):
  35. def resolve(text):
  36. if text == "%p":
  37. return str(pid)
  38. elif text == "%e":
  39. # https://github.com/torvalds/linux/blob/7876320f88802b22d4e2daf7eb027dd14175a0f8/include/linux/sched.h#L847
  40. # https://github.com/torvalds/linux/blob/7876320f88802b22d4e2daf7eb027dd14175a0f8/fs/coredump.c#L278
  41. return os.path.basename(binary_path)[:15]
  42. elif text == "%E":
  43. return binary_path.replace("/", "!")
  44. elif text == "%%":
  45. return "%"
  46. elif text.startswith("%"):
  47. return "*"
  48. return text
  49. parts = filter(None, re.split(r"(%.)", core_mask))
  50. return "".join([resolve(p) for p in parts])
  51. # don't interpret a program for piping core dumps as a pattern
  52. if core_pattern and not core_pattern.startswith("|"):
  53. default_pattern.mask = os.path.basename(core_pattern)
  54. else:
  55. core_uses_pid = int(_read_file("/proc/sys/kernel/core_uses_pid"))
  56. logger.debug("core_uses_pid = '%d'", core_uses_pid)
  57. if core_uses_pid == 0:
  58. default_pattern.mask = "core"
  59. else:
  60. default_pattern.mask = "core.%p"
  61. # widely distributed core dump dir and mask (see DEVTOOLS-4408)
  62. yandex_pattern = CoreFilePattern('/coredumps', '%e.%p.%s')
  63. yandex_market_pattern = CoreFilePattern('/var/tmp/cores', 'core.%..%e.%s.%p.*')
  64. for pattern in [default_pattern, yandex_pattern, yandex_market_pattern]:
  65. pattern.mask = resolve_core_mask(pattern.mask)
  66. if not os.path.exists(pattern.path):
  67. logger.warning("Core dump dir doesn't exist: %s", pattern.path)
  68. continue
  69. logger.debug(
  70. "Core dump dir (%s) permission mask: %s (expected: %s (%s-dir, %s-sticky bit))",
  71. pattern.path,
  72. oct(os.stat(pattern.path)[stat.ST_MODE]),
  73. oct(stat.S_IFDIR | stat.S_ISVTX | 0o777),
  74. oct(stat.S_IFDIR),
  75. oct(stat.S_ISVTX),
  76. )
  77. logger.debug("Search for core dump files match pattern '%s' in '%s'", pattern.mask, pattern.path)
  78. cores = glob.glob(os.path.join(pattern.path, pattern.mask))
  79. files = os.listdir(pattern.path)
  80. logger.debug(
  81. "Matched core dump files (%d/%d): [%s] (mismatched samples: %s)",
  82. len(cores),
  83. len(files),
  84. ", ".join(cores),
  85. ", ".join(reservoir_sampling(files, 5)),
  86. )
  87. if len(cores) == 1:
  88. return cores[0]
  89. elif len(cores) > 1:
  90. stat = [(filename, os.stat(filename).st_mtime) for filename in cores]
  91. entry = sorted(stat, key=lambda x: x[1])[-1]
  92. logger.debug("Latest core dump file: '%s' with %d mtime", entry[0], entry[1])
  93. return entry[0]
  94. else:
  95. logger.debug("Core dump file recovering is not supported on '%s'", system)
  96. return None
  97. def get_gdb_full_backtrace(binary, core, gdb_path):
  98. cmd = [
  99. gdb_path, binary, core,
  100. "--eval-command", "set print thread-events off",
  101. "--eval-command", "thread apply all backtrace full",
  102. "--batch",
  103. "--quiet",
  104. ]
  105. proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  106. output, stderr = proc.communicate()
  107. output = six.ensure_str(output)
  108. if stderr:
  109. output += "\nstderr >>\n" + six.ensure_str(stderr)
  110. return output
  111. def get_problem_stack(backtrace):
  112. stack = []
  113. found_thread1 = False
  114. regex = re.compile(r'[Tt]hread (\d+)')
  115. for line in backtrace.split("\n"):
  116. match = regex.search(line)
  117. if match:
  118. if found_thread1:
  119. break
  120. if int(match.group(1)) == 1:
  121. found_thread1 = True
  122. if found_thread1:
  123. stack.append(line)
  124. if not stack:
  125. return backtrace
  126. return "\n".join(stack)
  127. # XXX
  128. def colorize_backtrace(text):
  129. filters = [
  130. # Function names and the class they belong to
  131. (re.compile(r"^(#[0-9]+ .*?)([a-zA-Z0-9_:\.@]+)(\s?\()", flags=re.MULTILINE), r"\1[[c:cyan]]\2[[rst]]\3"),
  132. # Function argument names
  133. (re.compile(r"([a-zA-Z0-9_#]*)(\s?=\s?)"), r"[[c:green]]\1[[rst]]\2"),
  134. # Stack frame number
  135. (re.compile(r"^(#[0-9]+)", flags=re.MULTILINE), r"[[c:red]]\1[[rst]]"),
  136. # Thread id colorization
  137. (re.compile(r"^([ \*]) ([0-9]+)", flags=re.MULTILINE), r"[[c:light-cyan]]\1 [[c:red]]\2[[rst]]"),
  138. # File path and line number
  139. (re.compile(r"(\.*[/A-Za-z0-9\+_\.\-]*):(([0-9]+)(:[0-9]+)?)$", flags=re.MULTILINE), r"[[c:light-grey]]\1[[rst]]:[[c:magenta]]\2[[rst]]"),
  140. # Addresses
  141. (re.compile(r"\b(0x[a-f0-9]{6,})\b"), r"[[c:light-grey]]\1[[rst]]"),
  142. ]
  143. for regex, substitution in filters:
  144. text = regex.sub(substitution, text)
  145. return text
  146. def resolve_addresses(addresses, symbolizer, binary):
  147. addresses = list(set(addresses))
  148. cmd = [
  149. symbolizer,
  150. "-demangle",
  151. "-obj",
  152. binary,
  153. ]
  154. proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  155. out, err = proc.communicate(input="\n".join(addresses))
  156. if proc.returncode:
  157. raise Exception("Symbolizer failed with rc:{}\nstderr: {}".format(proc.returncode, err))
  158. resolved = filter(None, out.split("\n\n"))
  159. if len(addresses) != len(resolved):
  160. raise Exception("llvm-symbolizer can not extract lines from addresses (count mismatch: {}-{})".format(len(addresses), len(resolved)))
  161. return {k: v.strip(" \n") for k, v in zip(addresses, resolved)}