__init__.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. # coding: utf-8
  2. import os
  3. import re
  4. import glob
  5. import socket
  6. import logging
  7. import platform
  8. import subprocess
  9. import six
  10. from library.python.reservoir_sampling import reservoir_sampling
  11. logger = logging.getLogger(__name__)
  12. def _read_file(filename):
  13. with open(filename) as afile:
  14. return afile.read().strip("\n")
  15. def recover_core_dump_file(binary_path, cwd, pid):
  16. class CoreFilePattern(object):
  17. def __init__(self, path, mask):
  18. self.path = path
  19. self.mask = mask
  20. cwd = cwd or os.getcwd()
  21. system = platform.system().lower()
  22. if system.startswith("linux"):
  23. import stat
  24. import resource
  25. logger.debug("hostname = '%s'", socket.gethostname())
  26. logger.debug("rlimit_core = '%s'", str(resource.getrlimit(resource.RLIMIT_CORE)))
  27. core_pattern = _read_file("/proc/sys/kernel/core_pattern")
  28. logger.debug("core_pattern = '%s'", core_pattern)
  29. if core_pattern.startswith("/"):
  30. default_pattern = CoreFilePattern(os.path.dirname(core_pattern), '*')
  31. else:
  32. default_pattern = CoreFilePattern(cwd, '*')
  33. def resolve_core_mask(core_mask):
  34. def resolve(text):
  35. if text == "%p":
  36. return str(pid)
  37. elif text == "%e":
  38. # https://github.com/torvalds/linux/blob/7876320f88802b22d4e2daf7eb027dd14175a0f8/include/linux/sched.h#L847
  39. # https://github.com/torvalds/linux/blob/7876320f88802b22d4e2daf7eb027dd14175a0f8/fs/coredump.c#L278
  40. return os.path.basename(binary_path)[:15]
  41. elif text == "%E":
  42. return binary_path.replace("/", "!")
  43. elif text == "%%":
  44. return "%"
  45. elif text.startswith("%"):
  46. return "*"
  47. return text
  48. parts = filter(None, re.split(r"(%.)", core_mask))
  49. return "".join([resolve(p) for p in parts])
  50. # don't interpret a program for piping core dumps as a pattern
  51. if core_pattern and not core_pattern.startswith("|"):
  52. default_pattern.mask = os.path.basename(core_pattern)
  53. else:
  54. core_uses_pid = int(_read_file("/proc/sys/kernel/core_uses_pid"))
  55. logger.debug("core_uses_pid = '%d'", core_uses_pid)
  56. if core_uses_pid == 0:
  57. default_pattern.mask = "core"
  58. else:
  59. default_pattern.mask = "core.%p"
  60. # widely distributed core dump dir and mask (see DEVTOOLS-4408)
  61. yandex_pattern = CoreFilePattern('/coredumps', '%e.%p.%s')
  62. yandex_market_pattern = CoreFilePattern('/var/tmp/cores', 'core.%..%e.%s.%p.*')
  63. for pattern in [default_pattern, yandex_pattern, yandex_market_pattern]:
  64. pattern.mask = resolve_core_mask(pattern.mask)
  65. if not os.path.exists(pattern.path):
  66. logger.warning("Core dump dir doesn't exist: %s", pattern.path)
  67. continue
  68. logger.debug(
  69. "Core dump dir (%s) permission mask: %s (expected: %s (%s-dir, %s-sticky bit))",
  70. pattern.path,
  71. oct(os.stat(pattern.path)[stat.ST_MODE]),
  72. oct(stat.S_IFDIR | stat.S_ISVTX | 0o777),
  73. oct(stat.S_IFDIR),
  74. oct(stat.S_ISVTX),
  75. )
  76. logger.debug("Search for core dump files match pattern '%s' in '%s'", pattern.mask, pattern.path)
  77. cores = glob.glob(os.path.join(pattern.path, pattern.mask))
  78. files = os.listdir(pattern.path)
  79. logger.debug(
  80. "Matched core dump files (%d/%d): [%s] (mismatched samples: %s)",
  81. len(cores),
  82. len(files),
  83. ", ".join(cores),
  84. ", ".join(reservoir_sampling(files, 5)),
  85. )
  86. if len(cores) == 1:
  87. return cores[0]
  88. elif len(cores) > 1:
  89. stat = [(filename, os.stat(filename).st_mtime) for filename in cores]
  90. entry = sorted(stat, key=lambda x: x[1])[-1]
  91. logger.debug("Latest core dump file: '%s' with %d mtime", entry[0], entry[1])
  92. return entry[0]
  93. else:
  94. logger.debug("Core dump file recovering is not supported on '%s'", system)
  95. return None
  96. def get_gdb_full_backtrace(binary, core, gdb_path):
  97. cmd = [
  98. gdb_path, binary, core,
  99. "--eval-command", "set print thread-events off",
  100. "--eval-command", "thread apply all backtrace full",
  101. "--batch",
  102. "--quiet",
  103. ]
  104. proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  105. output, stderr = proc.communicate()
  106. output = six.ensure_str(output)
  107. if stderr:
  108. output += "\nstderr >>\n" + six.ensure_str(stderr)
  109. return output
  110. def get_problem_stack(backtrace):
  111. stack = []
  112. found_thread1 = False
  113. regex = re.compile(r'[Tt]hread (\d+)')
  114. for line in backtrace.split("\n"):
  115. match = regex.search(line)
  116. if match:
  117. if found_thread1:
  118. break
  119. if int(match.group(1)) == 1:
  120. found_thread1 = True
  121. if found_thread1:
  122. stack.append(line)
  123. if not stack:
  124. return backtrace
  125. return "\n".join(stack)
  126. # XXX
  127. def colorize_backtrace(text):
  128. filters = [
  129. # Function names and the class they belong to
  130. (re.compile(r"^(#[0-9]+ .*?)([a-zA-Z0-9_:\.@]+)(\s?\()", flags=re.MULTILINE), r"\1[[c:cyan]]\2[[rst]]\3"),
  131. # Function argument names
  132. (re.compile(r"([a-zA-Z0-9_#]*)(\s?=\s?)"), r"[[c:green]]\1[[rst]]\2"),
  133. # Stack frame number
  134. (re.compile(r"^(#[0-9]+)", flags=re.MULTILINE), r"[[c:red]]\1[[rst]]"),
  135. # Thread id colorization
  136. (re.compile(r"^([ \*]) ([0-9]+)", flags=re.MULTILINE), r"[[c:light-cyan]]\1 [[c:red]]\2[[rst]]"),
  137. # File path and line number
  138. (re.compile(r"(\.*[/A-Za-z0-9\+_\.\-]*):(([0-9]+)(:[0-9]+)?)$", flags=re.MULTILINE), r"[[c:light-grey]]\1[[rst]]:[[c:magenta]]\2[[rst]]"),
  139. # Addresses
  140. (re.compile(r"\b(0x[a-f0-9]{6,})\b"), r"[[c:light-grey]]\1[[rst]]"),
  141. ]
  142. for regex, substitution in filters:
  143. text = regex.sub(substitution, text)
  144. return text
  145. def resolve_addresses(addresses, symbolizer, binary):
  146. addresses = list(set(addresses))
  147. cmd = [
  148. symbolizer,
  149. "-demangle",
  150. "-obj",
  151. binary,
  152. ]
  153. proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  154. out, err = proc.communicate(input="\n".join(addresses))
  155. if proc.returncode:
  156. raise Exception("Symbolizer failed with rc:{}\nstderr: {}".format(proc.returncode, err))
  157. resolved = filter(None, out.split("\n\n"))
  158. if len(addresses) != len(resolved):
  159. raise Exception("llvm-symbolizer can not extract lines from addresses (count mismatch: {}-{})".format(len(addresses), len(resolved)))
  160. return {k: v.strip(" \n") for k, v in zip(addresses, resolved)}