javascript_event_processor.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. import copy
  2. import itertools
  3. import re
  4. from os.path import splitext
  5. from urllib.parse import urlsplit
  6. from symbolic import SourceMapView, SourceView
  7. from apps.files.models import File
  8. from sentry.utils.safe import get_path
  9. from .schema import IssueEventSchema, StackTrace, StackTraceFrame
  # Placeholder returned when no meaningful module name can be derived.
  UNKNOWN_MODULE = "<unknown module>"

  # Matches the uninformative leading part of a path (slashes, common asset
  # folders, version numbers, hash-named folders) or a trailing commit-ish
  # suffix, so that it can be stripped from a module name.
  CLEAN_MODULE_RE = re.compile(
      r"""^
  (?:/| # Leading slashes
  (?:
      (?:java)?scripts?|js|build|static|node_modules|bower_components|[_\.~].*?| # common folder prefixes
      v?(?:\d+\.)*\d+| # version numbers, v1, 1.0.0
      [a-f0-9]{7,8}| # short sha
      [a-f0-9]{32}| # md5
      [a-f0-9]{40} # sha1
  )/)+|
  (?:[-\.][a-f0-9]{7,}$) # Ending in a commitish
  """,
      re.X | re.I,
  )

  # A path segment that is exactly an md5 (32 hex chars) or sha1 (40 hex chars).
  # The alternatives are grouped so that BOTH anchors apply to BOTH lengths;
  # without the group, "^" only binds to the first branch and "$" only to the
  # second, so any segment merely starting with 32 hex characters would match.
  VERSION_RE = re.compile(r"^(?:[a-f0-9]{32}|[a-f0-9]{40})$", re.I)

  # Finds a "node_modules/" path component anywhere in a filename.
  NODE_MODULES_RE = re.compile(r"\bnode_modules/")
  def generate_module(src):
      """
      Converts a url into a made-up module name by doing the following:
       * Extract just the path name ignoring querystrings
       * Trimming off the initial /
       * Trimming off the file extension (and a trailing ".min")
       * Removes off useless folder prefixes
      e.g. http://google.com/js/v1.0/foo/bar/baz.js -> foo/bar/baz

      Returns UNKNOWN_MODULE when ``src`` is empty or when nothing is left
      after the clean-up.
      """
      if not src:
          return UNKNOWN_MODULE

      filename, _ = splitext(urlsplit(src).path)
      if filename.endswith(".min"):
          filename = filename[: -len(".min")]

      tokens = filename.split("/")
      for idx, token in enumerate(tokens):
          # a SHA: everything up to and including it is treated as a prefix
          if VERSION_RE.match(token):
              # The hash may be the last segment, leaving nothing after it;
              # fall back to the placeholder instead of returning "".
              return "/".join(tokens[idx + 1 :]) or UNKNOWN_MODULE

      return CLEAN_MODULE_RE.sub("", filename) or UNKNOWN_MODULE
  class JavascriptEventProcessor:
      """
      Rewrites the JavaScript stack frames of an event using the source maps
      stored as files of a release.

      Based partially on sentry/lang/javascript/processor.py
      """

      # Number of source lines kept on each side of the context line.
      CONTEXT_LINES = 5

      def __init__(self, release_id: int, data: IssueEventSchema):
          self.release_id = release_id
          self.data = data

      def get_stacktraces(self) -> list[StackTrace]:
          """Return the stacktrace of every exception value that has one.

          Returns an empty list when the event has no exception or when
          ``data.exception`` is a plain list.
          """
          exception = self.data.exception
          if exception and not isinstance(exception, list):
              return [e.stacktrace for e in exception.values if e.stacktrace]
          return []

      def get_valid_frames(self, stacktraces) -> list[StackTraceFrame]:
          """Flatten the frames of ``stacktraces``, keeping only frames that
          are not None and carry a line number."""
          merged = itertools.chain.from_iterable(
              # Tolerate a stacktrace without frames.
              stacktrace.frames or ()
              for stacktrace in stacktraces
          )
          return [f for f in merged if f is not None and f.lineno is not None]

      def process_frame(self, frame, map_file, minified_source):
          """Map one minified frame back to its original location, in place.

          ``map_file`` and ``minified_source`` are file records whose content
          is read through ``.blob.blob``. ``minified_source`` may be None:
          ``transform`` passes None when no file matching the minified name
          was found. The frame is left untouched when it lacks the data
          needed for a lookup or when the source map has no matching token.
          """
          # Required to determine source. colno is needed as well: the
          # lookup below is by (line, column).
          if not frame.abs_path or not frame.lineno or frame.colno is None:
              return

          map_file.blob.blob.seek(0)
          sourcemap_view = SourceMapView.from_json_bytes(map_file.blob.blob.read())

          # The minified source only helps to recover the original function
          # name; the lookup is done without it when it is not available.
          minified_source_view = None
          if minified_source is not None:
              minified_source.blob.blob.seek(0)
              minified_source_view = SourceView.from_bytes(
                  minified_source.blob.blob.read()
              )

          token = sourcemap_view.lookup(
              frame.lineno - 1,
              # Clamp so that a reported column of 0 does not turn negative.
              max(frame.colno - 1, 0),
              frame.function,
              minified_source_view,
          )
          if not token:
              return

          frame.lineno = token.src_line + 1
          frame.colno = token.src_col + 1
          if token.function_name:
              frame.function = token.function_name

          filename = token.src
          abs_path = frame.abs_path
          in_app = None

          # special case webpack support
          # abs_path will always be the full path with webpack:/// prefix.
          # filename will be relative to that
          if abs_path.startswith("webpack:"):
              # webpack seems to use ~ to imply "relative to resolver root"
              # which is generally seen for third party deps
              # (i.e. node_modules)
              if "/~/" in abs_path:
                  filename = "~/" + abs_path.split("/~/", 1)[-1]
              else:
                  filename = abs_path.split("webpack:///", 1)[-1]

              # As noted above:
              # * [js/node] '~/' means they're coming from node_modules, so
              #   these are not app dependencies
              # * [node] same goes for `./node_modules/` and
              #   '../node_modules/', which is used when bundling node apps
              # * [node] and webpack, which includes its own code to bootstrap
              #   all modules and its internals
              #   eg. webpack:///webpack/bootstrap, webpack:///external
              # Conversely, local dependencies start with './'.
              in_app = filename.startswith("./") and "/node_modules/" not in filename

              # We want to explicitly generate a webpack module name
              frame.module = generate_module(filename)
          elif "/node_modules/" in abs_path:
              in_app = False

          if abs_path.startswith("app:"):
              in_app = not (filename and NODE_MODULES_RE.search(filename))

          frame.filename = filename
          if not frame.module and abs_path.startswith(
              ("http:", "https:", "webpack:", "app:")
          ):
              frame.module = generate_module(abs_path)

          if in_app is not None:
              frame.in_app = in_app

          # Extract frame context
          source_result = next(
              (x for x in sourcemap_view.iter_sources() if x[1] == token.src), None
          )
          if source_result is None:
              return
          sourceview = sourcemap_view.get_sourceview(source_result[0])
          if sourceview is None:
              # NOTE(review): presumably the case when the map does not embed
              # the original source; there is no context to extract then.
              return
          source = sourceview.get_source().splitlines()
          line = token.src_line
          if line >= len(source):
              # The token points past the end of the embedded source.
              return
          frame.context_line = source[line]
          frame.pre_context = source[max(0, line - self.CONTEXT_LINES) : line]
          # Slicing clamps at the end of the list, so no explicit min() is
          # needed; the window is the same size as pre_context.
          frame.post_context = source[line + 1 : line + 1 + self.CONTEXT_LINES]

      def transform(self):
          """Apply source maps of the release to every valid frame of the event.

          Does nothing when the release has no file whose name matches a frame.
          """
          stacktraces = self.get_stacktraces()
          frames = self.get_valid_frames(stacktraces)

          filenames = {
              frame.filename.split("/")[-1] for frame in frames if frame.filename
          }
          # Make a guess at which files are relevant, match then better after
          source_files = File.objects.filter(
              releasefile__release_id=self.release_id,
              name__in={filename + ".map" for filename in filenames} | filenames,
          )
          if not source_files:
              return

          # Copy original stacktrace before modifying them
          # NOTE(review): this loop uses mapping-style access while the rest
          # of the class uses attribute access -- confirm get_path yields
          # items for the schema object held in self.data.
          for exception in get_path(
              self.data, "exception", "values", filter=True, default=()
          ):
              exception["raw_stacktrace"] = copy.deepcopy(exception["stacktrace"])

          # Index once by name instead of scanning all files for every frame.
          files_by_name = {source_file.name: source_file for source_file in source_files}
          for frame in frames:
              minified_filename = frame.abs_path.split("/")[-1] if frame.abs_path else ""
              map_file = files_by_name.get(minified_filename + ".map")
              if map_file:
                  # The minified file itself is optional (may be None).
                  self.process_frame(
                      frame, map_file, files_by_name.get(minified_filename)
                  )