javascript_event_processor.py 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206
  1. import copy
  2. import itertools
  3. import re
  4. from os.path import splitext
  5. from typing import TYPE_CHECKING
  6. from urllib.parse import urlsplit
  7. from symbolic import SourceMapView, SourceView
  8. from apps.sourcecode.models import DebugSymbolBundle
  9. from sentry.utils.safe import get_path
  10. if TYPE_CHECKING:
  11. from .schema import IssueEventSchema, StackTrace, StackTraceFrame
  12. UNKNOWN_MODULE = "<unknown module>"
  13. CLEAN_MODULE_RE = re.compile(
  14. r"""^
  15. (?:/| # Leading slashes
  16. (?:
  17. (?:java)?scripts?|js|build|static|node_modules|bower_components|[_\.~].*?| # common folder prefixes
  18. v?(?:\d+\.)*\d+| # version numbers, v1, 1.0.0
  19. [a-f0-9]{7,8}| # short sha
  20. [a-f0-9]{32}| # md5
  21. [a-f0-9]{40} # sha1
  22. )/)+|
  23. (?:[-\.][a-f0-9]{7,}$) # Ending in a commitish
  24. """,
  25. re.X | re.I,
  26. )
  27. VERSION_RE = re.compile(r"^[a-f0-9]{32}|[a-f0-9]{40}$", re.I)
  28. NODE_MODULES_RE = re.compile(r"\bnode_modules/")
  29. def generate_module(src):
  30. """
  31. Converts a url into a made-up module name by doing the following:
  32. * Extract just the path name ignoring querystrings
  33. * Trimming off the initial /
  34. * Trimming off the file extension
  35. * Removes off useless folder prefixes
  36. e.g. http://google.com/js/v1.0/foo/bar/baz.js -> foo/bar/baz
  37. """
  38. if not src:
  39. return UNKNOWN_MODULE
  40. filename, _ = splitext(urlsplit(src).path)
  41. if filename.endswith(".min"):
  42. filename = filename[:-4]
  43. tokens = filename.split("/")
  44. for idx, token in enumerate(tokens):
  45. # a SHA
  46. if VERSION_RE.match(token):
  47. return "/".join(tokens[idx + 1 :])
  48. return CLEAN_MODULE_RE.sub("", filename) or UNKNOWN_MODULE
  49. class JavascriptEventProcessor:
  50. """
  51. Based partially on sentry/lang/javascript/processor.py
  52. """
  53. def __init__(
  54. self,
  55. release_id: int,
  56. data: "IssueEventSchema",
  57. debug_bundles: list[DebugSymbolBundle],
  58. ):
  59. self.release_id = release_id
  60. self.data = data
  61. self.debug_bundles = debug_bundles
  62. def get_stacktraces(self) -> list["StackTrace"]:
  63. data = self.data
  64. if data.exception and not isinstance(data.exception, list):
  65. return [e.stacktrace for e in data.exception.values if e.stacktrace]
  66. return []
  67. def get_valid_frames(self, stacktraces) -> list["StackTraceFrame"]:
  68. frames = [stacktrace.frames for stacktrace in stacktraces]
  69. merged = list(itertools.chain(*frames))
  70. return [f for f in merged if f is not None and f.lineno is not None]
  71. def process_frame(self, frame, map_file, minified_source):
  72. # Required to determine source
  73. if not frame.abs_path or not frame.lineno:
  74. return
  75. minified_source.blob.blob.seek(0)
  76. map_file.blob.blob.seek(0)
  77. sourcemap_view = SourceMapView.from_json_bytes(map_file.blob.blob.read())
  78. minified_source_view = SourceView.from_bytes(minified_source.blob.blob.read())
  79. token = sourcemap_view.lookup(
  80. frame.lineno - 1,
  81. frame.colno - 1,
  82. frame.function,
  83. minified_source_view,
  84. )
  85. if not token:
  86. return
  87. frame.lineno = token.src_line + 1
  88. frame.colno = token.src_col + 1
  89. if token.function_name:
  90. frame.function = token.function_name
  91. filename = token.src
  92. abs_path = frame.abs_path
  93. in_app = None
  94. # special case webpack support
  95. # abs_path will always be the full path with webpack:/// prefix.
  96. # filename will be relative to that
  97. if abs_path.startswith("webpack:"):
  98. filename = abs_path
  99. # webpack seems to use ~ to imply "relative to resolver root"
  100. # which is generally seen for third party deps
  101. # (i.e. node_modules)
  102. if "/~/" in filename:
  103. filename = "~/" + abs_path.split("/~/", 1)[-1]
  104. else:
  105. filename = filename.split("webpack:///", 1)[-1]
  106. # As noted above:
  107. # * [js/node] '~/' means they're coming from node_modules, so these are not app dependencies
  108. # * [node] sames goes for `./node_modules/` and '../node_modules/', which is used when bundling node apps
  109. # * [node] and webpack, which includes it's own code to bootstrap all modules and its internals
  110. # eg. webpack:///webpack/bootstrap, webpack:///external
  111. if (
  112. filename.startswith("~/")
  113. or "/node_modules/" in filename
  114. or not filename.startswith("./")
  115. ):
  116. in_app = False
  117. # And conversely, local dependencies start with './'
  118. elif filename.startswith("./"):
  119. in_app = True
  120. # We want to explicitly generate a webpack module name
  121. frame["module"] = generate_module(filename)
  122. elif "/node_modules/" in abs_path:
  123. in_app = False
  124. if abs_path.startswith("app:"):
  125. if filename and NODE_MODULES_RE.search(filename):
  126. in_app = False
  127. else:
  128. in_app = True
  129. frame.filename = filename
  130. if not frame.module and abs_path.startswith(
  131. ("http:", "https:", "webpack:", "app:")
  132. ):
  133. frame.module = generate_module(abs_path)
  134. if in_app is not None:
  135. frame.in_app = in_app
  136. # Extract frame context
  137. source_result = next(
  138. (x for x in sourcemap_view.iter_sources() if x[1] == token.src), None
  139. )
  140. if source_result is not None:
  141. sourceview = sourcemap_view.get_sourceview(source_result[0])
  142. source = sourceview.get_source().splitlines()
  143. pre_lines = max(0, token.src_line - 5)
  144. past_lines = min(len(source), token.src_line + 5)
  145. frame.context_line = source[token.src_line]
  146. frame.pre_context = source[pre_lines : token.src_line]
  147. frame.post_context = source[token.src_line + 1 : past_lines]
  148. def transform(self):
  149. stacktraces = self.get_stacktraces()
  150. frames = self.get_valid_frames(stacktraces)
  151. if not self.debug_bundles:
  152. return
  153. # Copy original stacktrace before modifying them
  154. for exception in get_path(
  155. self.data, "exception", "values", filter=True, default=()
  156. ):
  157. exception["raw_stacktrace"] = copy.deepcopy(exception["stacktrace"])
  158. frames_with_source = []
  159. for frame in frames:
  160. minified_filename = frame.abs_path.split("/")[-1] if frame.abs_path else ""
  161. minified_file = None
  162. map_file = None
  163. for debug_bundle in self.debug_bundles:
  164. # File name as given. When debug ids are used, this is based on the debug id
  165. file_name = debug_bundle.file.name
  166. # The code file name is the one given by the debug_meta source code image
  167. # When debug id is used, we must match on this name
  168. code_file = debug_bundle.data.get("code_file")
  169. if code_file: # Get name, not full path
  170. code_file = code_file.split("/")[-1]
  171. if minified_filename in [file_name, code_file]:
  172. minified_file = debug_bundle.file
  173. map_file = debug_bundle.sourcemap_file
  174. if map_file:
  175. frames_with_source.append((frame, map_file, minified_file))
  176. for frame_with_source in frames_with_source:
  177. self.process_frame(*frame_with_source)