javascript.py 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. import copy
  2. import itertools
  3. import re
  4. from os.path import splitext
  5. from urllib.parse import urlsplit
  6. from symbolic import SourceMapView, SourceView
  7. from files.models import File
  8. from sentry.utils.safe import get_path
  9. from .base import EventProcessorBase
  10. UNKNOWN_MODULE = "<unknown module>"
  11. CLEAN_MODULE_RE = re.compile(
  12. r"""^
  13. (?:/| # Leading slashes
  14. (?:
  15. (?:java)?scripts?|js|build|static|node_modules|bower_components|[_\.~].*?| # common folder prefixes
  16. v?(?:\d+\.)*\d+| # version numbers, v1, 1.0.0
  17. [a-f0-9]{7,8}| # short sha
  18. [a-f0-9]{32}| # md5
  19. [a-f0-9]{40} # sha1
  20. )/)+|
  21. (?:[-\.][a-f0-9]{7,}$) # Ending in a commitish
  22. """,
  23. re.X | re.I,
  24. )
  25. VERSION_RE = re.compile(r"^[a-f0-9]{32}|[a-f0-9]{40}$", re.I)
  26. NODE_MODULES_RE = re.compile(r"\bnode_modules/")
  27. def generate_module(src):
  28. """
  29. Converts a url into a made-up module name by doing the following:
  30. * Extract just the path name ignoring querystrings
  31. * Trimming off the initial /
  32. * Trimming off the file extension
  33. * Removes off useless folder prefixes
  34. e.g. http://google.com/js/v1.0/foo/bar/baz.js -> foo/bar/baz
  35. """
  36. if not src:
  37. return UNKNOWN_MODULE
  38. filename, _ = splitext(urlsplit(src).path)
  39. if filename.endswith(".min"):
  40. filename = filename[:-4]
  41. tokens = filename.split("/")
  42. for idx, token in enumerate(tokens):
  43. # a SHA
  44. if VERSION_RE.match(token):
  45. return "/".join(tokens[idx + 1 :])
  46. return CLEAN_MODULE_RE.sub("", filename) or UNKNOWN_MODULE
  47. class JavascriptEventProcessor(EventProcessorBase):
  48. """
  49. Based partially on sentry/lang/javascript/processor.py
  50. """
  51. release_files = None
  52. def should_run(self):
  53. return self.data.get("platform") in ("javascript", "node") and self.release_id
  54. def get_stacktraces(self):
  55. exceptions = get_path(self.data, "exception", "values", filter=True, default=())
  56. stacktraces = [e["stacktrace"] for e in exceptions if e.get("stacktrace")]
  57. if "stacktrace" in self.data:
  58. stacktraces.append(self.data["stacktrace"])
  59. return stacktraces
  60. def get_valid_frames(self, stacktraces):
  61. frames = []
  62. frames = [stacktrace["frames"] for stacktrace in stacktraces]
  63. merged = list(itertools.chain(*frames))
  64. return [f for f in merged if f is not None and f.get("lineno") is not None]
  65. def process_frame(self, frame, map_file, minified_source):
  66. # Required to determine source
  67. if not frame.get("abs_path") or not frame.get("lineno"):
  68. return
  69. minified_source.blob.blob.seek(0)
  70. map_file.blob.blob.seek(0)
  71. sourcemap_view = SourceMapView.from_json_bytes(map_file.blob.blob.read())
  72. minified_source_view = SourceView.from_bytes(minified_source.blob.blob.read())
  73. token = sourcemap_view.lookup(
  74. frame["lineno"] - 1,
  75. frame["colno"] - 1,
  76. frame["function"],
  77. minified_source_view,
  78. )
  79. if not token:
  80. return
  81. frame["lineno"] = token.src_line + 1
  82. frame["colno"] = token.src_col + 1
  83. if token.function_name:
  84. frame["function"] = token.function_name
  85. filename = token.src
  86. abs_path = frame["abs_path"]
  87. in_app = None
  88. # special case webpack support
  89. # abs_path will always be the full path with webpack:/// prefix.
  90. # filename will be relative to that
  91. if abs_path.startswith("webpack:"):
  92. filename = abs_path
  93. # webpack seems to use ~ to imply "relative to resolver root"
  94. # which is generally seen for third party deps
  95. # (i.e. node_modules)
  96. if "/~/" in filename:
  97. filename = "~/" + abs_path.split("/~/", 1)[-1]
  98. else:
  99. filename = filename.split("webpack:///", 1)[-1]
  100. # As noted above:
  101. # * [js/node] '~/' means they're coming from node_modules, so these are not app dependencies
  102. # * [node] sames goes for `./node_modules/` and '../node_modules/', which is used when bundling node apps
  103. # * [node] and webpack, which includes it's own code to bootstrap all modules and its internals
  104. # eg. webpack:///webpack/bootstrap, webpack:///external
  105. if (
  106. filename.startswith("~/")
  107. or "/node_modules/" in filename
  108. or not filename.startswith("./")
  109. ):
  110. in_app = False
  111. # And conversely, local dependencies start with './'
  112. elif filename.startswith("./"):
  113. in_app = True
  114. # We want to explicitly generate a webpack module name
  115. frame["module"] = generate_module(filename)
  116. elif "/node_modules/" in abs_path:
  117. in_app = False
  118. if abs_path.startswith("app:"):
  119. if filename and NODE_MODULES_RE.search(filename):
  120. in_app = False
  121. else:
  122. in_app = True
  123. frame["filename"] = filename
  124. if not frame.get("module") and abs_path.startswith(
  125. ("http:", "https:", "webpack:", "app:")
  126. ):
  127. frame["module"] = generate_module(abs_path)
  128. if in_app is not None:
  129. frame["in_app"] = in_app
  130. def transform(self):
  131. stacktraces = self.get_stacktraces()
  132. frames = self.get_valid_frames(stacktraces)
  133. filenames = {frame["filename"].split("/")[-1] for frame in frames}
  134. # Make a guess at which files are relevant, match then better after
  135. source_files = File.objects.filter(
  136. releasefile__release_id=self.release_id,
  137. name__in={filename + ".map" for filename in filenames} | filenames,
  138. )
  139. if not source_files:
  140. return
  141. # Copy original stacktrace before modifying them
  142. for exception in get_path(
  143. self.data, "exception", "values", filter=True, default=()
  144. ):
  145. exception["raw_stacktrace"] = copy.deepcopy(exception["stacktrace"])
  146. frames_with_source = []
  147. for frame in frames:
  148. minified_filename = frame["abs_path"].split("/")[-1]
  149. map_filename = minified_filename + ".map"
  150. minified_file = None
  151. map_file = None
  152. for source_file in source_files:
  153. if source_file.name == minified_filename:
  154. minified_file = source_file
  155. if source_file.name == map_filename:
  156. map_file = source_file
  157. if map_file:
  158. frames_with_source.append((frame, map_file, minified_file))
  159. for frame_with_source in frames_with_source:
  160. self.process_frame(*frame_with_source)