Browse Source

feat(processor): Use JavaScriptSmCacheStacktraceProcessor by default for internal projects (#41390)

This PR builds on top of an earlier pull request (its reference was lost from this page)
and prepares us for gradual rollout.
Kamil Ogórek 2 years ago

+ 7 - 0

@@ -1,3 +1,5 @@
+from django.conf import settings
 from sentry.plugins.base.v2 import Plugin2
 from sentry.stacktraces.processing import find_stacktraces_in_data
 from sentry.utils.safe import get_path
@@ -5,6 +7,7 @@ from sentry.utils.safe import get_path
 from .errorlocale import translate_exception
 from .errormapping import rewrite_exception
 from .processor import JavaScriptStacktraceProcessor
+from .processor_smcache import JavaScriptSmCacheStacktraceProcessor
 def preprocess_event(data):
@@ -41,5 +44,9 @@ class JavascriptPlugin(Plugin2):
         return []
     def get_stacktrace_processors(self, data, stacktrace_infos, platforms, **kwargs):
+        # TODO(smcache): Implement gradual rollout
         if "javascript" in platforms or "node" in platforms:
+            for value in (settings.SENTRY_FRONTEND_PROJECT, settings.SENTRY_PROJECT):
+                if str(data["project"]) == str(value):
+                    return [JavaScriptSmCacheStacktraceProcessor]
             return [JavaScriptStacktraceProcessor]

+ 15 - 368

@@ -16,7 +16,6 @@ from django.conf import settings
 from django.utils import timezone
 from django.utils.encoding import force_bytes, force_text
 from requests.utils import get_encoding_from_headers
-from symbolic import SourceMapCache as SmCache
 from symbolic import SourceMapView
 from sentry import features, http, options
@@ -34,7 +33,7 @@ from sentry.utils.files import compress_file
 from sentry.utils.hashlib import md5_text
 from sentry.utils.http import is_valid_origin
 from sentry.utils.retries import ConditionalRetryPolicy, exponential_delay
-from sentry.utils.safe import get_path, set_path
+from sentry.utils.safe import get_path
 from sentry.utils.urls import non_standard_url_join
 from .cache import SourceCache, SourceMapCache
@@ -86,13 +85,6 @@ class UnparseableSourcemap(http.BadSource):
     error_type = EventError.JS_INVALID_SOURCEMAP
-# TODO(smcache): Remove this function and all its usages.
-def should_run_smcache(cls):
-    return cls.has_smcache_feature is True and not isinstance(
-        cls, JavaScriptSmCacheStacktraceProcessor
-    )
 def trim_line(line, column=0):
     Trims a line down to a goal of 140 characters, with a little
@@ -128,7 +120,6 @@ def trim_line(line, column=0):
     return line
-# TODO(smcache): Remove in favor of `get_raw_source_context` (remove _raw from its name too).
 def get_source_context(source, lineno, colno, context=LINES_OF_CONTEXT):
     if not source:
         return None, None, None
@@ -159,36 +150,6 @@ def get_source_context(source, lineno, colno, context=LINES_OF_CONTEXT):
     return pre_context or None, context_line, post_context or None
-def get_raw_source_context(source, lineno, context=LINES_OF_CONTEXT):
-    if not source:
-        return None, None, None
-    # lineno's in JS are 1-indexed
-    # just in case. sometimes math is hard
-    if lineno > 0:
-        lineno -= 1
-    lower_bound = max(0, lineno - context)
-    upper_bound = min(lineno + 1 + context, len(source))
-    try:
-        pre_context = source[lower_bound:lineno]
-    except IndexError:
-        pre_context = []
-    try:
-        context_line = source[lineno]
-    except IndexError:
-        context_line = ""
-    try:
-        post_context = source[(lineno + 1) : upper_bound]
-    except IndexError:
-        post_context = []
-    return pre_context or None, context_line, post_context or None
 def discover_sourcemap(result):
     Given a UrlResult object, attempt to discover a sourcemap URL.
@@ -781,10 +742,7 @@ def get_max_age(headers):
     return min(max_age, CACHE_CONTROL_MAX)
-# TODO(smcache): Remove unnecessary `use_smcache` flag.
-def fetch_sourcemap(
-    url, source=b"", project=None, release=None, dist=None, allow_scraping=True, use_smcache=True
+def fetch_sourcemap(url, source=b"", project=None, release=None, dist=None, allow_scraping=True):
     if is_data_uri(url):
             body = base64.b64decode(
@@ -808,17 +766,10 @@ def fetch_sourcemap(
         body = result.body
-        # TODO(smcache): Remove unnecessary `use_smcache` flag and use `SmCache` only.
-        if use_smcache:
-            with sentry_sdk.start_span(
-                op="JavaScriptStacktraceProcessor.fetch_sourcemap.SmCache.from_bytes"
-            ):
-                return SmCache.from_bytes(source, body)
-        else:
-            with sentry_sdk.start_span(
-                op="JavaScriptStacktraceProcessor.fetch_sourcemap.SourceMapView.from_json_bytes"
-            ):
-                return SourceMapView.from_json_bytes(body)
+        with sentry_sdk.start_span(
+            op="JavaScriptStacktraceProcessor.fetch_sourcemap.SourceMapView.from_json_bytes"
+        ):
+            return SourceMapView.from_json_bytes(body)
     except Exception as exc:
         # This is in debug because the product shows an error already.
@@ -861,39 +812,6 @@ def is_valid_frame(frame):
     return frame is not None and frame.get("lineno") is not None
-def get_function_for_token(frame, token, previous_frame=None):
-    """
-    Get function name for a given frame based on the token resolved by symbolic.
-    It tries following paths in order:
-    - return token function name if we have a usable value (filtered through `USELESS_FN_NAMES` list),
-    - return mapped name of the caller (previous frame) token if it had,
-    - return token function name, including filtered values if it mapped to anything in the first place,
-    - return current frames function name as a fallback
-    """
-    frame_function_name = frame.get("function")
-    token_function_name = token.function_name
-    # Try to use the function name we got from sourcemap-cache, filtering useless names.
-    if token_function_name not in USELESS_FN_NAMES:
-        return token_function_name
-    # If not found, ask the callsite (previous token) for function name if possible.
-    if previous_frame is not None:
-        # `preprocess_frame` is supposed to make sure that `data` is present,
-        # but better safe than sorry.
-        last_token = (previous_frame.get("data") or {}).get("token")
-        if last_token:
-            return last_token.name
-    # If there was no minified name at all, return even useless, filtered one from the original token.
-    if not frame_function_name:
-        return token_function_name
-    # Otherwise fallback to the old, minified name.
-    return frame_function_name
 class JavaScriptStacktraceProcessor(StacktraceProcessor):
     Attempts to fetch source code for javascript frames.
@@ -937,18 +855,6 @@ class JavaScriptStacktraceProcessor(StacktraceProcessor):
         self.release = None
         self.dist = None
-        # We only want to check the feature flag for this specific class, and not for
-        # `JavaScriptSmCacheStacktraceProcessor`., as it's asking remote Flagr for that data.
-        if not isinstance(self, JavaScriptSmCacheStacktraceProcessor):
-            self.has_smcache_feature = features.has(
-                "projects:sourcemapcache-processor", self.project
-            )
-        else:
-            self.has_smcache_feature = False
-        if should_run_smcache(self):
-            self.smcache_processor = JavaScriptSmCacheStacktraceProcessor(*args, **kwargs)
     def get_valid_frames(self):
         # build list of frames that we can actually grab source for
         frames = []
@@ -977,9 +883,6 @@ class JavaScriptStacktraceProcessor(StacktraceProcessor):
-        if should_run_smcache(self):
-            self.smcache_processor.preprocess_step(None)
         return True
     def handles_frame(self, frame, stacktrace_info):
@@ -1070,7 +973,9 @@ class JavaScriptStacktraceProcessor(StacktraceProcessor):
             processable_frame.data["token"] = token
             # Store original data in annotation
-            new_frame["data"] = dict(frame.get("data") or {}, sourcemap=sourcemap_label)
+            new_frame["data"] = dict(
+                frame.get("data") or {}, sourcemap=sourcemap_label, smcache=False
+            )
             sourcemap_applied = True
@@ -1206,11 +1111,6 @@ class JavaScriptStacktraceProcessor(StacktraceProcessor):
                 new_frame["in_app"] = in_app
                 raw_frame["in_app"] = in_app
-            # Run new processor only for frames that were actually modified in any way.
-            if should_run_smcache(self) and new_frame != raw_frame:
-                smcache_rv = self.smcache_processor.process_frame(processable_frame, None)
-                set_path(new_frame, "data", "smcache_frame", value=smcache_rv[0][0])
             new_frames = [new_frame]
             raw_frames = [raw_frame] if changed_raw else None
@@ -1336,8 +1236,6 @@ class JavaScriptStacktraceProcessor(StacktraceProcessor):
-                    # TODO(smcache): Remove unnecessary `use_smcache` flag.
-                    use_smcache=isinstance(self, JavaScriptSmCacheStacktraceProcessor),
         except http.BadSource as exc:
             # we don't perform the same check here as above, because if someone has
@@ -1353,17 +1251,12 @@ class JavaScriptStacktraceProcessor(StacktraceProcessor):
         ) as span:
             sourcemaps.add(sourcemap_url, sourcemap_view)
-            # TODO(smcache): Remove this whole iteration block
-            if not isinstance(self, JavaScriptSmCacheStacktraceProcessor):
-                span.set_data("source_count", sourcemap_view.source_count)
-                # cache any inlined sources
-                for src_id, source_name in sourcemap_view.iter_sources():
-                    source_view = sourcemap_view.get_sourceview(src_id)
-                    if source_view is not None:
-                        self.cache.add(
-                            non_standard_url_join(sourcemap_url, source_name), source_view
-                        )
+            span.set_data("source_count", sourcemap_view.source_count)
+            # cache any inlined sources
+            for src_id, source_name in sourcemap_view.iter_sources():
+                source_view = sourcemap_view.get_sourceview(src_id)
+                if source_view is not None:
+                    self.cache.add(non_standard_url_join(sourcemap_url, source_name), source_view)
     def populate_source_cache(self, frames):
@@ -1433,249 +1326,3 @@ class JavaScriptStacktraceProcessor(StacktraceProcessor):
             return has_short_stacktrace and is_suspicious_error and has_suspicious_frames(frames)
         return False
-class JavaScriptSmCacheStacktraceProcessor(JavaScriptStacktraceProcessor):
-    """
-    Modern SourceMap processor using symbolic-sourcemapcache.
-    Subclass of `JavaScriptStacktraceProcessor` with only changed methods overwritten.
-    To make it a default, change replace all `JavaScriptStacktraceProcessor` methods with
-    those from this class instead.
-    """
-    def __init__(self, *args, **kwargs):
-        JavaScriptStacktraceProcessor.__init__(self, *args, **kwargs)
-    def process_frame(self, processable_frame, processing_task):
-        """
-        Attempt to demangle the given frame.
-        """
-        frame = processable_frame.frame
-        token = None
-        cache = self.cache
-        sourcemaps = self.sourcemaps
-        all_errors = []
-        sourcemap_applied = False
-        # can't demangle if there's no filename or line number present
-        if not frame.get("abs_path") or not frame.get("lineno"):
-            return
-        # also can't demangle node's internal modules
-        # therefore we only process user-land frames (starting with /)
-        # or those created by bundle/webpack internals
-        if self.data.get("platform") == "node" and not frame.get("abs_path").startswith(
-            ("/", "app:", "webpack:")
-        ):
-            return
-        errors = cache.get_errors(frame["abs_path"])
-        if errors:
-            all_errors.extend(errors)
-        # `source` is used for pre/post and `context_line` frame expansion.
-        # Here it's pointing to minified source, however the variable can be shadowed with the original sourceview
-        # (or `None` if the token doesnt provide us with the `context_line`) down the road.
-        source = self.get_sourceview(frame["abs_path"])
-        source_context = None
-        in_app = None
-        new_frame = dict(frame)
-        raw_frame = dict(frame)
-        sourcemap_url, sourcemap_cache = sourcemaps.get_link(frame["abs_path"])
-        self.sourcemaps_touched.add(sourcemap_url)
-        if sourcemap_cache and frame.get("colno") is None:
-            all_errors.append(
-                {"type": EventError.JS_NO_COLUMN, "url": http.expose_url(frame["abs_path"])}
-            )
-        elif sourcemap_cache:
-            if is_data_uri(sourcemap_url):
-                sourcemap_label = frame["abs_path"]
-            else:
-                sourcemap_label = sourcemap_url
-            sourcemap_label = http.expose_url(sourcemap_label)
-            try:
-                # Errors are 1-indexed in the frames.
-                assert frame["lineno"] > 0, "line numbers are 1-indexed"
-                token = sourcemap_cache.lookup(frame["lineno"], frame["colno"], LINES_OF_CONTEXT)
-            except Exception:
-                token = None
-                all_errors.append(
-                    {
-                        "type": EventError.JS_INVALID_SOURCEMAP_LOCATION,
-                        "column": frame.get("colno"),
-                        "row": frame.get("lineno"),
-                        "source": frame["abs_path"],
-                        "sourcemap": sourcemap_label,
-                    }
-                )
-            # persist the token so that we can find it later
-            processable_frame.data["token"] = token
-            # Store original data in annotation
-            new_frame["data"] = dict(frame.get("data") or {}, sourcemap=sourcemap_label)
-            sourcemap_applied = True
-            if token is not None:
-                if token.src is not None:
-                    abs_path = non_standard_url_join(sourcemap_url, token.src)
-                else:
-                    abs_path = frame["abs_path"]
-                logger.debug(
-                    "Mapping compressed source %r to mapping in %r", frame["abs_path"], abs_path
-                )
-                if token.context_line is not None:
-                    source_context = token.pre_context, token.context_line, token.post_context
-                else:
-                    source = self.get_sourceview(abs_path)
-                if source is None:
-                    errors = cache.get_errors(abs_path)
-                    if errors:
-                        all_errors.extend(errors)
-                    else:
-                        all_errors.append(
-                            {"type": EventError.JS_MISSING_SOURCE, "url": http.expose_url(abs_path)}
-                        )
-                # The tokens are 1-indexed.
-                new_frame["lineno"] = token.line
-                new_frame["colno"] = token.col
-                new_frame["function"] = get_function_for_token(
-                    new_frame, token, processable_frame.previous_frame
-                )
-                filename = token.src
-                # special case webpack support
-                # abs_path will always be the full path with webpack:/// prefix.
-                # filename will be relative to that
-                if abs_path.startswith("webpack:"):
-                    filename = abs_path
-                    # webpack seems to use ~ to imply "relative to resolver root"
-                    # which is generally seen for third party deps
-                    # (i.e. node_modules)
-                    if "/~/" in filename:
-                        filename = "~/" + abs_path.split("/~/", 1)[-1]
-                    elif WEBPACK_NAMESPACE_RE.match(filename):
-                        filename = re.sub(WEBPACK_NAMESPACE_RE, "./", abs_path)
-                    else:
-                        filename = filename.split("webpack:///", 1)[-1]
-                    # As noted above:
-                    # * [js/node] '~/' means they're coming from node_modules, so these are not app dependencies
-                    # * [node] sames goes for `./node_modules/` and '../node_modules/', which is used when bundling node apps
-                    # * [node] and webpack, which includes it's own code to bootstrap all modules and its internals
-                    #   eg. webpack:///webpack/bootstrap, webpack:///external
-                    if (
-                        filename.startswith("~/")
-                        or "/node_modules/" in filename
-                        or not filename.startswith("./")
-                    ):
-                        in_app = False
-                    # And conversely, local dependencies start with './'
-                    elif filename.startswith("./"):
-                        in_app = True
-                    # We want to explicitly generate a webpack module name
-                    new_frame["module"] = generate_module(filename)
-                # while you could technically use a subpath of 'node_modules' for your libraries,
-                # it would be an extremely complicated decision and we've not seen anyone do it
-                # so instead we assume if node_modules is in the path its part of the vendored code
-                elif "/node_modules/" in abs_path:
-                    in_app = False
-                if abs_path.startswith("app:"):
-                    if filename and NODE_MODULES_RE.search(filename):
-                        in_app = False
-                    else:
-                        in_app = True
-                new_frame["abs_path"] = abs_path
-                new_frame["filename"] = filename
-                if not frame.get("module") and abs_path.startswith(
-                    ("http:", "https:", "webpack:", "app:")
-                ):
-                    new_frame["module"] = generate_module(abs_path)
-        elif sourcemap_url:
-            new_frame["data"] = dict(
-                new_frame.get("data") or {}, sourcemap=http.expose_url(sourcemap_url)
-            )
-        changed_frame = self.expand_frame(new_frame, source_context=source_context, source=source)
-        # If we did not manage to match but we do have a line or column
-        # we want to report an error here.
-        if not new_frame.get("context_line") and source and new_frame.get("colno") is not None:
-            all_errors.append(
-                {
-                    "type": EventError.JS_INVALID_SOURCEMAP_LOCATION,
-                    "column": new_frame["colno"],
-                    "row": new_frame["lineno"],
-                    "source": new_frame["abs_path"],
-                }
-            )
-        changed_raw = sourcemap_applied and self.expand_frame(raw_frame)
-        if sourcemap_applied or all_errors or changed_frame or changed_raw:
-            # In case we are done processing, we iterate over all errors that we got
-            # and we filter out all `JS_MISSING_SOURCE` errors since we consider if we have
-            # a `context_line` we have a symbolicated frame and we don't need to show the error
-            has_context_line = bool(new_frame.get("context_line"))
-            if has_context_line:
-                all_errors[:] = [
-                    x for x in all_errors if x.get("type") is not EventError.JS_MISSING_SOURCE
-                ]
-            if in_app is not None:
-                new_frame["in_app"] = in_app
-                raw_frame["in_app"] = in_app
-            new_frames = [new_frame]
-            raw_frames = [raw_frame] if changed_raw else None
-            try:
-                if features.has(
-                    "organizations:javascript-console-error-tag", self.organization, actor=None
-                ):
-                    self.tag_suspected_console_errors(new_frames)
-            except Exception as exc:
-                logger.exception("Failed to tag suspected console errors", exc_info=exc)
-            return new_frames, raw_frames, all_errors
-    def expand_frame(self, frame, source_context=None, source=None):
-        """
-        Mutate the given frame to include pre- and post-context lines.
-        """
-        if frame.get("lineno") is None:
-            return False
-        if source_context is None:
-            source = source or self.get_sourceview(frame["abs_path"])
-            if source is None:
-                logger.debug("No source found for %s", frame["abs_path"])
-                return False
-        (pre_context, context_line, post_context) = source_context or get_raw_source_context(
-            source=source, lineno=frame["lineno"]
-        )
-        if pre_context is not None and len(pre_context) > 0:
-            frame["pre_context"] = [trim_line(x) for x in pre_context]
-        if context_line is not None:
-            frame["context_line"] = trim_line(context_line, frame.get("colno") or 0)
-        if post_context is not None and len(post_context) > 0:
-            frame["post_context"] = [trim_line(x) for x in post_context]
-        return True

+ 1351 - 0

@@ -0,0 +1,1351 @@
+import base64
+import errno
+import logging
+import re
+import sys
+import time
+import zlib
+from datetime import datetime
+from io import BytesIO
+from os.path import splitext
+from typing import IO, Optional, Tuple
+from urllib.parse import urlsplit
+import sentry_sdk
+from django.conf import settings
+from django.utils import timezone
+from django.utils.encoding import force_bytes, force_text
+from requests.utils import get_encoding_from_headers
+from symbolic import SourceMapCache as SmCache
+from sentry import features, http, options
+from sentry.event_manager import set_tag
+from sentry.models import EventError, Organization, ReleaseFile
+from sentry.models.releasefile import ARTIFACT_INDEX_FILENAME, ReleaseArchive, read_artifact_index
+from sentry.stacktraces.processing import StacktraceProcessor
+from sentry.utils import json, metrics
+# separate from either the source cache or the source maps cache, this is for
+# holding the results of attempting to fetch both kinds of files, either from the
+# database or from the internet
+from sentry.utils.cache import cache
+from sentry.utils.files import compress_file
+from sentry.utils.hashlib import md5_text
+from sentry.utils.http import is_valid_origin
+from sentry.utils.retries import ConditionalRetryPolicy, exponential_delay
+from sentry.utils.safe import get_path
+from sentry.utils.urls import non_standard_url_join
+from .cache import SourceCache, SourceMapCache
+__all__ = ["JavaScriptSmCacheStacktraceProcessor"]
+# number of surrounding lines (on each side) to fetch
+BASE64_SOURCEMAP_PREAMBLE = "data:application/json;base64,"
+UNKNOWN_MODULE = "<unknown module>"
+# Names that do not provide any reasonable value, and that can possibly obstruct
+# better available names. In case we encounter one, we fallback to current frame fn name if available.
+USELESS_FN_NAMES = ["<anonymous>", "__webpack_require__", "__webpack_modules__"]
+# Matches a run of leading path noise (slashes, common asset folders, version
+# numbers, hashes) or a trailing commit-ish suffix. Compiled in verbose mode, so
+# whitespace and `#` comments inside the pattern are ignored.
+CLEAN_MODULE_RE = re.compile(
+    r"""^
+(?:/|  # Leading slashes
+(?:
+    (?:java)?scripts?|js|build|static|node_modules|bower_components|[_\.~].*?|  # common folder prefixes
+    v?(?:\d+\.)*\d+|   # version numbers, v1, 1.0.0
+    [a-f0-9]{7,8}|     # short sha
+    [a-f0-9]{32}|      # md5
+    [a-f0-9]{40}       # sha1
+)/)+|
+(?:[-\.][a-f0-9]{7,}$)  # Ending in a commitish
+""",
+    re.X | re.I,
+)
+VERSION_RE = re.compile(r"^[a-f0-9]{32}|[a-f0-9]{40}$", re.I)
+NODE_MODULES_RE = re.compile(r"\bnode_modules/")
+# Default Webpack output path using multiple namespace -
+# eg. webpack://myproject/./src/lib/hellothere.js
+WEBPACK_NAMESPACE_RE = re.compile(r"^webpack://[a-zA-Z0-9_\-@\.]+/\./")
+SOURCE_MAPPING_URL_RE = re.compile(b"//# sourceMappingURL=(.*)$")
+CACHE_CONTROL_RE = re.compile(r"max-age=(\d+)")
+# the maximum number of remote resources (i.e. source files) that should be
+# fetched
+logger = logging.getLogger(__name__)
+class UnparseableSourcemap(http.BadSource):
+    """``http.BadSource`` variant reported with the ``JS_INVALID_SOURCEMAP`` event error type."""
+    error_type = EventError.JS_INVALID_SOURCEMAP
+def trim_line(line, column=0):
+    """
+    Shorten ``line`` to a window of about 140 characters around ``column``.
+    Lines of 150 characters or fewer are returned untouched (minus surrounding
+    newlines). Longer lines are cut to a window that starts 60 characters before
+    ``column``; any cut-off side is marked with ``{snip}``.
+    """
+    text = line.strip("\n")
+    length = len(text)
+    if length <= 150:
+        return text
+    column = min(column, length)
+    begin = max(column - 60, 0)
+    # Snap to the start of the line when the window is nearly there anyway
+    if begin < 5:
+        begin = 0
+    stop = min(begin + 140, length)
+    # Snap to the end of the line when the window is nearly there anyway
+    if stop > length - 5:
+        stop = length
+    # Pinned to the end: pull the start back so the window still spans 140 characters
+    if stop == length:
+        begin = max(stop - 140, 0)
+    # Mark whichever sides were cut off
+    prefix = "{snip} " if begin > 0 else ""
+    suffix = " {snip}" if stop < length else ""
+    return prefix + text[begin:stop] + suffix
+def get_source_context(source, lineno, context=LINES_OF_CONTEXT):
+    """
+    Return ``(pre_context, context_line, post_context)`` for ``lineno`` in ``source``.
+    ``source`` is indexed and sliced by line. Empty pre/post context collapses to
+    ``None``; a falsy ``source`` yields ``(None, None, None)``.
+    """
+    if not source:
+        return None, None, None
+    # Line numbers in JS are 1-indexed; only shift positive values so that a
+    # stray 0 still points at the first line
+    index = lineno - 1 if lineno > 0 else lineno
+    first = max(0, index - context)
+    last = min(index + 1 + context, len(source))
+    try:
+        before = source[first:index]
+    except IndexError:
+        before = []
+    try:
+        current = source[index]
+    except IndexError:
+        current = ""
+    try:
+        after = source[(index + 1) : last]
+    except IndexError:
+        after = []
+    return before or None, current, after or None
+def discover_sourcemap(result):
+    """
+    Given a UrlResult object, attempt to discover a sourcemap URL.
+    The ``sourcemap`` / ``x-sourcemap`` response headers are checked first; if they
+    are absent the body is scanned for a ``sourceMappingURL`` comment. The URL found
+    is joined against ``result.url`` and returned as text, or ``None`` is returned
+    when nothing was found. Raises ``AssertionError`` if a react-native style
+    trailing comment starts at the very beginning of the URL.
+    """
+    # When coercing the headers returned by urllib to a dict
+    # all keys become lowercase so they're normalized
+    sourcemap = result.headers.get("sourcemap", result.headers.get("x-sourcemap"))
+    # Force the header value to bytes since we'll be manipulating bytes here
+    sourcemap = force_bytes(sourcemap) if sourcemap is not None else None
+    if not sourcemap:
+        parsed_body = result.body.split(b"\n")
+        # Source maps are only going to exist at either the top or bottom of the document.
+        # Technically, there isn't anything indicating *where* it should exist, so we
+        # are generous and assume it's somewhere either in the first or last 5 lines.
+        # If it's somewhere else in the document, you're probably doing it wrong.
+        if len(parsed_body) > 10:
+            possibilities = parsed_body[:5] + parsed_body[-5:]
+        else:
+            possibilities = parsed_body
+        # We want to scan each line sequentially, and the last one found wins
+        # This behavior is undocumented, but matches what Chrome and Firefox do.
+        for line in possibilities:
+            if line[:21] in (b"//# sourceMappingURL=", b"//@ sourceMappingURL="):
+                # We want everything AFTER the indicator, which is 21 chars long
+                sourcemap = line[21:].rstrip()
+        # If we still haven't found anything, check end of last line AFTER source code.
+        # This is not the literal interpretation of the spec, but browsers support it.
+        # e.g. {code}//# sourceMappingURL={url}
+        if not sourcemap:
+            # Only look at last 300 characters to keep search space reasonable (minified
+            # JS on a single line could be tens of thousands of chars). This is a totally
+            # arbitrary number / best guess; most sourceMappingURLs are relative and
+            # not very long.
+            search_space = possibilities[-1][-300:].rstrip()
+            match = SOURCE_MAPPING_URL_RE.search(search_space)
+            if match:
+                sourcemap = match.group(1)
+    if sourcemap:
+        # react-native shoves a comment at the end of the
+        # sourceMappingURL line.
+        # For example:
+        #  sourceMappingURL=app.js.map/*ascii:...*/
+        # This comment is completely out of spec and no browser
+        # would support this, but we need to strip it to make
+        # people happy.
+        if b"/*" in sourcemap and sourcemap[-2:] == b"*/":
+            index = sourcemap.index(b"/*")
+            # comment definitely shouldn't be the first character,
+            # so let's just make sure of that.
+            if index == 0:
+                raise AssertionError(
+                    "react-native comment found at bad location: %d, %r" % (index, sourcemap)
+                )
+            sourcemap = sourcemap[:index]
+        # fix url so its absolute
+        sourcemap = non_standard_url_join(result.url, force_text(sourcemap))
+    return force_text(sourcemap) if sourcemap is not None else None
+def get_release_file_cache_key(release_id, releasefile_ident):
+    """Build the ``releasefile:v1:<release_id>:<ident>`` cache key for a release file."""
+    return "releasefile:v1:{}:{}".format(release_id, releasefile_ident)
+def get_release_file_cache_key_meta(release_id, releasefile_ident):
+    """Build the metadata cache key: the release file cache key prefixed with ``meta:``."""
+    base_key = get_release_file_cache_key(release_id, releasefile_ident)
+    return "meta:" + base_key
+def should_retry_fetch(attempt: int, e: Exception) -> bool:
+    """Retry only while within ``MAX_FETCH_ATTEMPTS`` and only for ``OSError`` with ``ESTALE``."""
+    if attempt > MAX_FETCH_ATTEMPTS:
+        return False
+    return isinstance(e, OSError) and e.errno == errno.ESTALE
+fetch_retry_policy = ConditionalRetryPolicy(should_retry_fetch, exponential_delay(0.05))
+def fetch_and_cache_artifact(filename, fetch_fn, cache_key, cache_key_meta, headers, compress_fn):
+    """
+    Read an artifact through ``fetch_fn``, wrap it in an ``http.UrlResult`` and cache it.
+    ``fetch_fn`` must return a context manager yielding a file-like object, and
+    ``compress_fn(fp)`` must return a ``(compressed_body, body)`` pair. Returns the
+    ``UrlResult`` on success, or ``None`` when reading or compressing failed (the
+    failure is logged, not raised).
+    """
+    # If the release file is not in cache, check if we can retrieve at
+    # least the size metadata from cache and prevent compression and
+    # caching if payload exceeds the backend limit.
+    z_body_size = None
+    if CACHE_MAX_VALUE_SIZE:
+        cache_meta = cache.get(cache_key_meta)
+        if cache_meta:
+            z_body_size = int(cache_meta.get("compressed_size"))
+    def fetch_release_body():
+        with fetch_fn() as fp:
+            if z_body_size and z_body_size > CACHE_MAX_VALUE_SIZE:
+                # Known to be too large to cache: skip compression and return
+                # the raw body with no compressed counterpart
+                return None, fp.read()
+            else:
+                with sentry_sdk.start_span(
+                    op="JavaScriptSmCacheStacktraceProcessor.fetch_and_cache_artifact.compress"
+                ):
+                    return compress_fn(fp)
+    try:
+        with metrics.timer("sourcemaps.release_file_read"):
+            z_body, body = fetch_retry_policy(fetch_release_body)
+    except Exception:
+        logger.error("sourcemap.compress_read_failed", exc_info=sys.exc_info())
+        result = None
+    else:
+        headers = {k.lower(): v for k, v in headers.items()}
+        encoding = get_encoding_from_headers(headers)
+        result = http.UrlResult(filename, headers, body, 200, encoding)
+        # If we don't have the compressed body for caching because the
+        # cached metadata said it is too large payload for the cache
+        # backend, do not attempt to cache.
+        if z_body:
+            # This will implicitly skip too large payloads. Those will be cached
+            # on the file system by `ReleaseFile.cache`, instead.
+            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)
+            # In case the previous call to cache implicitly fails, we use
+            # the meta data to avoid pointless compression which is done
+            # only for caching.
+            cache.set(cache_key_meta, {"compressed_size": len(z_body)}, 3600)
+    return result
+def get_cache_keys(filename, release, dist):
+    """
+    Return ``(cache_key, cache_key_meta)`` for a release artifact.
+    Both keys are derived from ``release.id`` and the ``ReleaseFile`` ident of
+    ``filename`` combined with the dist name (``None`` when ``dist`` is falsy or unnamed).
+    """
+    dist_name = dist and dist.name or None
+    releasefile_ident = ReleaseFile.get_ident(filename, dist_name)
+    cache_key = get_release_file_cache_key(
+        release_id=release.id, releasefile_ident=releasefile_ident
+    )
+    # Cache key to store file metadata, currently only the size of the
+    # compressed version of file. We cannot use the cache_key because large
+    # payloads (silently) fail to cache due to e.g. memcached payload size
+    # limitation and we use the meta data to avoid compression of such a files.
+    cache_key_meta = get_release_file_cache_key_meta(
+        release_id=release.id, releasefile_ident=releasefile_ident
+    )
+    return cache_key, cache_key_meta
+def result_from_cache(filename, result):
+    # Previous caches would be a 3-tuple instead of a 4-tuple,
+    # so this is being maintained for backwards compatibility
+    try:
+        encoding = result[3]
+    except IndexError:
+        encoding = None
+    return http.UrlResult(filename, result[0], zlib.decompress(result[1]), result[2], encoding)
+def fetch_release_file(filename, release, dist=None):
+    """
+    Attempt to retrieve a release artifact from the database.
+    Caches the result of that attempt (whether successful or not).
+    """
+    dist_name = dist and or None
+    cache_key, cache_key_meta = get_cache_keys(filename, release, dist)
+    logger.debug("Checking cache for release artifact %r (release_id=%s)", filename,
+    result = cache.get(cache_key)
+    # not in the cache (meaning we haven't checked the database recently), so check the database
+    if result is None:
+        with metrics.timer("sourcemaps.release_artifact_from_file"):
+            filename_choices = ReleaseFile.normalize(filename)
+            filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]
+            logger.debug(
+                "Checking database for release artifact %r (release_id=%s)", filename,
+            )
+            possible_files = list(
+                ReleaseFile.objects.filter(
+          ,
+           if dist else dist,
+                    ident__in=filename_idents,
+                ).select_related("file")
+            )
+            if len(possible_files) == 0:
+                logger.debug(
+                    "Release artifact %r not found in database (release_id=%s)",
+                    filename,
+          ,
+                )
+                cache.set(cache_key, -1, 60)
+                return None
+            elif len(possible_files) == 1:
+                releasefile = possible_files[0]
+            else:
+                # Pick first one that matches in priority order.
+                # This is O(N*M) but there are only ever at most 4 things here
+                # so not really worth optimizing.
+                releasefile = next(
+                    rf for ident in filename_idents for rf in possible_files if rf.ident == ident
+                )
+            logger.debug(
+                "Found release artifact %r (id=%s, release_id=%s)",
+                filename,
+      ,
+      ,
+            )
+            with sentry_sdk.start_span(
+                op="JavaScriptSmCacheStacktraceProcessor.fetch_release_file.fetch_and_cache"
+            ):
+                result = fetch_and_cache_artifact(
+                    filename,
+                    lambda: ReleaseFile.cache.getfile(releasefile),
+                    cache_key,
+                    cache_key_meta,
+                    releasefile.file.headers,
+                    compress_file,
+                )
+    # in the cache as an unsuccessful attempt
+    elif result == -1:
+        result = None
+    # in the cache as a successful attempt, including the zipped contents of the file
+    else:
+        result = result_from_cache(filename, result)
+    return result
+def get_from_archive(url: str, archive: ReleaseArchive) -> Tuple[bytes, dict]:
+    candidates = ReleaseFile.normalize(url)
+    for candidate in candidates:
+        try:
+            return archive.get_file_by_url(candidate)
+        except KeyError:
+            pass
+    # None of the filenames matched
+    raise KeyError(f"Not found in archive: '{url}'")
+def get_artifact_index(release, dist):
+    dist_name = dist and or None
+    ident = ReleaseFile.get_ident(ARTIFACT_INDEX_FILENAME, dist_name)
+    cache_key = f"artifact-index:v1:{}:{ident}"
+    result = cache.get(cache_key)
+    if result == -1:
+        index = None
+    elif result:
+        index = json.loads(result)
+    else:
+        index = read_artifact_index(release, dist, use_cache=True)
+        cache_value = -1 if index is None else json.dumps(index)
+        # Only cache for a short time to keep the manifest up-to-date
+        cache.set(cache_key, cache_value, timeout=60)
+    return index
+def get_index_entry(release, dist, url) -> Optional[dict]:
+    try:
+        index = get_artifact_index(release, dist)
+    except Exception as exc:
+        logger.error("sourcemaps.index_read_failed", exc_info=exc)
+        return None
+    if index:
+        for candidate in ReleaseFile.normalize(url):
+            entry = index.get("files", {}).get(candidate)
+            if entry:
+                return entry
+    return None
+def fetch_release_archive_for_url(release, dist, url) -> Optional[IO]:
+    """Fetch release archive and cache if possible.
+    Multiple archives might have been uploaded, so we need the URL
+    to get the correct archive from the artifact index.
+    If return value is not empty, the caller is responsible for closing the stream.
+    """
+    with sentry_sdk.start_span(op="fetch_release_archive_for_url.get_index_entry"):
+        info = get_index_entry(release, dist, url)
+    if info is None:
+        # Cannot write negative cache entry here because ID of release archive
+        # is not yet known
+        return None
+    archive_ident = info["archive_ident"]
+    # TODO(jjbayer): Could already extract filename from info and return
+    # it later
+    cache_key = get_release_file_cache_key(, releasefile_ident=archive_ident)
+    result = cache.get(cache_key)
+    if result == -1:
+        return None
+    elif result:
+        return BytesIO(result)
+    else:
+        try:
+            with sentry_sdk.start_span(op="fetch_release_archive_for_url.get_releasefile_db_entry"):
+                qs = ReleaseFile.objects.filter(
+          , if dist else dist, ident=archive_ident
+                ).select_related("file")
+                releasefile = qs[0]
+        except IndexError:
+            # This should not happen when there is an archive_ident in the manifest
+            logger.error("sourcemaps.missing_archive", exc_info=sys.exc_info())
+            # Cache as nonexistent:
+            cache.set(cache_key, -1, 60)
+            return None
+        else:
+            try:
+                with sentry_sdk.start_span(op="fetch_release_archive_for_url.fetch_releasefile"):
+                    if releasefile.file.size <= options.get("releasefile.cache-max-archive-size"):
+                        getfile = lambda: ReleaseFile.cache.getfile(releasefile)
+                    else:
+                        # For very large ZIP archives, pulling the entire file into cache takes too long.
+                        # Only the blobs required to extract the current artifact (central directory and the file entry itself)
+                        # should be loaded in this case.
+                        getfile = releasefile.file.getfile
+                    file_ = fetch_retry_policy(getfile)
+            except Exception:
+                logger.error("sourcemaps.read_archive_failed", exc_info=sys.exc_info())
+                return None
+            # `cache.set` will only keep values up to a certain size,
+            # so we should not read the entire file if it's too large for caching
+            if CACHE_MAX_VALUE_SIZE is not None and file_.size > CACHE_MAX_VALUE_SIZE:
+                return file_
+            with sentry_sdk.start_span(op="fetch_release_archive_for_url.read_for_caching") as span:
+                span.set_data("file_size", file_.size)
+                contents =
+            with sentry_sdk.start_span(op="fetch_release_archive_for_url.write_to_cache") as span:
+                span.set_data("file_size", len(contents))
+                cache.set(cache_key, contents, 3600)
+            return file_
+def compress(fp: IO) -> Tuple[bytes, bytes]:
+    """Alternative for compress_file when fp does not support chunks"""
+    content =
+    return zlib.compress(content), content
+def fetch_release_artifact(url, release, dist):
+    """
+    Get a release artifact either by extracting it or fetching it directly.
+    If a release archive was saved, the individual file will be extracted
+    from the archive.
+    """
+    cache_key, cache_key_meta = get_cache_keys(url, release, dist)
+    result = cache.get(cache_key)
+    if result == -1:  # Cached as unavailable
+        return None
+    if result:
+        return result_from_cache(url, result)
+    start = time.monotonic()
+    with sentry_sdk.start_span(
+        op="JavaScriptSmCacheStacktraceProcessor.fetch_release_artifact.fetch_release_archive_for_url"
+    ):
+        archive_file = fetch_release_archive_for_url(release, dist, url)
+    if archive_file is not None:
+        try:
+            archive = ReleaseArchive(archive_file)
+        except Exception as exc:
+            logger.error(
+                "Failed to initialize archive for release %s",
+      ,
+                exc_info=exc,
+                extra={"contents": base64.b64encode(},
+            )
+            # TODO(jjbayer): cache error and return here
+        else:
+            with archive:
+                try:
+                    fp, headers = get_from_archive(url, archive)
+                except KeyError:
+                    # The manifest mapped the url to an archive, but the file
+                    # is not there.
+                    logger.error(
+                        "Release artifact %r not found in archive %s", url,
+                    )
+                    cache.set(cache_key, -1, 60)
+                    metrics.timing(
+                        "sourcemaps.release_artifact_from_archive", time.monotonic() - start
+                    )
+                    return None
+                except Exception as exc:
+                    logger.error("Failed to read %s from release %s", url,, exc_info=exc)
+                    # TODO(jjbayer): cache error and return here
+                else:
+                    result = fetch_and_cache_artifact(
+                        url,
+                        lambda: fp,
+                        cache_key,
+                        cache_key_meta,
+                        headers,
+                        # Cannot use `compress_file` because `ZipExtFile` does not support chunks
+                        compress_fn=compress,
+                    )
+                    metrics.timing(
+                        "sourcemaps.release_artifact_from_archive", time.monotonic() - start
+                    )
+                    return result
+    # Fall back to maintain compatibility with old releases and versions of
+    # sentry-cli which upload files individually
+    with sentry_sdk.start_span(
+        op="JavaScriptSmCacheStacktraceProcessor.fetch_release_artifact.fetch_release_file"
+    ):
+        result = fetch_release_file(url, release, dist)
+    return result
+def fetch_file(url, project=None, release=None, dist=None, allow_scraping=True):
+    """
+    Pull down a URL, returning a UrlResult object.
+    Attempts to fetch from the database first (assuming there's a release on the
+    event), then the internet. Caches the result of each of those two attempts
+    separately, whether or not those attempts are successful. Used for both
+    source files and source maps.
+    Raises ``http.CannotFetch`` when the URL is truncated, when the file is not
+    a release artifact and scraping is disabled or the URL is not http(s), when
+    the downloaded payload cannot be stored in the cache, when the status is
+    not 200, when a non-bytes body cannot be encoded as UTF-8, or when a
+    ``.js`` URL returns content starting with ``<``.
+    """
+    # If our url has been truncated, it'd be impossible to fetch
+    # so we check for this early and bail
+    if url[-3:] == "...":
+        raise http.CannotFetch({"type": EventError.JS_MISSING_SOURCE, "url": http.expose_url(url)})
+    # if we've got a release to look on, try that first (incl associated cache)
+    if release:
+        with sentry_sdk.start_span(
+            op="JavaScriptSmCacheStacktraceProcessor.fetch_file.fetch_release_artifact"
+        ):
+            result = fetch_release_artifact(url, release, dist)
+    else:
+        result = None
+    # otherwise, try the web-scraping cache and then the web itself
+    cache_key = f"source:cache:v4:{md5_text(url).hexdigest()}"
+    if result is None:
+        if not allow_scraping or not url.startswith(("http:", "https:")):
+            error = {"type": EventError.JS_MISSING_SOURCE, "url": http.expose_url(url)}
+            raise http.CannotFetch(error)
+        logger.debug("Checking cache for url %r", url)
+        result = cache.get(cache_key)
+        if result is not None:
+            # Previous caches would be a 3-tuple instead of a 4-tuple,
+            # so this is being maintained for backwards compatibility
+            # NOTE(review): entries written below are 5-tuples
+            # (url, headers, body, status, encoding), hence index 4 here.
+            try:
+                encoding = result[4]
+            except IndexError:
+                encoding = None
+            # We got a cache hit, but the body is compressed, so we
+            # need to decompress it before handing it off
+            result = http.UrlResult(
+                result[0], result[1], zlib.decompress(result[2]), result[3], encoding
+            )
+    # Still nothing: neither the release nor the scraping cache had the file,
+    # so download it.
+    if result is None:
+        headers = {}
+        verify_ssl = False
+        # Project-specific SSL verification and the auth token header are only
+        # applied when `is_valid_origin` accepts the URL for this project.
+        if project and is_valid_origin(url, project=project):
+            verify_ssl = bool(project.get_option("sentry:verify_ssl", False))
+            token = project.get_option("sentry:token")
+            if token:
+                token_header = project.get_option("sentry:token_header") or "X-Sentry-Token"
+                headers[token_header] = token
+        with metrics.timer("sourcemaps.fetch"):
+            with sentry_sdk.start_span(op="JavaScriptSmCacheStacktraceProcessor.fetch_file.http"):
+                result = http.fetch_file(url, headers=headers, verify_ssl=verify_ssl)
+            with sentry_sdk.start_span(
+                op="JavaScriptSmCacheStacktraceProcessor.fetch_file.compress_for_cache"
+            ):
+                z_body = zlib.compress(result.body)
+            cache.set(
+                cache_key,
+                (url, result.headers, z_body, result.status, result.encoding),
+                get_max_age(result.headers),
+            )
+            # since the cache.set above can fail we can end up in a situation
+            # where the file is too large for the cache. In that case we abort
+            # the fetch and cache a failure and lock the domain for future
+            # http fetches.
+            if cache.get(cache_key) is None:
+                error = {
+                    "type": EventError.TOO_LARGE_FOR_CACHE,
+                    "url": http.expose_url(url),
+                }
+                http.lock_domain(url, error=error)
+                raise http.CannotFetch(error)
+    # If we did not get a 200 OK we just raise a cannot fetch here.
+    if result.status != 200:
+        raise http.CannotFetch(
+            {
+                "type": EventError.FETCH_INVALID_HTTP_CODE,
+                "value": result.status,
+                "url": http.expose_url(url),
+            }
+        )
+    # Make sure the file we're getting back is bytes. The only
+    # reason it'd not be binary would be from old cached blobs, so
+    # for compatibility with current cached files, let's coerce back to
+    # binary and say utf8 encoding.
+    if not isinstance(result.body, bytes):
+        try:
+            result = http.UrlResult(
+                result.url,
+                result.headers,
+                result.body.encode("utf8"),
+                result.status,
+                result.encoding,
+            )
+        except UnicodeEncodeError:
+            error = {
+                "type": EventError.FETCH_INVALID_ENCODING,
+                "value": "utf8",
+                "url": http.expose_url(url),
+            }
+            raise http.CannotFetch(error)
+    # For JavaScript files, check if content is something other than JavaScript/JSON (i.e. HTML)
+    # NOTE: possible to have JS files that don't actually end w/ ".js", but
+    # this should catch 99% of cases
+    if urlsplit(url).path.endswith(".js"):
+        # Check if response is HTML by looking if the first non-whitespace character is an open tag ('<').
+        # This cannot parse as valid JS/JSON.
+        # NOTE: not relying on Content-Type header because apps often don't set this correctly
+        # Discard leading whitespace (often found before doctype)
+        body_start = result.body[:20].lstrip()
+        if body_start[:1] == b"<":
+            # NOTE(review): unlike the other error payloads in this function,
+            # the URL here is not passed through `http.expose_url`.
+            error = {"type": EventError.JS_INVALID_CONTENT, "url": url}
+            raise http.CannotFetch(error)
+    return result
+def get_max_age(headers):
+    cache_control = headers.get("cache-control")
+    max_age = CACHE_CONTROL_MIN
+    if cache_control:
+        match =
+        if match:
+            max_age = max(CACHE_CONTROL_MIN, int(
+    return min(max_age, CACHE_CONTROL_MAX)
+def fetch_sourcemap(url, source=b"", project=None, release=None, dist=None, allow_scraping=True):
+    if is_data_uri(url):
+        try:
+            body = base64.b64decode(
+                force_bytes(url[BASE64_PREAMBLE_LENGTH:])
+                + (b"=" * (-(len(url) - BASE64_PREAMBLE_LENGTH) % 4))
+            )
+        except TypeError as e:
+            raise UnparseableSourcemap({"url": "<base64>", "reason": str(e)})
+    else:
+        # look in the database and, if not found, optionally try to scrape the web
+        with sentry_sdk.start_span(
+            op="JavaScriptSmCacheStacktraceProcessor.fetch_sourcemap.fetch_file"
+        ) as span:
+            span.set_data("url", url)
+            result = fetch_file(
+                url,
+                project=project,
+                release=release,
+                dist=dist,
+                allow_scraping=allow_scraping,
+            )
+        body = result.body
+    try:
+        with sentry_sdk.start_span(
+            op="JavaScriptSmCacheStacktraceProcessor.fetch_sourcemap.SmCache.from_bytes"
+        ):
+            return SmCache.from_bytes(source, body)
+    except Exception as exc:
+        # This is in debug because the product shows an error already.
+        logger.debug(str(exc), exc_info=True)
+        raise UnparseableSourcemap({"url": http.expose_url(url)})
+def is_data_uri(url):
+    # NOTE(review): no body for this function is visible in this excerpt, so the
+    # definition is incomplete as shown. `fetch_sourcemap` treats a truthy
+    # result as "the payload is embedded in the URL after a preamble of
+    # BASE64_PREAMBLE_LENGTH characters" -- TODO restore the original body.
+def generate_module(src):
+    """
+    Converts a url into a made-up module name by doing the following:
+     * Extract just the path name ignoring querystrings
+     * Trimming off the initial /
+     * Trimming off the file extension
+     * Removes off useless folder prefixes
+    e.g. -> foo/bar/baz
+    """
+    if not src:
+        return UNKNOWN_MODULE
+    filename, ext = splitext(urlsplit(src).path)
+    if filename.endswith(".min"):
+        filename = filename[:-4]
+    # TODO(dcramer): replace CLEAN_MODULE_RE with tokenizer completely
+    tokens = filename.split("/")
+    for idx, token in enumerate(tokens):
+        # a SHA
+        if VERSION_RE.match(token):
+            return "/".join(tokens[idx + 1 :])
+    return CLEAN_MODULE_RE.sub("", filename) or UNKNOWN_MODULE
+def is_valid_frame(frame):
+    return frame is not None and frame.get("lineno") is not None
+def get_function_for_token(frame, token, previous_frame=None):
+    """
+    Get function name for a given frame based on the token resolved by symbolic.
+    It tries following paths in order:
+    - return token function name if we have a usable value (filtered through `USELESS_FN_NAMES` list),
+    - return mapped name of the caller (previous frame) token if it had,
+    - return token function name, including filtered values if it mapped to anything in the first place,
+    - return current frames function name as a fallback
+    """
+    frame_function_name = frame.get("function")
+    token_function_name = token.function_name
+    # Try to use the function name we got from sourcemap-cache, filtering useless names.
+    if token_function_name not in USELESS_FN_NAMES:
+        return token_function_name
+    # If not found, ask the callsite (previous token) for function name if possible.
+    if previous_frame is not None:
+        last_token ="token")
+        if last_token is not None and not in ("", None):
+            return
+    # If there was no minified name at all, return even useless, filtered one from the original token.
+    if not frame_function_name:
+        return token_function_name
+    # Otherwise fallback to the old, minified name.
+    return frame_function_name
+class JavaScriptSmCacheStacktraceProcessor(StacktraceProcessor):
+    """
+    Modern SourceMap processor using symbolic-sourcemapcache.
+    Attempts to fetch source code for javascript frames,
+    and map their minified positions to original location.
+    Frames must match the following requirements:
+    - lineno >= 0
+    - colno >= 0
+    - abs_path is the HTTP URI to the source
+    - context_line is empty
+    Mutates the input ``data`` with expanded context if available.
+    """
+    def __init__(self, *args, **kwargs):
+        StacktraceProcessor.__init__(self, *args, **kwargs)
+        # Make sure we only fetch organization from cache
+        # We don't need to persist it back since we don't want
+        # to bloat the Event object.
+        organization = getattr(self.project, "_organization_cache", None)
+        if not organization:
+            organization = Organization.objects.get_from_cache(id=self.project.organization_id)
+        self.organization = organization
+        self.max_fetches = MAX_RESOURCE_FETCHES
+        self.allow_scraping = organization.get_option(
+            "sentry:scrape_javascript", True
+        ) is not False and self.project.get_option("sentry:scrape_javascript", True)
+        self.fetch_count = 0
+        self.sourcemaps_touched = set()
+        # cache holding mangled code, original code, and errors associated with
+        # each abs_path in the stacktrace
+        self.cache = SourceCache()
+        # cache holding source URLs, corresponding source map URLs, and source map contents
+        self.sourcemaps = SourceMapCache()
+        self.release = None
+        self.dist = None
+    def get_valid_frames(self):
+        # build list of frames that we can actually grab source for
+        frames = []
+        for info in self.stacktrace_infos:
+            frames.extend(get_path(info.stacktrace, "frames", filter=is_valid_frame, default=()))
+        return frames
+    def preprocess_step(self, processing_task):
+        frames = self.get_valid_frames()
+        if not frames:
+            logger.debug(
+                "Event %r has no frames with enough context to " "fetch remote source",
+      ["event_id"],
+            )
+            return False
+        with sentry_sdk.start_span(
+            op="JavaScriptSmCacheStacktraceProcessor.preprocess_step.get_release"
+        ):
+            self.release = self.get_release(create=True)
+            if"dist") and self.release:
+                timestamp ="timestamp")
+                date = timestamp and datetime.fromtimestamp(timestamp).replace(tzinfo=timezone.utc)
+                self.dist = self.release.add_dist(["dist"], date)
+        with sentry_sdk.start_span(
+            op="JavaScriptSmCacheStacktraceProcessor.preprocess_step.populate_source_cache"
+        ):
+            self.populate_source_cache(frames)
+        return True
+    def handles_frame(self, frame, stacktrace_info):
+        platform = frame.get("platform") or"platform")
+        return platform in ("javascript", "node")
+    def preprocess_frame(self, processable_frame):
+        # Stores the resolved token.  This is used to cross refer to other
+        # frames for function name resolution by call site.
+ = {"token": None}
+    def process_frame(self, processable_frame, processing_task):
+        """
+        Attempt to demangle the given frame.
+        """
+        frame = processable_frame.frame
+        token = None
+        cache = self.cache
+        sourcemaps = self.sourcemaps
+        all_errors = []
+        sourcemap_applied = False
+        # can't demangle if there's no filename or line number present
+        if not frame.get("abs_path") or not frame.get("lineno"):
+            return
+        # also can't demangle node's internal modules
+        # therefore we only process user-land frames (starting with /)
+        # or those created by bundle/webpack internals
+        if"platform") == "node" and not frame.get("abs_path").startswith(
+            ("/", "app:", "webpack:")
+        ):
+            return
+        errors = cache.get_errors(frame["abs_path"])
+        if errors:
+            all_errors.extend(errors)
+        # `source` is used for pre/post and `context_line` frame expansion.
+        # Here it's pointing to minified source, however the variable can be shadowed with the original sourceview
+        # (or `None` if the token doesnt provide us with the `context_line`) down the road.
+        source = self.get_sourceview(frame["abs_path"])
+        source_context = None
+        in_app = None
+        new_frame = dict(frame)
+        raw_frame = dict(frame)
+        sourcemap_url, sourcemap_cache = sourcemaps.get_link(frame["abs_path"])
+        self.sourcemaps_touched.add(sourcemap_url)
+        if sourcemap_cache and frame.get("colno") is None:
+            all_errors.append(
+                {"type": EventError.JS_NO_COLUMN, "url": http.expose_url(frame["abs_path"])}
+            )
+        elif sourcemap_cache:
+            if is_data_uri(sourcemap_url):
+                sourcemap_label = frame["abs_path"]
+            else:
+                sourcemap_label = sourcemap_url
+            sourcemap_label = http.expose_url(sourcemap_label)
+            try:
+                # Errors are 1-indexed in the frames.
+                assert frame["lineno"] > 0, "line numbers are 1-indexed"
+                token = sourcemap_cache.lookup(frame["lineno"], frame["colno"], LINES_OF_CONTEXT)
+            except Exception:
+                token = None
+                all_errors.append(
+                    {
+                        "type": EventError.JS_INVALID_SOURCEMAP_LOCATION,
+                        "column": frame.get("colno"),
+                        "row": frame.get("lineno"),
+                        "source": frame["abs_path"],
+                        "sourcemap": sourcemap_label,
+                    }
+                )
+            # persist the token so that we can find it later
+  ["token"] = token
+            # Store original data in annotation
+            new_frame["data"] = dict(
+                frame.get("data") or {}, sourcemap=sourcemap_label, smcache=True
+            )
+            sourcemap_applied = True
+            if token is not None:
+                if token.src is not None:
+                    abs_path = non_standard_url_join(sourcemap_url, token.src)
+                else:
+                    abs_path = frame["abs_path"]
+                logger.debug(
+                    "Mapping compressed source %r to mapping in %r", frame["abs_path"], abs_path
+                )
+                if token.context_line is not None:
+                    source_context = token.pre_context, token.context_line, token.post_context
+                else:
+                    source = self.get_sourceview(abs_path)
+                if source is None:
+                    errors = cache.get_errors(abs_path)
+                    if errors:
+                        all_errors.extend(errors)
+                    else:
+                        all_errors.append(
+                            {"type": EventError.JS_MISSING_SOURCE, "url": http.expose_url(abs_path)}
+                        )
+                # The tokens are 1-indexed.
+                new_frame["lineno"] = token.line
+                new_frame["colno"] = token.col
+                new_frame["function"] = get_function_for_token(
+                    new_frame, token, processable_frame.previous_frame
+                )
+                filename = token.src
+                # special case webpack support
+                # abs_path will always be the full path with webpack:/// prefix.
+                # filename will be relative to that
+                if abs_path.startswith("webpack:"):
+                    filename = abs_path
+                    # webpack seems to use ~ to imply "relative to resolver root"
+                    # which is generally seen for third party deps
+                    # (i.e. node_modules)
+                    if "/~/" in filename:
+                        filename = "~/" + abs_path.split("/~/", 1)[-1]
+                    elif WEBPACK_NAMESPACE_RE.match(filename):
+                        filename = re.sub(WEBPACK_NAMESPACE_RE, "./", abs_path)
+                    else:
+                        filename = filename.split("webpack:///", 1)[-1]
+                    # As noted above:
+                    # * [js/node] '~/' means they're coming from node_modules, so these are not app dependencies
+                    # * [node] sames goes for `./node_modules/` and '../node_modules/', which is used when bundling node apps
+                    # * [node] and webpack, which includes it's own code to bootstrap all modules and its internals
+                    #   eg. webpack:///webpack/bootstrap, webpack:///external
+                    if (
+                        filename.startswith("~/")
+                        or "/node_modules/" in filename
+                        or not filename.startswith("./")
+                    ):
+                        in_app = False
+                    # And conversely, local dependencies start with './'
+                    elif filename.startswith("./"):
+                        in_app = True
+                    # We want to explicitly generate a webpack module name
+                    new_frame["module"] = generate_module(filename)
+                # while you could technically use a subpath of 'node_modules' for your libraries,
+                # it would be an extremely complicated decision and we've not seen anyone do it
+                # so instead we assume if node_modules is in the path its part of the vendored code
+                elif "/node_modules/" in abs_path:
+                    in_app = False
+                if abs_path.startswith("app:"):
+                    if filename and
+                        in_app = False
+                    else:
+                        in_app = True
+                new_frame["abs_path"] = abs_path
+                new_frame["filename"] = filename
+                if not frame.get("module") and abs_path.startswith(
+                    ("http:", "https:", "webpack:", "app:")
+                ):
+                    new_frame["module"] = generate_module(abs_path)
+        elif sourcemap_url:
+            new_frame["data"] = dict(
+                new_frame.get("data") or {}, sourcemap=http.expose_url(sourcemap_url)
+            )
+        changed_frame = self.expand_frame(new_frame, source_context=source_context, source=source)
+        # If we did not manage to match but we do have a line or column
+        # we want to report an error here.
+        if not new_frame.get("context_line") and source and new_frame.get("colno") is not None:
+            all_errors.append(
+                {
+                    "type": EventError.JS_INVALID_SOURCEMAP_LOCATION,
+                    "column": new_frame["colno"],
+                    "row": new_frame["lineno"],
+                    "source": new_frame["abs_path"],
+                }
+            )
+        changed_raw = sourcemap_applied and self.expand_frame(raw_frame)
+        if sourcemap_applied or all_errors or changed_frame or changed_raw:
+            # In case we are done processing, we iterate over all errors that we got
+            # and we filter out all `JS_MISSING_SOURCE` errors since we consider if we have
+            # a `context_line` we have a symbolicated frame and we don't need to show the error
+            has_context_line = bool(new_frame.get("context_line"))
+            if has_context_line:
+                all_errors[:] = [
+                    x for x in all_errors if x.get("type") is not EventError.JS_MISSING_SOURCE
+                ]
+            if in_app is not None:
+                new_frame["in_app"] = in_app
+                raw_frame["in_app"] = in_app
+            new_frames = [new_frame]
+            raw_frames = [raw_frame] if changed_raw else None
+            try:
+                if features.has(
+                    "organizations:javascript-console-error-tag", self.organization, actor=None
+                ):
+                    self.tag_suspected_console_errors(new_frames)
+            except Exception as exc:
+                logger.exception("Failed to tag suspected console errors", exc_info=exc)
+            return new_frames, raw_frames, all_errors
+    def tag_suspected_console_errors(self, new_frames):
+        """
+        Tag the event with the result of the browser-console-error heuristic.
+        Nothing happens unless the ``organizations:javascript-console-error-tag``
+        feature is enabled for ``self.organization``. Every exception raised
+        along the way is logged and swallowed, so tagging is best-effort and
+        never propagates an error to the caller.
+        :param new_frames: processed frames handed to ``suspected_console_errors``.
+        """
+        def tag_error(new_frames):
+            # Stays ``None`` when the heuristic itself raises; ``set_tag`` is
+            # still called with that value below.
+            suspected_console_errors = None
+            try:
+                suspected_console_errors = self.suspected_console_errors(new_frames)
+            except Exception as exc:
+                logger.error(
+                    "Failed to evaluate event for suspected JavaScript browser console error",
+                    exc_info=exc,
+                )
+            try:
+                # The tag is written onto the event payload held by this processor.
+                set_tag(self.data, "empty_stacktrace.js_console", suspected_console_errors)
+            except Exception as exc:
+                logger.error(
+                    "Failed to tag event with empty_stacktrace.js_console=%s for suspected JavaScript browser console error",
+                    suspected_console_errors,
+                    exc_info=exc,
+                )
+        try:
+            if features.has(
+                "organizations:javascript-console-error-tag", self.organization, actor=None
+            ):
+                tag_error(new_frames)
+        except Exception as exc:
+            logger.exception("Failed to tag suspected console errors", exc_info=exc)
+    def expand_frame(self, frame, source_context=None, source=None):
+        """
+        Attach pre-context, context line and post-context to ``frame`` in place.
+        ``source_context`` is a ``(pre_context, context_line, post_context)``
+        triple; when it is not supplied it is computed from ``source`` (or, if
+        that is missing too, from the source view looked up by ``abs_path``).
+        Returns ``False`` when the frame has no line number or no source could
+        be found, ``True`` otherwise.
+        """
+        lineno = frame.get("lineno")
+        if lineno is None:
+            return False
+        if source_context is None:
+            if not source:
+                source = self.get_sourceview(frame["abs_path"])
+            if source is None:
+                logger.debug("No source found for %s", frame["abs_path"])
+                return False
+        context = source_context or get_source_context(source=source, lineno=lineno)
+        before, current, after = context
+        # Empty or missing context lists are simply not written to the frame.
+        if before is not None and len(before) > 0:
+            frame["pre_context"] = [trim_line(line) for line in before]
+        if current is not None:
+            column = frame.get("colno") or 0
+            frame["context_line"] = trim_line(current, column)
+        if after is not None and len(after) > 0:
+            frame["post_context"] = [trim_line(line) for line in after]
+        return True
+    def get_sourceview(self, filename):
+        """
+        Return the cached entry for ``filename``, asking ``cache_source`` to
+        load it first when the cache does not contain it yet. The result is
+        whatever ``self.cache.get`` yields afterwards.
+        """
+        already_cached = filename in self.cache
+        if not already_cached:
+            self.cache_source(filename)
+        return self.cache.get(filename)
+    def cache_source(self, filename):
+        """
+        Look for and (if found) cache a source file and its associated source
+        map (if any).
+        Each call counts against ``self.max_fetches``; once that budget is
+        exceeded a ``JS_TOO_MANY_REMOTE_SOURCES`` error is recorded for
+        ``filename`` and nothing is fetched. ``http.BadSource`` failures are
+        recorded on ``self.cache`` instead of being raised.
+        """
+        sourcemaps = self.sourcemaps
+        cache = self.cache
+        self.fetch_count += 1
+        if self.fetch_count > self.max_fetches:
+            cache.add_error(filename, {"type": EventError.JS_TOO_MANY_REMOTE_SOURCES})
+            return
+        # TODO: respect cache-control/max-age headers to some extent
+        logger.debug("Attempting to cache source %r", filename)
+        try:
+            # this both looks in the database and tries to scrape the internet
+            with sentry_sdk.start_span(
+                op="JavaScriptSmCacheStacktraceProcessor.cache_source.fetch_file"
+            ) as span:
+                span.set_data("filename", filename)
+                result = fetch_file(
+                    filename,
+                    project=self.project,
+                    release=self.release,
+                    dist=self.dist,
+                    allow_scraping=self.allow_scraping,
+                )
+        except http.BadSource as exc:
+            # most people don't upload release artifacts for their third-party libraries,
+            # so ignore missing node_modules files
+            is_missing_vendored_file = (
+                exc.data["type"] == EventError.JS_MISSING_SOURCE and "node_modules" in filename
+            )
+            if not is_missing_vendored_file:
+                cache.add_error(filename, exc.data)
+            # either way, there's no more for us to do here, since we don't have
+            # a valid file to cache
+            return
+        cache.add(filename, result.body, result.encoding)
+        cache.alias(result.url, filename)
+        sourcemap_url = discover_sourcemap(result)
+        if not sourcemap_url:
+            return
+        logger.debug(
+            "Found sourcemap URL %r for minified script %r", sourcemap_url[:256], result.url
+        )
+        # Remember which sourcemap belongs to this file even when the map
+        # itself has already been fetched for another file.
+        sourcemaps.link(filename, sourcemap_url)
+        if sourcemap_url in sourcemaps:
+            return
+        # pull down sourcemap
+        try:
+            with sentry_sdk.start_span(
+                op="JavaScriptSmCacheStacktraceProcessor.cache_source.fetch_sourcemap"
+            ) as span:
+                span.set_data("sourcemap_url", sourcemap_url)
+                sourcemap_view = fetch_sourcemap(
+                    sourcemap_url,
+                    source=result.body,
+                    project=self.project,
+                    release=self.release,
+                    dist=self.dist,
+                    allow_scraping=self.allow_scraping,
+                )
+        except http.BadSource as exc:
+            # we don't perform the same check here as above, because if someone has
+            # uploaded a node_modules file, which has a sourceMappingURL, they
+            # presumably would like it mapped (and would like to know why it's not
+            # working, if that's the case). If they're not looking for it to be
+            # mapped, then they shouldn't be uploading the source file in the
+            # first place.
+            cache.add_error(filename, exc.data)
+            return
+        with sentry_sdk.start_span(
+            op="JavaScriptSmCacheStacktraceProcessor.cache_source.cache_sourcemap_view"
+        ):
+            sourcemaps.add(sourcemap_url, sourcemap_view)
+    def populate_source_cache(self, frames):
+        """
+        Fetch all sources that we know are required (being referenced directly
+        in frames).
+        Frames without an ``abs_path``, ``<anonymous>`` frames and, for node
+        events, frames whose path does not start with ``app:`` are skipped.
+        Paths are de-duplicated before ``cache_source`` is called for each.
+        """
+        pending_file_list = set()
+        # The event platform is the same for every frame, so read it once.
+        is_node_event = self.data.get("platform") == "node"
+        for f in frames:
+            abs_path = f.get("abs_path")
+            # We can't even attempt to fetch source if abs_path is None
+            if abs_path is None:
+                continue
+            # tbh not entirely sure how this happens, but raven-js allows this
+            # to be caught. I think this comes from dev consoles and whatnot
+            # where there is no page. This just bails early instead of exposing
+            # a fetch error that may be confusing.
+            if abs_path == "<anonymous>":
+                continue
+            # we cannot fetch any other files than those uploaded by user
+            if is_node_event and not abs_path.startswith("app:"):
+                continue
+            pending_file_list.add(abs_path)
+        for filename in pending_file_list:
+            with sentry_sdk.start_span(
+                op="JavaScriptSmCacheStacktraceProcessor.populate_source_cache.cache_source"
+            ) as span:
+                span.set_data("filename", filename)
+                self.cache_source(filename=filename)
+    def close(self):
+        """
+        Run the base ``StacktraceProcessor`` cleanup, then emit the
+        ``sourcemaps.processed`` counter when any sourcemaps were touched.
+        """
+        StacktraceProcessor.close(self)
+        if not self.sourcemaps_touched:
+            return
+        touched_count = len(self.sourcemaps_touched)
+        metrics.incr("sourcemaps.processed", amount=touched_count, skip_internal=True)
+    def suspected_console_errors(self, frames):
+        """
+        Heuristically decide whether ``frames`` look like an error that was
+        typed into a browser console.
+        The answer is ``True`` only for an unhandled ``onerror`` exception of
+        type SyntaxError/ReferenceError/TypeError with at most two frames that
+        look anonymous (function ``?`` in file ``<anonymous>``).
+        Only the first entry of ``self.stacktrace_infos`` is inspected, because
+        every path through the loop body returns.
+        :param frames: list of frame dicts.
+        :returns: ``bool``.
+        """
+        def is_suspicious_frame(frame) -> bool:
+            function = frame.get("function", None)
+            filename = frame.get("filename", None)
+            return function == "?" and filename == "<anonymous>"
+        def has_suspicious_frames(frames) -> bool:
+            # With exactly two frames only the first one has to look anonymous.
+            if len(frames) == 2 and is_suspicious_frame(frames[0]):
+                return True
+            return all(is_suspicious_frame(frame) for frame in frames)
+        for info in self.stacktrace_infos:
+            is_exception = info.is_exception and info.container
+            mechanism = info.container.get("mechanism") if is_exception else None
+            error_type = info.container.get("type") if is_exception else None
+            if (
+                not frames
+                or not mechanism
+                or mechanism.get("type") != "onerror"
+                or mechanism.get("handled")
+            ):
+                return False
+            has_short_stacktrace = len(frames) <= 2
+            # The exception "type" may be missing or None; treat that as "not
+            # suspicious" instead of raising AttributeError on ``.lower()``.
+            is_suspicious_error = isinstance(error_type, str) and error_type.lower() in (
+                "syntaxerror",
+                "referenceerror",
+                "typeerror",
+            )
+            return has_short_stacktrace and is_suspicious_error and has_suspicious_frames(frames)
+        return False

+ 0 - 62

@@ -79,65 +79,3 @@ class ExampleTestCase(RelayStoreHelper, TransactionTestCase):
         assert frame_list[3].function == "onFailure"
         assert frame_list[3].lineno == 5
         assert frame_list[3].filename == "test.js"
-    # TODO(smcache): Remove this test once SmCache is integrated.
-    @responses.activate
-    def test_smcache_processed_frame(self):
-        with self.feature("projects:sourcemapcache-processor"):
-            responses.add(
-                responses.GET,
-                "",
-                body=load_fixture("test.js"),
-                content_type="application/javascript",
-            )
-            responses.add(
-                responses.GET,
-                "",
-                body=load_fixture("test.min.js"),
-                content_type="application/javascript",
-            )
-            responses.add(
-                responses.GET,
-                "",
-                body=load_fixture(""),
-                content_type="application/json",
-            )
-            responses.add(
-                responses.GET, "", body="Not Found", status=404
-            )
-            min_ago = iso_format(before_now(minutes=1))
-            data = {
-                "timestamp": min_ago,
-                "message": "hello",
-                "platform": "javascript",
-                "exception": {
-                    "values": [
-                        {
-                            "type": "Error",
-                            "stacktrace": {
-                                "frames": json.loads(load_fixture("minifiedError.json"))[::-1]
-                            },
-                        }
-                    ]
-                },
-            }
-            event = self.post_and_retrieve_event(data)
-            exception = event.interfaces["exception"]
-            frame_list = exception.values[0].stacktrace.frames
-            assert len(frame_list) == 4
-            # First frame is coming from <script> tag, so its not mapped.
-            for frame in frame_list[1:]:
-                smcache_frame ="smcache_frame")
-                assert smcache_frame.get("function") == frame.function
-                assert smcache_frame.get("lineno") == frame.lineno
-                assert smcache_frame.get("colno") == frame.colno
-                assert smcache_frame.get("filename") == frame.filename
-                assert smcache_frame.get("pre_context") == frame.pre_context
-                assert smcache_frame.get("post_context") == frame.post_context
-                assert smcache_frame.get("context_line") == frame.context_line

+ 2 - 10

@@ -151,11 +151,7 @@ class JavascriptIntegrationTest(RelayStoreHelper, SnubaTestCase, TransactionTest
         mock_fetch_file.return_value.encoding = None
         mock_fetch_file.return_value.headers = {}
-        # TODO(smcache): We make sure that the tests are run without the feature to preserve correct mock assertions.
-        # It will work just fine when we migrate to SmCache, as call count will stay the same with the new processor.
-        # Note its been called twice, as there as two processors when run with the feature.
-        with self.feature({"projects:sourcemapcache-processor": False}):
-            event = self.post_and_retrieve_event(data)
+        event = self.post_and_retrieve_event(data)
@@ -213,11 +209,7 @@ class JavascriptIntegrationTest(RelayStoreHelper, SnubaTestCase, TransactionTest
         mock_fetch_file.return_value.body = force_bytes("\n".join("<generated source>"))
         mock_fetch_file.return_value.encoding = None
-        # TODO(smcache): We make sure that the tests are run without the feature to preserve correct mock assertions.
-        # It will work just fine when we migrate to SmCache, as call count will stay the same with the new processor.
-        # Note its been called twice, as there as two processors when run with the feature.
-        with self.feature({"projects:sourcemapcache-processor": False}):
-            event = self.post_and_retrieve_event(data)
+        event = self.post_and_retrieve_event(data)

+ 38 - 11

@@ -9,6 +9,7 @@ from unittest.mock import ANY, MagicMock, call, patch
 import pytest
 import responses
 from requests.exceptions import RequestException
+from symbolic import SourceMapTokenMatch
 from sentry import http, options
 from sentry.event_manager import get_tag
@@ -25,16 +26,17 @@ from sentry.lang.javascript.processor import (
-    get_function_for_token,
+from sentry.lang.javascript.processor_smcache import fetch_sourcemap as fetch_sourcemap_smcache
+from sentry.lang.javascript.processor_smcache import get_function_for_token
 from sentry.models import EventError, File, Release, ReleaseFile
 from sentry.models.releasefile import ARTIFACT_INDEX_FILENAME, update_artifact_index
-from sentry.stacktraces.processing import find_stacktraces_in_data
+from sentry.stacktraces.processing import ProcessableFrame, find_stacktraces_in_data
 from sentry.testutils import TestCase
 from sentry.testutils.helpers.features import with_feature
 from sentry.testutils.helpers.options import override_options
@@ -1113,33 +1115,58 @@ class GetFunctionForTokenTest(unittest.TestCase):
         return Token(fn_name, token_name)
+    def get_frame(self, frame):
+        """
+        Wrap a raw frame dict in a ``ProcessableFrame`` whose ``data`` starts
+        out with no token, which is the shape ``get_function_for_token`` is
+        exercised with in these tests.
+        """
+        processable_frame = ProcessableFrame(frame, 0, None, None, None)
+        processable_frame.data = {"token": None}
+        return processable_frame
     def test_valid_name(self):
-        frame = {"function": "original"}
+        frame = self.get_frame({"function": "original"})
         token = self.get_token("lookedup")
         assert get_function_for_token(frame, token) == "lookedup"
     def test_fallback_to_previous_frames_token_if_useless_name(self):
-        previous_frame = {"data": {"token": self.get_token("previous_fn", "previous_name")}}
-        frame = {"function": None}
+        previous_frame = self.get_frame({})
+["token"] = self.get_token("previous_fn", "previous_name")
+        frame = self.get_frame({"function": None})
         token = self.get_token("__webpack_require__")
         assert get_function_for_token(frame, token, previous_frame) == "previous_name"
     def test_fallback_to_useless_name(self):
-        previous_frame = {"data": {"token": None}}
-        frame = {"function": None}
+        previous_frame = self.get_frame({"data": {"token": None}})
+        frame = self.get_frame({"function": None})
         token = self.get_token("__webpack_require__")
         assert get_function_for_token(frame, token, previous_frame) == "__webpack_require__"
     def test_fallback_to_original_name(self):
-        previous_frame = {"data": {"token": None}}
-        frame = {"function": "original"}
+        previous_frame = self.get_frame({"data": {"token": None}})
+        frame = self.get_frame({"function": "original"})
         token = self.get_token("__webpack_require__")
         assert get_function_for_token(frame, token, previous_frame) == "original"
 class FetchSourcemapTest(TestCase):
+    # TODO(smcache): Remove non-`_smcache` tests once we migrate to smcache only.
     def test_simple_base64(self):
         smap_view = fetch_sourcemap(base64_sourcemap)
+        tokens = [SourceMapTokenMatch(0, 0, 1, 0, src="/test.js", src_id=0)]
+        assert list(smap_view) == tokens
+        sv = smap_view.get_sourceview(0)
+        assert sv.get_source() == 'console.log("hello, World!")'
+        assert smap_view.get_source_name(0) == "/test.js"
+    def test_base64_without_padding(self):
+        """``fetch_sourcemap`` gives the same view after trailing ``=`` is stripped from the input."""
+        unpadded = base64_sourcemap.rstrip("=")
+        smap_view = fetch_sourcemap(unpadded)
+        expected_tokens = [SourceMapTokenMatch(0, 0, 1, 0, src="/test.js", src_id=0)]
+        assert list(smap_view) == expected_tokens
+        source_view = smap_view.get_sourceview(0)
+        assert source_view.get_source() == 'console.log("hello, World!")'
+        assert smap_view.get_source_name(0) == "/test.js"
+    def test_simple_base64_smcache(self):
+        smap_view = fetch_sourcemap_smcache(base64_sourcemap)
         token = smap_view.lookup(1, 1, 0)
         assert token.src == "/test.js"
@@ -1147,8 +1174,8 @@ class FetchSourcemapTest(TestCase):
         assert token.col == 1
         assert token.context_line == 'console.log("hello, World!")'
-    def test_base64_without_padding(self):
-        smap_view = fetch_sourcemap(base64_sourcemap.rstrip("="))
+    def test_base64_without_padding_smcache(self):
+        smap_view = fetch_sourcemap_smcache(base64_sourcemap.rstrip("="))
         token = smap_view.lookup(1, 1, 0)
         assert token.src == "/test.js"