Просмотр исходного кода

ref(processor): Use symbolic-sourcemapcache for JavaScript Sourcemap processing (#38551)

This PR attempts to replace the currently used `rust-sourcemap` crate
and it's symbolic python bindings, with `symbolic-sourcemapcache` crate.

It makes the whole processing pipeline easier to maintain, as it pushes
some work directly to Symbolic, as well as we get better function names
due to better scope resolution and in some cases better file URLs.

Other than that, we don't use `SourceView` anymore, as it seemed like an
unnecessary layer of abstraction for something that is used only for
`context_lines` extraction. We cache `utf-8` decoded sources directly
now, as this way we can encode them only once for `SmCache` instance
initialization, and use the source directly otherwise for context lines
extraction.

Some tests had to updated to express current behavior.

The notable thing is `useless_fn_names = ["<anonymous>",
"__webpack_require__", "__webpack_modules__"]`, which is mostly for
`production` mode of webpack, that by default trims all the function
names, and we decided to fallback to the minified names in those cases
instead (this was already the old behavior).

It should be possible to extract something better, but we'd need to
parse all `sourceContents` from sourcemap to do that, as the only thing
we can get better function name for the case mentioned above, is if we
look at the right-hand side of default node export, in form of
`module.exports = function foo () {}`. This should give us `foo`, yet
the only thing we can extract is `module.exports`, as minified form of
this expression in webpack production mode is `module.exports = function
() {}`.
Kamil Ogórek 2 лет назад
Родитель
Сommit
ae9c0d8a33

+ 1 - 1
requirements-base.txt

@@ -61,7 +61,7 @@ snuba-sdk>=1.0.1
 simplejson>=3.17.6
 statsd>=3.3
 structlog>=21.1.0
-symbolic>=8.7.1
+symbolic>=9.2.0
 toronado>=0.1.0
 typing-extensions>=3.10.0.2
 ua-parser>=0.10.0

+ 1 - 1
requirements-dev-frozen.txt

@@ -158,7 +158,7 @@ soupsieve==2.3.2.post1
 sqlparse==0.3.0
 statsd==3.3
 structlog==21.1.0
-symbolic==8.7.1
+symbolic==9.2.0
 tokenize-rt==4.2.1
 toml==0.10.2
 tomli==2.0.1

+ 1 - 1
requirements-frozen.txt

@@ -111,7 +111,7 @@ soupsieve==2.3.2.post1
 sqlparse==0.3.0
 statsd==3.3
 structlog==21.1.0
-symbolic==8.7.1
+symbolic==9.2.0
 toronado==0.1.0
 trio==0.21.0
 trio-websocket==0.9.2

+ 2 - 0
src/sentry/conf/server.py

@@ -1214,6 +1214,8 @@ SENTRY_FEATURES = {
     "projects:rate-limits": True,
     # Enable functionality to trigger service hooks upon event ingestion.
     "projects:servicehooks": False,
+    # Enable use of symbolic-sourcemapcache for JavaScript Source Maps processing
+    "projects:sourcemapcache-processor": False,
     # Enable suspect resolutions feature
     "projects:suspect-resolutions": False,
     # Use Kafka (instead of Celery) for ingestion pipeline.

+ 1 - 0
src/sentry/features/__init__.py

@@ -209,6 +209,7 @@ default_manager.add("projects:similarity-indexing", ProjectFeature)
 default_manager.add("projects:similarity-indexing-v2", ProjectFeature)
 default_manager.add("projects:similarity-view", ProjectFeature)
 default_manager.add("projects:similarity-view-v2", ProjectFeature)
+default_manager.add("projects:sourcemapcache-processor", ProjectFeature, True)
 default_manager.add("projects:suspect-resolutions", ProjectFeature, True)
 
 # Project plugin features

+ 333 - 23
src/sentry/lang/javascript/processor.py

@@ -16,10 +16,10 @@ from django.conf import settings
 from django.utils import timezone
 from django.utils.encoding import force_bytes, force_text
 from requests.utils import get_encoding_from_headers
+from symbolic import SourceMapCache as SmCache
 from symbolic import SourceMapView
 
-from sentry import http, options
-from sentry.interfaces.stacktrace import Stacktrace
+from sentry import features, http, options
 from sentry.models import EventError, Organization, ReleaseFile
 from sentry.models.releasefile import ARTIFACT_INDEX_FILENAME, ReleaseArchive, read_artifact_index
 from sentry.stacktraces.processing import StacktraceProcessor
@@ -33,7 +33,7 @@ from sentry.utils.files import compress_file
 from sentry.utils.hashlib import md5_text
 from sentry.utils.http import is_valid_origin
 from sentry.utils.retries import ConditionalRetryPolicy, exponential_delay
-from sentry.utils.safe import get_path
+from sentry.utils.safe import get_path, set_path
 from sentry.utils.urls import non_standard_url_join
 
 from .cache import SourceCache, SourceMapCache
@@ -46,6 +46,9 @@ LINES_OF_CONTEXT = 5
 BASE64_SOURCEMAP_PREAMBLE = "data:application/json;base64,"
 BASE64_PREAMBLE_LENGTH = len(BASE64_SOURCEMAP_PREAMBLE)
 UNKNOWN_MODULE = "<unknown module>"
+# Names that do not provide any reasonable value, and that can possibly obstruct
+# better available names. In case we encounter one, we fallback to current frame fn name if available.
+USELESS_FN_NAMES = ["<anonymous>", "__webpack_require__", "__webpack_modules__"]
 CLEAN_MODULE_RE = re.compile(
     r"""^
 (?:/|  # Leading slashes
@@ -82,6 +85,13 @@ class UnparseableSourcemap(http.BadSource):
     error_type = EventError.JS_INVALID_SOURCEMAP
 
 
+# TODO(smcache): Remove this function and all its usages.
+def should_run_smcache(cls):
+    return features.has("projects:sourcemapcache-processor", cls.project) and not isinstance(
+        cls, JavaScriptSmCacheStacktraceProcessor
+    )
+
+
 def trim_line(line, column=0):
     """
     Trims a line down to a goal of 140 characters, with a little
@@ -117,6 +127,7 @@ def trim_line(line, column=0):
     return line
 
 
+# TODO(smcache): Remove in favor of `get_raw_source_context` (remove _raw from its name too).
 def get_source_context(source, lineno, colno, context=LINES_OF_CONTEXT):
     if not source:
         return None, None, None
@@ -147,6 +158,36 @@ def get_source_context(source, lineno, colno, context=LINES_OF_CONTEXT):
     return pre_context or None, context_line, post_context or None
 
 
+def get_raw_source_context(source, lineno, context=LINES_OF_CONTEXT):
+    if not source:
+        return None, None, None
+
+    # lineno's in JS are 1-indexed
+    # just in case. sometimes math is hard
+    if lineno > 0:
+        lineno -= 1
+
+    lower_bound = max(0, lineno - context)
+    upper_bound = min(lineno + 1 + context, len(source))
+
+    try:
+        pre_context = source[lower_bound:lineno]
+    except IndexError:
+        pre_context = []
+
+    try:
+        context_line = source[lineno]
+    except IndexError:
+        context_line = ""
+
+    try:
+        post_context = source[(lineno + 1) : upper_bound]
+    except IndexError:
+        post_context = []
+
+    return pre_context or None, context_line, post_context or None
+
+
 def discover_sourcemap(result):
     """
     Given a UrlResult object, attempt to discover a sourcemap URL.
@@ -739,7 +780,10 @@ def get_max_age(headers):
     return min(max_age, CACHE_CONTROL_MAX)
 
 
-def fetch_sourcemap(url, project=None, release=None, dist=None, allow_scraping=True):
+# TODO(smcache): Remove unnecessary `use_smcache` flag.
+def fetch_sourcemap(
+    url, source=b"", project=None, release=None, dist=None, allow_scraping=True, use_smcache=True
+):
     if is_data_uri(url):
         try:
             body = base64.b64decode(
@@ -763,10 +807,18 @@ def fetch_sourcemap(url, project=None, release=None, dist=None, allow_scraping=T
             )
         body = result.body
     try:
-        with sentry_sdk.start_span(
-            op="JavaScriptStacktraceProcessor.fetch_sourcemap.SourceMapView.from_json_bytes"
-        ):
-            return SourceMapView.from_json_bytes(body)
+        # TODO(smcache): Remove unnecessary `use_smcache` flag and use `SmCache` only.
+        if use_smcache:
+            with sentry_sdk.start_span(
+                op="JavaScriptStacktraceProcessor.fetch_sourcemap.SmCache.from_bytes"
+            ):
+                return SmCache.from_bytes(source, body)
+        else:
+            with sentry_sdk.start_span(
+                op="JavaScriptStacktraceProcessor.fetch_sourcemap.SourceMapView.from_json_bytes"
+            ):
+                return SourceMapView.from_json_bytes(body)
+
     except Exception as exc:
         # This is in debug because the product shows an error already.
         logger.debug(str(exc), exc_info=True)
@@ -808,6 +860,22 @@ def is_valid_frame(frame):
     return frame is not None and frame.get("lineno") is not None
 
 
+def get_function_for_token(frame, token):
+    """
+    Get function name for a given frame, based on the looked up token.
+    Return tokens name if we have a usable value from symbolic or we have no initial function name at all,
+    otherwise, fallback to frames current function name.
+    """
+
+    frame_function_name = frame.get("function")
+    token_function_name = token.function_name
+
+    if token_function_name not in USELESS_FN_NAMES or not frame_function_name:
+        return token_function_name
+
+    return frame_function_name
+
+
 class JavaScriptStacktraceProcessor(StacktraceProcessor):
     """
     Attempts to fetch source code for javascript frames.
@@ -850,14 +918,8 @@ class JavaScriptStacktraceProcessor(StacktraceProcessor):
         self.release = None
         self.dist = None
 
-    def get_stacktraces(self, data):
-        exceptions = get_path(data, "exception", "values", filter=True, default=())
-        stacktraces = [e["stacktrace"] for e in exceptions if e.get("stacktrace")]
-
-        if "stacktrace" in data:
-            stacktraces.append(data["stacktrace"])
-
-        return [(s, Stacktrace.to_python(s)) for s in stacktraces]
+        if should_run_smcache(self):
+            self.smcache_processor = JavaScriptSmCacheStacktraceProcessor(*args, **kwargs)
 
     def get_valid_frames(self):
         # build list of frames that we can actually grab source for
@@ -886,6 +948,10 @@ class JavaScriptStacktraceProcessor(StacktraceProcessor):
             op="JavaScriptStacktraceProcessor.preprocess_step.populate_source_cache"
         ):
             self.populate_source_cache(frames)
+
+        if should_run_smcache(self):
+            self.smcache_processor.preprocess_step(None)
+
         return True
 
     def handles_frame(self, frame, stacktrace_info):
@@ -1112,6 +1178,11 @@ class JavaScriptStacktraceProcessor(StacktraceProcessor):
                 new_frame["in_app"] = in_app
                 raw_frame["in_app"] = in_app
 
+            # Run new processor only for frames that were actually modified in any way.
+            if should_run_smcache(self) and new_frame != raw_frame:
+                smcache_rv = self.smcache_processor.process_frame(processable_frame, None)
+                set_path(new_frame, "data", "smcache_frame", value=smcache_rv[0][0])
+
             new_frames = [new_frame]
             raw_frames = [raw_frame] if changed_raw else None
             return new_frames, raw_frames, all_errors
@@ -1202,10 +1273,13 @@ class JavaScriptStacktraceProcessor(StacktraceProcessor):
                 span.set_data("sourcemap_url", sourcemap_url)
                 sourcemap_view = fetch_sourcemap(
                     sourcemap_url,
+                    source=result.body,
                     project=self.project,
                     release=self.release,
                     dist=self.dist,
                     allow_scraping=self.allow_scraping,
+                    # TODO(smcache): Remove unnecessary `use_smcache` flag.
+                    use_smcache=isinstance(self, JavaScriptSmCacheStacktraceProcessor),
                 )
         except http.BadSource as exc:
             # we don't perform the same check here as above, because if someone has
@@ -1220,15 +1294,18 @@ class JavaScriptStacktraceProcessor(StacktraceProcessor):
         with sentry_sdk.start_span(
             op="JavaScriptStacktraceProcessor.cache_source.cache_sourcemap_view"
         ) as span:
-            span.set_data("source_count", sourcemap_view.source_count)
-
             sourcemaps.add(sourcemap_url, sourcemap_view)
 
-            # cache any inlined sources
-            for src_id, source_name in sourcemap_view.iter_sources():
-                source_view = sourcemap_view.get_sourceview(src_id)
-                if source_view is not None:
-                    self.cache.add(non_standard_url_join(sourcemap_url, source_name), source_view)
+            # TODO(smcache): Remove this whole iteration block
+            if not isinstance(self, JavaScriptSmCacheStacktraceProcessor):
+                span.set_data("source_count", sourcemap_view.source_count)
+                # cache any inlined sources
+                for src_id, source_name in sourcemap_view.iter_sources():
+                    source_view = sourcemap_view.get_sourceview(src_id)
+                    if source_view is not None:
+                        self.cache.add(
+                            non_standard_url_join(sourcemap_url, source_name), source_view
+                        )
 
     def populate_source_cache(self, frames):
         """
@@ -1264,3 +1341,236 @@ class JavaScriptStacktraceProcessor(StacktraceProcessor):
             metrics.incr(
                 "sourcemaps.processed", amount=len(self.sourcemaps_touched), skip_internal=True
             )
+
+
+class JavaScriptSmCacheStacktraceProcessor(JavaScriptStacktraceProcessor):
+    """
+    Modern SourceMap processor using symbolic-sourcemapcache.
+    Subclass of `JavaScriptStacktraceProcessor` with only changed methods overwritten.
+    To make it a default, change replace all `JavaScriptStacktraceProcessor` methods with
+    those from this class instead.
+    """
+
+    def __init__(self, *args, **kwargs):
+        JavaScriptStacktraceProcessor.__init__(self, *args, **kwargs)
+
+    def process_frame(self, processable_frame, processing_task):
+        """
+        Attempt to demangle the given frame.
+        """
+        frame = processable_frame.frame
+        token = None
+
+        cache = self.cache
+        sourcemaps = self.sourcemaps
+        all_errors = []
+        sourcemap_applied = False
+
+        # can't demangle if there's no filename or line number present
+        if not frame.get("abs_path") or not frame.get("lineno"):
+            return
+
+        # also can't demangle node's internal modules
+        # therefore we only process user-land frames (starting with /)
+        # or those created by bundle/webpack internals
+        if self.data.get("platform") == "node" and not frame.get("abs_path").startswith(
+            ("/", "app:", "webpack:")
+        ):
+            return
+
+        errors = cache.get_errors(frame["abs_path"])
+        if errors:
+            all_errors.extend(errors)
+
+        # `source` is used for pre/post and `context_line` frame expansion.
+        # Here it's pointing to minified source, however the variable can be shadowed with the original sourceview
+        # (or `None` if the token doesnt provide us with the `context_line`) down the road.
+        source = self.get_sourceview(frame["abs_path"])
+        source_context = None
+
+        in_app = None
+        new_frame = dict(frame)
+        raw_frame = dict(frame)
+
+        sourcemap_url, sourcemap_cache = sourcemaps.get_link(frame["abs_path"])
+        self.sourcemaps_touched.add(sourcemap_url)
+
+        if sourcemap_cache and frame.get("colno") is None:
+            all_errors.append(
+                {"type": EventError.JS_NO_COLUMN, "url": http.expose_url(frame["abs_path"])}
+            )
+        elif sourcemap_cache:
+            if is_data_uri(sourcemap_url):
+                sourcemap_label = frame["abs_path"]
+            else:
+                sourcemap_label = sourcemap_url
+
+            sourcemap_label = http.expose_url(sourcemap_label)
+
+            try:
+                # Errors are 1-indexed in the frames.
+                assert frame["lineno"] > 0, "line numbers are 1-indexed"
+                token = sourcemap_cache.lookup(frame["lineno"], frame["colno"], LINES_OF_CONTEXT)
+            except Exception:
+                token = None
+                all_errors.append(
+                    {
+                        "type": EventError.JS_INVALID_SOURCEMAP_LOCATION,
+                        "column": frame.get("colno"),
+                        "row": frame.get("lineno"),
+                        "source": frame["abs_path"],
+                        "sourcemap": sourcemap_label,
+                    }
+                )
+
+            # Store original data in annotation
+            new_frame["data"] = dict(frame.get("data") or {}, sourcemap=sourcemap_label)
+
+            sourcemap_applied = True
+
+            if token is not None:
+                if token.src is not None:
+                    abs_path = non_standard_url_join(sourcemap_url, token.src)
+                else:
+                    abs_path = frame["abs_path"]
+
+                logger.debug(
+                    "Mapping compressed source %r to mapping in %r", frame["abs_path"], abs_path
+                )
+
+                if token.context_line is not None:
+                    source_context = token.pre_context, token.context_line, token.post_context
+                else:
+                    source = self.get_sourceview(abs_path)
+
+                if source is None:
+                    errors = cache.get_errors(abs_path)
+                    if errors:
+                        all_errors.extend(errors)
+                    else:
+                        all_errors.append(
+                            {"type": EventError.JS_MISSING_SOURCE, "url": http.expose_url(abs_path)}
+                        )
+
+                # The tokens are 1-indexed.
+                new_frame["lineno"] = token.line
+                new_frame["colno"] = token.col
+                new_frame["function"] = get_function_for_token(new_frame, token)
+
+                filename = token.src
+                # special case webpack support
+                # abs_path will always be the full path with webpack:/// prefix.
+                # filename will be relative to that
+                if abs_path.startswith("webpack:"):
+                    filename = abs_path
+                    # webpack seems to use ~ to imply "relative to resolver root"
+                    # which is generally seen for third party deps
+                    # (i.e. node_modules)
+                    if "/~/" in filename:
+                        filename = "~/" + abs_path.split("/~/", 1)[-1]
+                    elif WEBPACK_NAMESPACE_RE.match(filename):
+                        filename = re.sub(WEBPACK_NAMESPACE_RE, "./", abs_path)
+                    else:
+                        filename = filename.split("webpack:///", 1)[-1]
+
+                    # As noted above:
+                    # * [js/node] '~/' means they're coming from node_modules, so these are not app dependencies
+                    # * [node] sames goes for `./node_modules/` and '../node_modules/', which is used when bundling node apps
+                    # * [node] and webpack, which includes it's own code to bootstrap all modules and its internals
+                    #   eg. webpack:///webpack/bootstrap, webpack:///external
+                    if (
+                        filename.startswith("~/")
+                        or "/node_modules/" in filename
+                        or not filename.startswith("./")
+                    ):
+                        in_app = False
+                    # And conversely, local dependencies start with './'
+                    elif filename.startswith("./"):
+                        in_app = True
+                    # We want to explicitly generate a webpack module name
+                    new_frame["module"] = generate_module(filename)
+
+                # while you could technically use a subpath of 'node_modules' for your libraries,
+                # it would be an extremely complicated decision and we've not seen anyone do it
+                # so instead we assume if node_modules is in the path its part of the vendored code
+                elif "/node_modules/" in abs_path:
+                    in_app = False
+
+                if abs_path.startswith("app:"):
+                    if filename and NODE_MODULES_RE.search(filename):
+                        in_app = False
+                    else:
+                        in_app = True
+
+                new_frame["abs_path"] = abs_path
+                new_frame["filename"] = filename
+                if not frame.get("module") and abs_path.startswith(
+                    ("http:", "https:", "webpack:", "app:")
+                ):
+                    new_frame["module"] = generate_module(abs_path)
+
+        elif sourcemap_url:
+            new_frame["data"] = dict(
+                new_frame.get("data") or {}, sourcemap=http.expose_url(sourcemap_url)
+            )
+
+        changed_frame = self.expand_frame(new_frame, source_context=source_context, source=source)
+
+        # If we did not manage to match but we do have a line or column
+        # we want to report an error here.
+        if not new_frame.get("context_line") and source and new_frame.get("colno") is not None:
+            all_errors.append(
+                {
+                    "type": EventError.JS_INVALID_SOURCEMAP_LOCATION,
+                    "column": new_frame["colno"],
+                    "row": new_frame["lineno"],
+                    "source": new_frame["abs_path"],
+                }
+            )
+
+        changed_raw = sourcemap_applied and self.expand_frame(raw_frame)
+
+        if sourcemap_applied or all_errors or changed_frame or changed_raw:
+            # In case we are done processing, we iterate over all errors that we got
+            # and we filter out all `JS_MISSING_SOURCE` errors since we consider if we have
+            # a `context_line` we have a symbolicated frame and we don't need to show the error
+            has_context_line = bool(new_frame.get("context_line"))
+            if has_context_line:
+                all_errors[:] = [
+                    x for x in all_errors if x.get("type") is not EventError.JS_MISSING_SOURCE
+                ]
+
+            if in_app is not None:
+                new_frame["in_app"] = in_app
+                raw_frame["in_app"] = in_app
+
+            new_frames = [new_frame]
+            raw_frames = [raw_frame] if changed_raw else None
+            return new_frames, raw_frames, all_errors
+
+    def expand_frame(self, frame, source_context=None, source=None):
+        """
+        Mutate the given frame to include pre- and post-context lines.
+        """
+
+        if frame.get("lineno") is None:
+            return False
+
+        if source_context is None:
+            source = source or self.get_sourceview(frame["abs_path"])
+            if source is None:
+                logger.debug("No source found for %s", frame["abs_path"])
+                return False
+
+        (pre_context, context_line, post_context) = source_context or get_raw_source_context(
+            source=source, lineno=frame["lineno"]
+        )
+
+        if pre_context is not None and len(pre_context) > 0:
+            frame["pre_context"] = [trim_line(x) for x in pre_context]
+        if context_line is not None:
+            frame["context_line"] = trim_line(context_line, frame.get("colno") or 0)
+        if post_context is not None and len(post_context) > 0:
+            frame["post_context"] = [trim_line(x) for x in post_context]
+
+        return True

+ 62 - 0
tests/relay_integration/lang/javascript/test_example.py

@@ -79,3 +79,65 @@ class ExampleTestCase(RelayStoreHelper, TransactionTestCase):
         assert frame_list[3].function == "onFailure"
         assert frame_list[3].lineno == 5
         assert frame_list[3].filename == "test.js"
+
+    # TODO(smcache): Remove this test once SmCache is integrated.
+    @responses.activate
+    def test_smcache_processed_frame(self):
+        with self.feature("projects:sourcemapcache-processor"):
+            responses.add(
+                responses.GET,
+                "http://example.com/test.js",
+                body=load_fixture("test.js"),
+                content_type="application/javascript",
+            )
+            responses.add(
+                responses.GET,
+                "http://example.com/test.min.js",
+                body=load_fixture("test.min.js"),
+                content_type="application/javascript",
+            )
+            responses.add(
+                responses.GET,
+                "http://example.com/test.map",
+                body=load_fixture("test.map"),
+                content_type="application/json",
+            )
+            responses.add(
+                responses.GET, "http://example.com/index.html", body="Not Found", status=404
+            )
+
+            min_ago = iso_format(before_now(minutes=1))
+
+            data = {
+                "timestamp": min_ago,
+                "message": "hello",
+                "platform": "javascript",
+                "exception": {
+                    "values": [
+                        {
+                            "type": "Error",
+                            "stacktrace": {
+                                "frames": json.loads(load_fixture("minifiedError.json"))[::-1]
+                            },
+                        }
+                    ]
+                },
+            }
+
+            event = self.post_and_retrieve_event(data)
+
+            exception = event.interfaces["exception"]
+            frame_list = exception.values[0].stacktrace.frames
+
+            assert len(frame_list) == 4
+
+            # First frame is coming from <script> tag, so its not mapped.
+            for frame in frame_list[1:]:
+                smcache_frame = frame.data.get("smcache_frame")
+                assert smcache_frame.get("function") == frame.function
+                assert smcache_frame.get("lineno") == frame.lineno
+                assert smcache_frame.get("colno") == frame.colno
+                assert smcache_frame.get("filename") == frame.filename
+                assert smcache_frame.get("pre_context") == frame.pre_context
+                assert smcache_frame.get("post_context") == frame.post_context
+                assert smcache_frame.get("context_line") == frame.context_line

+ 18 - 11
tests/relay_integration/lang/javascript/test_plugin.py

@@ -151,7 +151,11 @@ class JavascriptIntegrationTest(RelayStoreHelper, SnubaTestCase, TransactionTest
         mock_fetch_file.return_value.encoding = None
         mock_fetch_file.return_value.headers = {}
 
-        event = self.post_and_retrieve_event(data)
+        # TODO(smcache): We make sure that the tests are run without the feature to preserve correct mock assertions.
+        # It will work just fine when we migrate to SmCache, as call count will stay the same with the new processor.
+        # Note its been called twice, as there as two processors when run with the feature.
+        with self.feature({"projects:sourcemapcache-processor": False}):
+            event = self.post_and_retrieve_event(data)
 
         mock_fetch_file.assert_called_once_with(
             "http://example.com/foo.js",
@@ -209,7 +213,11 @@ class JavascriptIntegrationTest(RelayStoreHelper, SnubaTestCase, TransactionTest
         mock_fetch_file.return_value.body = force_bytes("\n".join("<generated source>"))
         mock_fetch_file.return_value.encoding = None
 
-        event = self.post_and_retrieve_event(data)
+        # TODO(smcache): We make sure that the tests are run without the feature to preserve correct mock assertions.
+        # It will work just fine when we migrate to SmCache, as call count will stay the same with the new processor.
+        # Note its been called twice, as there as two processors when run with the feature.
+        with self.feature({"projects:sourcemapcache-processor": False}):
+            event = self.post_and_retrieve_event(data)
 
         mock_fetch_file.assert_called_once_with(
             "http://example.com/test.min.js",
@@ -476,6 +484,7 @@ class JavascriptIntegrationTest(RelayStoreHelper, SnubaTestCase, TransactionTest
 
         assert len(frame_list) == 1
         frame = frame_list[0]
+        assert frame.abs_path == "app:///nofiles.js.map"
         assert frame.pre_context == ["function multiply(a, b) {", '\t"use strict";']
         assert frame.context_line == "\treturn a * b;"
         assert frame.post_context == [
@@ -485,6 +494,11 @@ class JavascriptIntegrationTest(RelayStoreHelper, SnubaTestCase, TransactionTest
             "\ttry {",
             "\t\treturn multiply(add(a, b), a, b) / c;",
         ]
+        # TODO(smcache): Assertions below are the one that're correct. Use it when migrating from legacy processor.
+        # assert frame.abs_path == "app:///nofiles.js"
+        # assert frame.pre_context == ["function add(a, b) {", '\t"use strict";']
+        # assert frame.context_line == "\treturn a + b; // fôo"
+        # assert frame.post_context == ["}", ""]
 
     @responses.activate
     def test_indexed_sourcemap_source_expansion(self):
@@ -1330,15 +1344,6 @@ class JavascriptIntegrationTest(RelayStoreHelper, SnubaTestCase, TransactionTest
 
         assert len(frame_list) == 6
 
-        import pprint
-
-        pprint.pprint(frame_list[0].__dict__)
-        pprint.pprint(frame_list[1].__dict__)
-        pprint.pprint(frame_list[2].__dict__)
-        pprint.pprint(frame_list[3].__dict__)
-        pprint.pprint(frame_list[4].__dict__)
-        pprint.pprint(frame_list[5].__dict__)
-
         assert frame_list[0].abs_path == "webpack:///webpack/bootstrap d9a5a31d9276b73873d3"
         assert frame_list[0].function == "bar"
         assert frame_list[0].lineno == 8
@@ -1352,6 +1357,8 @@ class JavascriptIntegrationTest(RelayStoreHelper, SnubaTestCase, TransactionTest
         assert frame_list[2].lineno == 2
 
         assert frame_list[3].abs_path == "app:///dist.bundle.js"
+        # TODO(smcache): Assertion below is the one that's correct. Use it when migrating from legacy processor.
+        # assert frame_list[3].abs_path == "webpack:///webpack/bootstrap d9a5a31d9276b73873d3"
         assert frame_list[3].function == "Object.<anonymous>"
         assert frame_list[3].lineno == 1
 

+ 0 - 3
tests/sentry/lang/javascript/test_cache.py

@@ -6,7 +6,6 @@ from sentry.lang.javascript.cache import SourceCache
 class BasicCacheTest(TestCase):
     def test_basic_features(self):
         cache = SourceCache()
-
         url = "http://example.com/foo.js"
 
         assert url not in cache
@@ -14,7 +13,6 @@ class BasicCacheTest(TestCase):
 
         cache.add(url, b"foo\nbar")
         assert url in cache
-        assert cache.get(url) is not None
         assert cache.get(url)[0] == "foo"
 
         cache.alias(url + "x", url)
@@ -23,7 +21,6 @@ class BasicCacheTest(TestCase):
 
     def test_encoding_fallback(self):
         cache = SourceCache()
-
         url = "http://example.com/foo.js"
 
         # fall back to utf-8

+ 36 - 11
tests/sentry/lang/javascript/test_processor.py

@@ -9,7 +9,6 @@ from unittest.mock import ANY, MagicMock, call, patch
 import pytest
 import responses
 from requests.exceptions import RequestException
-from symbolic import SourceMapTokenMatch
 
 from sentry import http, options
 from sentry.lang.javascript.errormapping import REACT_MAPPING_URL, rewrite_exception
@@ -25,6 +24,7 @@ from sentry.lang.javascript.processor import (
     fetch_release_file,
     fetch_sourcemap,
     generate_module,
+    get_function_for_token,
     get_max_age,
     get_release_file_cache_key,
     get_release_file_cache_key_meta,
@@ -996,24 +996,49 @@ class GenerateModuleTest(unittest.TestCase):
         )
 
 
+class GetFunctionForTokenTest(unittest.TestCase):
+    # There is no point in pulling down `SourceMapCacheToken` and creating a constructor for it.
+    def get_token(self, name):
+        class Token:
+            def __init__(self, name):
+                self.function_name = name
+
+        return Token(name)
+
+    def test_valid_name(self):
+        frame = {"function": "original"}
+        token = self.get_token("lookedup")
+        assert get_function_for_token(frame, token) == "lookedup"
+
+    def test_useless_name(self):
+        frame = {"function": "original"}
+        token = self.get_token("__webpack_require__")
+        assert get_function_for_token(frame, token) == "original"
+
+    def test_useless_name_but_no_original(self):
+        frame = {"function": None}
+        token = self.get_token("__webpack_require__")
+        assert get_function_for_token(frame, token) == "__webpack_require__"
+
+
 class FetchSourcemapTest(TestCase):
     def test_simple_base64(self):
         smap_view = fetch_sourcemap(base64_sourcemap)
-        tokens = [SourceMapTokenMatch(0, 0, 1, 0, src="/test.js", src_id=0)]
+        token = smap_view.lookup(1, 1, 0)
 
-        assert list(smap_view) == tokens
-        sv = smap_view.get_sourceview(0)
-        assert sv.get_source() == 'console.log("hello, World!")'
-        assert smap_view.get_source_name(0) == "/test.js"
+        assert token.src == "/test.js"
+        assert token.line == 1
+        assert token.col == 1
+        assert token.context_line == 'console.log("hello, World!")'
 
     def test_base64_without_padding(self):
         smap_view = fetch_sourcemap(base64_sourcemap.rstrip("="))
-        tokens = [SourceMapTokenMatch(0, 0, 1, 0, src="/test.js", src_id=0)]
+        token = smap_view.lookup(1, 1, 0)
 
-        assert list(smap_view) == tokens
-        sv = smap_view.get_sourceview(0)
-        assert sv.get_source() == 'console.log("hello, World!")'
-        assert smap_view.get_source_name(0) == "/test.js"
+        assert token.src == "/test.js"
+        assert token.line == 1
+        assert token.col == 1
+        assert token.context_line == 'console.log("hello, World!")'
 
     def test_broken_base64(self):
         with pytest.raises(UnparseableSourcemap):

Некоторые файлы не были показаны из-за большого количества измененных файлов