Browse Source

Correctly symbolicate mixed Stack Traces (eg JS/wasm) (#49676)

This will first go through `symbolicate_event` symbolicating JS frames,
and afterwards schedule another `symbolicate_event` for native if
needed.
Arpad Borsos 1 year ago
parent
commit
307b5616bf

+ 2 - 2
src/sentry/lang/native/processing.py

@@ -1,6 +1,6 @@
 import logging
 import posixpath
-from typing import Any, Callable, Set
+from typing import Any, Callable, Optional, Set
 
 from symbolic import ParseDebugIdError, normalize_debug_id
 
@@ -446,7 +446,7 @@ def process_native_stacktraces(symbolicator: Symbolicator, data: Any) -> Any:
     return data
 
 
-def get_native_symbolication_function(data) -> Callable[[Symbolicator, Any], Any]:
+def get_native_symbolication_function(data) -> Optional[Callable[[Symbolicator, Any], Any]]:
     if is_minidump_event(data):
         return process_minidump
     elif is_applecrashreport_event(data):

+ 3 - 0
src/sentry/lang/native/symbolicator.py

@@ -40,6 +40,9 @@ class SymbolicatorTaskKind:
     def with_low_priority(self, is_low_priority: bool) -> "SymbolicatorTaskKind":
         return dataclasses.replace(self, is_low_priority=is_low_priority)
 
+    def with_js(self, is_js: bool) -> "SymbolicatorTaskKind":
+        return dataclasses.replace(self, is_js=is_js)
+
 
 class SymbolicatorPools(Enum):
     default = "default"

+ 6 - 14
src/sentry/tasks/store.py

@@ -112,7 +112,11 @@ def _do_preprocess_event(
     project: Optional[Project],
     has_attachments: bool = False,
 ) -> None:
-    from sentry.tasks.symbolication import should_demote_symbolication, submit_symbolicate
+    from sentry.tasks.symbolication import (
+        get_symbolication_function,
+        should_demote_symbolication,
+        submit_symbolicate,
+    )
 
     if cache_key and data is None:
         data = processing.event_processing_store.get(cache_key)
@@ -139,19 +143,7 @@ def _do_preprocess_event(
             "organization", Organization.objects.get_from_cache(id=project.organization_id)
         )
 
-    is_js = False
-    if data["platform"] in ("javascript", "node"):
-        from sentry.lang.javascript.processing import (
-            get_js_symbolication_function as get_symbolication_function,
-        )
-
-        is_js = True
-    else:
-        from sentry.lang.native.processing import (
-            get_native_symbolication_function as get_symbolication_function,
-        )
-
-    symbolication_function = get_symbolication_function(data)
+    is_js, symbolication_function = get_symbolication_function(data)
     if symbolication_function:
         symbolication_function_name = getattr(symbolication_function, "__name__", "none")
 

+ 43 - 13
src/sentry/tasks/symbolication.py

@@ -1,7 +1,7 @@
 import logging
 import random
 from time import sleep, time
-from typing import Any, Callable, Optional
+from typing import Any, Callable, Optional, Tuple
 
 import sentry_sdk
 from django.conf import settings
@@ -80,6 +80,29 @@ def should_demote_symbolication(project_id: int) -> bool:
             return False
 
 
+def get_symbolication_function(
+    data: Any, is_in_symbolicate_event: bool = False
+) -> Tuple[bool, Optional[Callable[[Symbolicator, Any], Any]]]:
+    # for JS events that have *not* been processed yet:
+    if data["platform"] in ("javascript", "node") and not data.get(
+        "processed_by_symbolicator", False
+    ):
+        from sentry.lang.javascript.processing import (
+            get_js_symbolication_function,
+            process_js_stacktraces,
+        )
+
+        # if we are already in `symbolicate_event`, we do *not* want to sample events
+        if is_in_symbolicate_event:
+            return True, process_js_stacktraces
+        else:
+            return True, get_js_symbolication_function(data)
+    else:
+        from sentry.lang.native.processing import get_native_symbolication_function
+
+        return False, get_native_symbolication_function(data)
+
+
 def _do_symbolicate_event(
     cache_key: str,
     start_time: Optional[int],
@@ -127,7 +150,22 @@ def _do_symbolicate_event(
             )
             return
 
-    def _continue_to_process_event() -> None:
+    def _continue_to_process_event(was_killswitched: bool = False) -> None:
+        # for JS events, we check `get_symbolication_function` again *after*
+        # symbolication, because maybe we need to feed it to another round of
+        # `symbolicate_event`, but for *native* that time.
+        if not was_killswitched and task_kind.is_js:
+            _, symbolication_function = get_symbolication_function(data, True)
+            if symbolication_function:
+                submit_symbolicate(
+                    task_kind=task_kind.with_js(False),
+                    cache_key=cache_key,
+                    event_id=event_id,
+                    start_time=start_time,
+                    has_attachments=has_attachments,
+                )
+                return
+        # else:
         store.submit_process(
             from_reprocessing=task_kind.is_reprocessing,
             cache_key=cache_key,
@@ -138,18 +176,10 @@ def _do_symbolicate_event(
             has_attachments=has_attachments,
         )
 
-    if data["platform"] in ("javascript", "node"):
-        from sentry.lang.javascript.processing import process_js_stacktraces
-
-        symbolication_function = process_js_stacktraces
-    else:
-        from sentry.lang.native.processing import get_native_symbolication_function
-
-        symbolication_function = get_native_symbolication_function(data)
-
+    _, symbolication_function = get_symbolication_function(data, True)
     symbolication_function_name = getattr(symbolication_function, "__name__", "none")
 
-    if killswitch_matches_context(
+    if symbolication_function is None or killswitch_matches_context(
         "store.load-shed-symbolicate-event-projects",
         {
             "project_id": project_id,
@@ -158,7 +188,7 @@ def _do_symbolicate_event(
             "symbolication_function": symbolication_function_name,
         },
     ):
-        return _continue_to_process_event()
+        return _continue_to_process_event(True)
 
     symbolication_start_time = time()
 

BIN
tests/symbolicator/fixtures/a18fd85d4a4eb893022d6bfad846b1.debug


+ 173 - 1
tests/symbolicator/test_payload_full.py

@@ -1,16 +1,26 @@
+import os.path
 import zipfile
 from io import BytesIO
 from unittest.mock import patch
+from uuid import uuid4
 
 import pytest
 from django.core.files.uploadedfile import SimpleUploadedFile
 from django.urls import reverse
 
 from sentry import eventstore
-from sentry.models import File, ProjectDebugFile
+from sentry.models import (
+    ArtifactBundle,
+    DebugIdArtifactBundle,
+    File,
+    ProjectDebugFile,
+    SourceFileType,
+)
 from sentry.testutils import RelayStoreHelper, TransactionTestCase
 from sentry.testutils.factories import get_fixture_path
 from sentry.testutils.helpers.datetime import before_now, iso_format
+from sentry.testutils.helpers.options import override_options
+from sentry.utils import json
 from tests.symbolicator import insta_snapshot_native_stacktrace_data, redact_location
 
 # IMPORTANT:
@@ -63,10 +73,20 @@ REAL_RESOLVING_EVENT_DATA = {
 }
 
 
+def get_local_fixture_path(name):
+    return os.path.join(os.path.dirname(__file__), "fixtures", name)
+
+
+def load_fixture(name):
+    with open(get_local_fixture_path(name), "rb") as fp:
+        return fp.read()
+
+
 class SymbolicatorResolvingIntegrationTest(RelayStoreHelper, TransactionTestCase):
     @pytest.fixture(autouse=True)
     def initialize(self, live_server):
         self.project.update_option("sentry:builtin_symbol_sources", [])
+        self.min_ago = iso_format(before_now(minutes=1))
 
         with patch("sentry.auth.system.is_internal_ip", return_value=True), self.options(
             {"system.url-prefix": live_server.url}
@@ -251,3 +271,155 @@ class SymbolicatorResolvingIntegrationTest(RelayStoreHelper, TransactionTestCase
         candidates = event.data["debug_meta"]["images"][0]["candidates"]
         redact_location(candidates)
         self.insta_snapshot(candidates)
+
+    def test_resolve_mixed_stack_trace(self):
+        # native debug files:
+        wasm_file = File.objects.create(
+            name="test.wasm", type="default", headers={"Content-Type": "application/wasm"}
+        )
+
+        with open(get_local_fixture_path("a18fd85d4a4eb893022d6bfad846b1.debug"), "rb") as f:
+            wasm_file.putfile(f)
+
+        # FIXME(swatinem):
+        # We have a problem, and it seems to be serious! :-(
+        # The `debug_files` endpoints is using the `ProjectDebugFile.id`, which
+        # symbolicator is using for caching etc.
+        # Whereas the `artifact_lookup` is using the `File.id`, which symbolicator
+        # uses in the same way.
+        # These two IDs can have collisions !!!
+        # This can happen right here in this fest when you switch the order
+        # of creating the native / js files:
+        # In that case, the `File.id` would be `1`, the same as the `ProjectDebugFile.id`.
+        # In that case, as we are JS-symbolicating first, symbolicator will fetch
+        # the bundle with id `1`, and then later on reusing that same cache for
+        # the wasm file with id `1`, although the cache now already has the JS
+        # bundle cached as id `1`, and tries to reuse that for wasm, which is wrong.
+        #
+        # The symbolicator cache is also keyed on the `scope` (aka project id),
+        # and it is extremely unlikely to have such collisions in production for
+        # the same project.
+        ProjectDebugFile.objects.create(
+            file=wasm_file,
+            object_name="test.wasm",
+            cpu_name="wasm32",
+            project_id=self.project.id,
+            debug_id="bda18fd8-5d4a-4eb8-9302-2d6bfad846b1",
+            code_id="bda18fd85d4a4eb893022d6bfad846b1",
+        )
+
+        # JS debug files:
+        debug_id = "c941d872-af1f-4f0c-a7ff-ad3d295fe153"
+
+        compressed = BytesIO(b"SYSB")
+        with zipfile.ZipFile(compressed, "a") as zip_file:
+            zip_file.writestr("files/_/_/test.min.js", load_fixture("test.min.js"))
+            zip_file.writestr("files/_/_/test.map", load_fixture("test.map"))
+
+            zip_file.writestr(
+                "manifest.json",
+                json.dumps(
+                    {
+                        "files": {
+                            "files/_/_/test.min.js": {
+                                "url": "~/test.min.js",
+                                "type": "minified_source",
+                                "headers": {
+                                    "debug-id": debug_id,
+                                    "sourcemap": "test.map",
+                                },
+                            },
+                            "files/_/_/test.map": {
+                                "url": "~/file.wc.sourcemap.js",
+                                "type": "source_map",
+                                "headers": {
+                                    "debug-id": debug_id,
+                                },
+                            },
+                        },
+                    }
+                ),
+            )
+        compressed.seek(0)
+        bundle_file = File.objects.create(name="bundle.zip", type="artifact.bundle")
+        bundle_file.putfile(compressed)
+
+        artifact_bundle = ArtifactBundle.objects.create(
+            organization_id=self.organization.id,
+            bundle_id=uuid4(),
+            file=bundle_file,
+            artifact_count=2,
+        )
+
+        DebugIdArtifactBundle.objects.create(
+            organization_id=self.organization.id,
+            debug_id=debug_id,
+            artifact_bundle=artifact_bundle,
+            source_file_type=SourceFileType.MINIFIED_SOURCE.value,
+        )
+        DebugIdArtifactBundle.objects.create(
+            organization_id=self.organization.id,
+            debug_id=debug_id,
+            artifact_bundle=artifact_bundle,
+            source_file_type=SourceFileType.SOURCE_MAP.value,
+        )
+
+        data = {
+            "timestamp": self.min_ago,
+            "message": "hello",
+            "platform": "javascript",
+            "release": "abc",
+            "exception": {
+                "values": [
+                    {
+                        "type": "Error",
+                        "stacktrace": {
+                            "frames": [
+                                {
+                                    "abs_path": "http://example.com/test.min.js",
+                                    "lineno": 1,
+                                    "colno": 183,
+                                },
+                                {
+                                    "platform": "native",
+                                    "instruction_addr": "0x8c",
+                                    "addr_mode": "rel:0",
+                                },
+                            ]
+                        },
+                    }
+                ]
+            },
+            "debug_meta": {
+                "images": [
+                    {
+                        "type": "sourcemap",
+                        "debug_id": debug_id,
+                        "code_file": "http://example.com/test.min.js",
+                    },
+                    {
+                        "type": "wasm",
+                        "debug_id": "bda18fd8-5d4a-4eb8-9302-2d6bfad846b1",
+                        "code_id": "bda18fd85d4a4eb893022d6bfad846b1",
+                        "debug_file": "file://foo.invalid/demo.wasm",
+                    },
+                ]
+            },
+        }
+
+        with override_options({"symbolicator.sourcemaps-processing-sample-rate": 1.0}):
+            event = self.post_and_retrieve_event(data)
+
+        exception = event.interfaces["exception"]
+        frames = exception.values[0].stacktrace.frames
+        assert frames[0].abs_path == "http://example.com/test.js"
+        assert frames[0].lineno == 20
+        assert frames[0].colno == 5
+        assert frames[0].context_line == "    invoke(data);"
+
+        assert frames[1].abs_path == "/Users/mitsuhiko/Development/wasm-example/simple/src/lib.rs"
+        assert frames[1].lineno == 19
+        assert frames[1].function == "internal_func"
+
+        images = event.data["debug_meta"]["images"]
+        assert images[1]["debug_status"] == "found"