
feat(processor): Use symbolicator for sourcemap processing behind feature-flag

"symbolicator.sourcemaps-processing-projects": Vec<usize>
"symbolicator.sourcemaps-processing-sample-rate": f32
Kamil Ogórek 2 years ago
Parent
Commit
9ecb7c4955
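
The two new options gate the Symbolicator code path either per project or by a deterministic sample rate. A minimal sketch of enabling them at runtime, assuming a `sentry shell` session and the `sentry.options` store (the project id is the dogfooding project referenced in `utils.py` below):

# Sketch only: flip the feature flags from a `sentry shell`.
from sentry import options

# Route an explicit list of projects through Symbolicator
# (11276 is the sentry/javascript dogfooding project mentioned below).
options.set("symbolicator.sourcemaps-processing-projects", [11276])

# Additionally opt in roughly 10% of all projects via deterministic sampling.
options.set("symbolicator.sourcemaps-processing-sample-rate", 0.1)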

+ 7 - 0
src/sentry/lang/javascript/plugin.py

@@ -1,3 +1,4 @@
+from sentry.lang.javascript.utils import should_use_symbolicator_for_sourcemaps
 from sentry.plugins.base.v2 import Plugin2
 from sentry.stacktraces.processing import find_stacktraces_in_data
 from sentry.utils.safe import get_path
@@ -7,6 +8,9 @@ from .errormapping import rewrite_exception
 from .processor import JavaScriptStacktraceProcessor
 
 
+# TODO: We still need `preprocess_event` tasks and the remaining, non-symbolication-specific
+# code from `lang/javascript/processor.py` to run somewhere, unless we want the whole
+# `processor.py` to be moved to the Rust side, including module generation, rewriting and translations.
 def preprocess_event(data):
     rewrite_exception(data)
     translate_exception(data)
@@ -41,5 +45,8 @@ class JavascriptPlugin(Plugin2):
         return []
 
     def get_stacktrace_processors(self, data, stacktrace_infos, platforms, **kwargs):
+        if should_use_symbolicator_for_sourcemaps(data.get("project")):
+            return []
+
         if "javascript" in platforms or "node" in platforms:
             return [JavaScriptStacktraceProcessor]

+ 154 - 0
src/sentry/lang/javascript/processing.py

@@ -0,0 +1,154 @@
+import logging
+
+from sentry.lang.javascript.utils import should_use_symbolicator_for_sourcemaps
+from sentry.lang.native.error import SymbolicationFailed, write_error
+from sentry.lang.native.symbolicator import Symbolicator
+from sentry.models import EventError, Project
+from sentry.stacktraces.processing import find_stacktraces_in_data
+from sentry.utils.safe import get_path
+
+logger = logging.getLogger(__name__)
+
+
+def _merge_frame_context(new_frame, symbolicated):
+    new_frame = dict(new_frame)
+    symbolicated = dict(symbolicated)
+
+    if symbolicated.get("pre_context"):
+        new_frame["pre_context"] = symbolicated["pre_context"]
+    if symbolicated.get("context_line"):
+        new_frame["context_line"] = symbolicated["context_line"]
+    if symbolicated.get("post_context"):
+        new_frame["post_context"] = symbolicated["post_context"]
+
+    return new_frame
+
+
+def _merge_frame(new_frame, symbolicated):
+    new_frame = dict(new_frame)
+    symbolicated = dict(symbolicated)
+
+    if symbolicated.get("function"):
+        new_frame["function"] = symbolicated["function"]
+    if symbolicated.get("abs_path"):
+        new_frame["abs_path"] = symbolicated["abs_path"]
+    if symbolicated.get("filename"):
+        new_frame["filename"] = symbolicated["filename"]
+    if symbolicated.get("lineno"):
+        new_frame["lineno"] = symbolicated["lineno"]
+    if symbolicated.get("colno"):
+        new_frame["colno"] = symbolicated["colno"]
+    if symbolicated.get("pre_context"):
+        new_frame["pre_context"] = symbolicated["pre_context"]
+    if symbolicated.get("context_line"):
+        new_frame["context_line"] = symbolicated["context_line"]
+    if symbolicated.get("post_context"):
+        new_frame["post_context"] = symbolicated["post_context"]
+    if symbolicated.get("status"):
+        frame_meta = new_frame.setdefault("data", {})
+        frame_meta["symbolicator_status"] = symbolicated["status"]
+
+    return new_frame
+
+
+# TODO: Change this error handling to be JS-specific?
+def _handle_response_status(event_data, response_json):
+    if not response_json:
+        error = SymbolicationFailed(type=EventError.NATIVE_INTERNAL_FAILURE)
+    elif response_json["status"] == "completed":
+        return True
+    elif response_json["status"] == "failed":
+        error = SymbolicationFailed(
+            message=response_json.get("message") or None, type=EventError.NATIVE_SYMBOLICATOR_FAILED
+        )
+    else:
+        logger.error("Unexpected symbolicator status: %s", response_json["status"])
+        error = SymbolicationFailed(type=EventError.NATIVE_INTERNAL_FAILURE)
+
+    write_error(error, event_data)
+
+
+def get_frames_for_symbolication(frames, data):
+    return [dict(frame) for frame in reversed(frames)]
+
+
+def is_sourcemap_image(image):
+    return (
+        bool(image)
+        and image.get("type") == "sourcemap"
+        and image.get("debug_id") is not None
+        and image.get("code_file") is not None
+    )
+
+
+def sourcemap_images_from_data(data):
+    return get_path(data, "debug_meta", "images", default=(), filter=is_sourcemap_image)
+
+
+def process_payload(data):
+    # We cannot symbolicate JS stacktraces without a release.
+    # TODO: Won't be the case with DebugIDs and Artifact Bundles
+    if data.get("release") is None:
+        return
+
+    project = Project.objects.get_from_cache(id=data.get("project"))
+
+    allow_scraping_org_level = project.organization.get_option("sentry:scrape_javascript", True)
+    allow_scraping_project_level = project.get_option("sentry:scrape_javascript", True)
+    allow_scraping = allow_scraping_org_level and allow_scraping_project_level
+
+    symbolicator = Symbolicator(project=project, event_id=data["event_id"], release=data["release"])
+
+    modules = sourcemap_images_from_data(data)
+
+    stacktrace_infos = find_stacktraces_in_data(data)
+    stacktraces = [
+        {
+            "frames": get_frames_for_symbolication(sinfo.stacktrace.get("frames") or (), data),
+        }
+        for sinfo in stacktrace_infos
+    ]
+
+    if not any(stacktrace["frames"] for stacktrace in stacktraces):
+        return
+
+    response = symbolicator.process_js(
+        stacktraces=stacktraces,
+        modules=modules,
+        dist=data.get("dist"),
+        allow_scraping=allow_scraping,
+    )
+
+    if not _handle_response_status(data, response):
+        return data
+
+    assert len(stacktraces) == len(response["stacktraces"]), (stacktraces, response)
+
+    for sinfo, raw_stacktrace, complete_stacktrace in zip(
+        stacktrace_infos, response["raw_stacktraces"], response["stacktraces"]
+    ):
+        new_frames = []
+        new_sinfo_frames = []
+
+        for sinfo_frame, raw_frame, complete_frame in zip(
+            sinfo.stacktrace["frames"], raw_stacktrace["frames"], complete_stacktrace["frames"]
+        ):
+            merged_context_frame = _merge_frame_context(sinfo_frame, raw_frame)
+            new_sinfo_frames.append(merged_context_frame)
+
+            merged_frame = _merge_frame(merged_context_frame, complete_frame)
+            new_frames.append(merged_frame)
+
+        if sinfo.container is not None:
+            sinfo.container["raw_stacktrace"] = {
+                "frames": new_sinfo_frames,
+            }
+
+        sinfo.stacktrace["frames"] = new_frames
+
+    return data
+
+
+def get_symbolication_function(data):
+    if should_use_symbolicator_for_sourcemaps(data.get("project")):
+        return process_payload
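
For reference, `sourcemap_images_from_data` only picks up `debug_meta` images shaped like the sketch below; the `debug_id` and `code_file` values are made up:

# Hypothetical event fragment that passes is_sourcemap_image();
# the debug_id and code_file values are illustrative only.
event_data = {
    "debug_meta": {
        "images": [
            {
                "type": "sourcemap",
                "debug_id": "c0c0c0c0-1111-2222-3333-444444444444",
                "code_file": "https://example.com/static/js/app.min.js",
            }
        ]
    }
}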

+ 15 - 0
src/sentry/lang/javascript/utils.py

@@ -0,0 +1,15 @@
+from sentry import options
+
+SYMBOLICATOR_SOURCEMAPS_PROJECTS_OPTION = "symbolicator.sourcemaps-processing-projects"
+SYMBOLICATOR_SOURCEMAPS_SAMPLE_RATE_OPTION = "symbolicator.sourcemaps-processing-sample-rate"
+
+
+def should_use_symbolicator_for_sourcemaps(project_id: int) -> bool:
+    # Internal Sentry projects
+    # 11276 - sentry/javascript project for forced dogfooding
+    # settings.SENTRY_PROJECT - default project for all installations
+    # settings.SENTRY_FRONTEND_PROJECT - configurable default frontend project
+    if project_id in options.get(SYMBOLICATOR_SOURCEMAPS_PROJECTS_OPTION, []):
+        return True
+
+    return project_id % 1000 < options.get(SYMBOLICATOR_SOURCEMAPS_SAMPLE_RATE_OPTION, 0.0) * 1000
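
The sample-rate check is deterministic per project rather than random: with a rate of 0.1, only projects whose id modulo 1000 is below 100 take the new path. A quick illustration with made-up ids:

# Illustration of the deterministic sampling above; ids and rate are made up.
sample_rate = 0.1  # symbolicator.sourcemaps-processing-sample-rate
for project_id in (42, 99, 100, 1042, 1999):
    print(project_id, project_id % 1000 < sample_rate * 1000)
# -> 42 True, 99 True, 100 False, 1042 True, 1999 False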

+ 33 - 0
src/sentry/lang/native/sources.py

@@ -149,6 +149,39 @@ def get_internal_source(project):
     }
 
 
+def get_internal_release_file_source(project, release):
+    """
+    Returns the source configuration for a Sentry project's release files.
+    """
+    internal_url_prefix = options.get("system.internal-url-prefix")
+    if not internal_url_prefix:
+        internal_url_prefix = options.get("system.url-prefix")
+        if sys.platform == "darwin":
+            internal_url_prefix = internal_url_prefix.replace(
+                "localhost", "host.docker.internal"
+            ).replace("127.0.0.1", "host.docker.internal")
+
+    assert internal_url_prefix
+    sentry_source_url = "{}{}".format(
+        internal_url_prefix.rstrip("/"),
+        reverse(
+            "sentry-api-0-project-release-files",
+            kwargs={
+                "organization_slug": project.organization.slug,
+                "project_slug": project.slug,
+                "version": release,
+            },
+        ),
+    )
+
+    return {
+        "type": "sentry",
+        "id": INTERNAL_SOURCE_NAME,
+        "url": sentry_source_url,
+        "token": get_system_token(),
+    }
+
+
 def is_internal_source_id(source_id):
     """Determines if a DIF object source identifier is reserved for internal sentry use.
 

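For illustration, with `system.url-prefix` set to `http://localhost:8000`, org slug `acme`, project slug `frontend` and release `1.0.0`, the source handed to Symbolicator would look roughly like the sketch below (slugs, port and token are assumptions; on macOS the host is rewritten so the Symbolicator container can reach the web process):

# Hypothetical result of get_internal_release_file_source() on macOS;
# slugs, release, port and token are made up.
{
    "type": "sentry",
    "id": "sentry:project",  # value of INTERNAL_SOURCE_NAME (assumed)
    "url": "http://host.docker.internal:8000/api/0/projects/acme/frontend/releases/1.0.0/files/",
    "token": "<system auth token from get_system_token()>",
}
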
+ 18 - 2
src/sentry/lang/native/symbolicator.py

@@ -9,7 +9,7 @@ from requests.exceptions import RequestException
 
 from sentry import options
 from sentry.cache import default_cache
-from sentry.lang.native.sources import sources_for_symbolication
+from sentry.lang.native.sources import get_internal_release_file_source, sources_for_symbolication
 from sentry.models import Organization
 from sentry.net.http import Session
 from sentry.tasks.symbolication import RetrySymbolication
@@ -26,7 +26,7 @@ def _task_id_cache_key_for_event(project_id, event_id):
 
 
 class Symbolicator:
-    def __init__(self, project, event_id):
+    def __init__(self, project, event_id, release=None):
         symbolicator_options = options.get("symbolicator.options")
         base_url = symbolicator_options["url"].rstrip("/")
         assert base_url
@@ -38,6 +38,7 @@ class Symbolicator:
             )
 
         self.project = project
+        self.release = release
         self.sess = SymbolicatorSession(
             url=base_url,
             project_id=str(project.id),
@@ -134,6 +135,21 @@ class Symbolicator:
         res = self._process("symbolicate_stacktraces", "symbolicate", json=json)
         return process_response(res)
 
+    def process_js(self, stacktraces, modules, dist, allow_scraping=True):
+        source = get_internal_release_file_source(self.project, self.release)
+
+        json = {
+            "source": source,
+            "stacktraces": stacktraces,
+            "modules": modules,
+            "allow_scraping": allow_scraping,
+        }
+
+        if dist is not None:
+            json["dist"] = dist
+
+        return self._process("symbolicate_js_stacktraces", "symbolicate-js", json=json)
+
 
 class TaskIdNotFound(Exception):
     pass
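
Tying it together, `process_payload` in `lang/javascript/processing.py` drives this method roughly as follows; the project id, event id, release and frame contents below are made up:

# Sketch of the call made by process_payload(); all concrete values are made up.
from sentry.lang.native.symbolicator import Symbolicator
from sentry.models import Project

project = Project.objects.get_from_cache(id=11276)
symbolicator = Symbolicator(project=project, event_id="0" * 32, release="1.0.0")
response = symbolicator.process_js(
    stacktraces=[{"frames": [{"abs_path": "https://example.com/app.min.js", "lineno": 1, "colno": 64}]}],
    modules=[],
    dist=None,
    allow_scraping=True,
)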

+ 4 - 0
src/sentry/options/defaults.py

@@ -321,6 +321,10 @@ register("symbolicator.minidump-refactor-projects-opt-in", type=Sequence, defaul
 register("symbolicator.minidump-refactor-projects-opt-out", type=Sequence, default=[])  # unused
 register("symbolicator.minidump-refactor-random-sampling", default=0.0)  # unused
 
+# Enable use of Symbolicator Source Maps processing for specific projects.
+register("symbolicator.sourcemaps-processing-projects", type=Sequence, default=[])
+# Enable use of Symbolicator Source Maps processing for a fraction of projects.
+register("symbolicator.sourcemaps-processing-sample-rate", default=0.0)
 
 # Normalization after processors
 register("store.normalize-after-processing", default=0.0)  # unused

+ 5 - 1
src/sentry/tasks/store.py

@@ -109,7 +109,6 @@ def _do_preprocess_event(
     project: Optional[Project],
     has_attachments: bool = False,
 ) -> None:
-    from sentry.lang.native.processing import get_symbolication_function
     from sentry.tasks.symbolication import should_demote_symbolication, submit_symbolicate
 
     if cache_key and data is None:
@@ -137,6 +136,11 @@ def _do_preprocess_event(
             "organization", Organization.objects.get_from_cache(id=project.organization_id)
         )
 
+    if data["platform"] in ("javascript", "node"):
+        from sentry.lang.javascript.processing import get_symbolication_function
+    else:
+        from sentry.lang.native.processing import get_symbolication_function
+
     symbolication_function = get_symbolication_function(data)
     if symbolication_function:
         symbolication_function_name = getattr(symbolication_function, "__name__", "none")

+ 5 - 2
src/sentry/tasks/symbolication.py

@@ -119,8 +119,6 @@ def _do_symbolicate_event(
     queue_switches: int = 0,
     has_attachments: bool = False,
 ) -> None:
-    from sentry.lang.native.processing import get_symbolication_function
-
     if data is None:
         data = processing.event_processing_store.get(cache_key)
 
@@ -182,6 +180,11 @@ def _do_symbolicate_event(
             has_attachments=has_attachments,
         )
 
+    if data["platform"] in ("javascript", "node"):
+        from sentry.lang.javascript.processing import get_symbolication_function
+    else:
+        from sentry.lang.native.processing import get_symbolication_function
+
     symbolication_function = get_symbolication_function(data)
     symbolication_function_name = getattr(symbolication_function, "__name__", "none")
 

+ 15 - 3
tests/symbolicator/__init__.py

@@ -50,7 +50,7 @@ def strip_trailing_addr(value):
     return STRIP_TRAILING_ADDR_RE.sub("", value)
 
 
-def normalize_exception(exc):
+def normalize_native_exception(exc):
     if exc:
         exc = dict(exc)
         exc["type"] = strip_trailing_addr(exc["type"])
@@ -68,7 +68,7 @@ def strip_stacktrace_container(container):
     return container
 
 
-def insta_snapshot_stacktrace_data(self, event, **kwargs):
+def insta_snapshot_native_stacktrace_data(self, event, **kwargs):
     # limit amount of data going into a snapshot so that they don't break all
     # the time due to unrelated changes.
     self.insta_snapshot(
@@ -76,7 +76,7 @@ def insta_snapshot_stacktrace_data(self, event, **kwargs):
             "stacktrace": strip_stacktrace(event.get("stacktrace")),
             "exception": {
                 "values": [
-                    normalize_exception(strip_stacktrace_container(x))
+                    normalize_native_exception(strip_stacktrace_container(x))
                     for x in get_path(event, "exception", "values") or ()
                 ]
             },
@@ -97,6 +97,18 @@ def insta_snapshot_stacktrace_data(self, event, **kwargs):
     )
 
 
+def insta_snapshot_javascript_stacktrace_data(self, event, **kwargs):
+    # limit amount of data going into a snapshot so that they don't break all
+    # the time due to unrelated changes.
+    self.insta_snapshot(
+        {
+            "exception": {"values": [x for x in get_path(event, "exception", "values") or ()]},
+            "errors": [e for e in event.get("errors") or () if e.get("name") != "timestamp"],
+        },
+        **kwargs,
+    )
+
+
 def redact_location(candidates):
     """Redacts the sentry location URI to be independent of the specific ID.
 

+ 24 - 0
tests/symbolicator/fixtures/test.js

@@ -0,0 +1,24 @@
+var makeAFailure = (function() {
+  function onSuccess(data) {}
+
+  function onFailure(data) {
+    throw new Error('failed!');
+  }
+
+  function invoke(data) {
+    var cb = null;
+    if (data.failed) {
+      cb = onFailure;
+    } else {
+      cb = onSuccess;
+    }
+    cb(data);
+  }
+
+  function test() {
+    var data = {failed: true, value: 42};
+    invoke(data);
+  }
+
+  return test;
+})();

Some files were not shown because too many files changed in this diff