Browse Source

chore(similarity): Handle no filename and module (#81272)

Replace the filename with the module if a frame has no filename.
Do not send the stacktrace to Seer if a frame has neither a module nor a
filename.
Jodi Jang 3 months ago
parent
commit
fc8225b8ae
2 changed files with 69 additions and 4 deletions
  1. 25 1
      src/sentry/seer/similarity/utils.py
  2. 44 3
      tests/sentry/seer/similarity/test_utils.py

+ 25 - 1
src/sentry/seer/similarity/utils.py

@@ -161,6 +161,10 @@ class TooManyOnlySystemFramesException(Exception):
     pass
 
 
+class NoFilenameOrModuleException(Exception):
+    pass
+
+
 def _get_value_if_exists(exception_value: dict[str, Any]) -> str:
     return exception_value["values"][0] if exception_value.get("values") else ""
 
@@ -188,6 +192,7 @@ def get_stacktrace_string(data: dict[str, Any]) -> str:
     frame_count = 0
     html_frame_count = 0  # for a temporary metric
     is_frames_truncated = False
+    has_no_filename_or_module = False
     stacktrace_str = ""
     found_non_snipped_context_line = False
 
@@ -197,6 +202,7 @@ def get_stacktrace_string(data: dict[str, Any]) -> str:
         nonlocal frame_count
         nonlocal html_frame_count
         nonlocal is_frames_truncated
+        nonlocal has_no_filename_or_module
         nonlocal found_non_snipped_context_line
         frame_strings = []
 
@@ -211,7 +217,7 @@ def get_stacktrace_string(data: dict[str, Any]) -> str:
         frame_count += len(contributing_frames)
 
         for frame in contributing_frames:
-            frame_dict = {"filename": "", "function": "", "context-line": ""}
+            frame_dict = {"filename": "", "function": "", "context-line": "", "module": ""}
             for frame_values in frame.get("values", []):
                 if frame_values.get("id") in frame_dict:
                     frame_dict[frame_values["id"]] = _get_value_if_exists(frame_values)
@@ -219,6 +225,11 @@ def get_stacktrace_string(data: dict[str, Any]) -> str:
             if not _is_snipped_context_line(frame_dict["context-line"]):
                 found_non_snipped_context_line = True
 
+            if frame_dict["filename"] == "" and frame_dict["module"] == "":
+                has_no_filename_or_module = True
+            elif frame_dict["filename"] == "":
+                frame_dict["filename"] = frame_dict["module"]
+
             # Not an exhaustive list of tests we could run to detect HTML, but this is only
             # meant to be a temporary, quick-and-dirty metric
             # TODO: Don't let this, and the metric below, hang around forever. It's only to
@@ -271,6 +282,8 @@ def get_stacktrace_string(data: dict[str, Any]) -> str:
                     frame_strings = _process_frames(exception_value["values"])
         if is_frames_truncated and not app_hash:
             raise TooManyOnlySystemFramesException
+        if has_no_filename_or_module:
+            raise NoFilenameOrModuleException
         # Only exceptions have the type and value properties, so we don't need to handle the threads
         # case here
         header = f"{exc_type}: {exc_value}\n" if exception["id"] == "exception" else ""
@@ -328,6 +341,17 @@ def get_stacktrace_string_with_metrics(
                 },
             )
         stacktrace_string = None
+    except NoFilenameOrModuleException:
+        if referrer == ReferrerOptions.INGEST:
+            metrics.incr(
+                "grouping.similarity.did_call_seer",
+                sample_rate=options.get("seer.similarity.metrics_sample_rate"),
+                tags={
+                    "call_made": False,
+                    "blocker": "no-module-or-filename",
+                },
+            )
+        stacktrace_string = None
     return stacktrace_string
 
 

+ 44 - 3
tests/sentry/seer/similarity/test_utils.py

@@ -1,20 +1,25 @@
 import copy
 from collections.abc import Callable
 from typing import Any, Literal, cast
+from unittest.mock import patch
 from uuid import uuid1
 
 import pytest
 
+from sentry import options
 from sentry.eventstore.models import Event
 from sentry.seer.similarity.utils import (
     BASE64_ENCODED_PREFIXES,
     MAX_FRAME_COUNT,
     SEER_ELIGIBLE_PLATFORMS,
+    NoFilenameOrModuleException,
+    ReferrerOptions,
     TooManyOnlySystemFramesException,
     _is_snipped_context_line,
     event_content_is_seer_eligible,
     filter_null_from_string,
     get_stacktrace_string,
+    get_stacktrace_string_with_metrics,
 )
 from sentry.testutils.cases import TestCase
 
@@ -331,14 +336,14 @@ class GetStacktraceStringTest(TestCase):
                                                 "name": None,
                                                 "contributes": True,
                                                 "hint": None,
-                                                "values": [],
+                                                "values": ["module"],
                                             },
                                             {
                                                 "id": "filename",
                                                 "name": None,
                                                 "contributes": True,
                                                 "hint": None,
-                                                "values": [],
+                                                "values": ["filename"],
                                             },
                                             {
                                                 "id": "function",
@@ -691,7 +696,7 @@ class GetStacktraceStringTest(TestCase):
 
     def test_thread(self):
         stacktrace_str = get_stacktrace_string(self.MOBILE_THREAD_DATA)
-        assert stacktrace_str == 'File "", function TestHandler'
+        assert stacktrace_str == 'File "filename", function TestHandler'
 
     def test_system(self):
         data_system = copy.deepcopy(self.BASE_APP_DATA)
@@ -814,6 +819,42 @@ class GetStacktraceStringTest(TestCase):
         stacktrace_str = get_stacktrace_string(self.ONLY_STACKTRACE)
         assert stacktrace_str == 'File "index.php", function \n    $server->emit($server->run());'
 
+    def test_replace_file_with_module(self):
+        exception = copy.deepcopy(self.BASE_APP_DATA)
+        # delete filename from the exception
+        del exception["app"]["component"]["values"][0]["values"][0]["values"][0]["values"][1]
+        stacktrace_string = get_stacktrace_string_with_metrics(
+            exception, "python", ReferrerOptions.INGEST
+        )
+        assert (
+            stacktrace_string
+            == 'ZeroDivisionError: division by zero\n  File "__main__", function divide_by_zero\n    divide = 1/0'
+        )
+
+    @patch("sentry.seer.similarity.utils.metrics")
+    def test_no_filename_or_module(self, mock_metrics):
+        exception = copy.deepcopy(self.BASE_APP_DATA)
+        # delete module from the exception
+        del exception["app"]["component"]["values"][0]["values"][0]["values"][0]["values"][0]
+        # delete filename from the exception
+        del exception["app"]["component"]["values"][0]["values"][0]["values"][0]["values"][0]
+        with pytest.raises(NoFilenameOrModuleException):
+            get_stacktrace_string(exception)
+
+        stacktrace_string = get_stacktrace_string_with_metrics(
+            exception, "python", ReferrerOptions.INGEST
+        )
+        sample_rate = options.get("seer.similarity.metrics_sample_rate")
+        assert stacktrace_string is None
+        mock_metrics.incr.assert_called_with(
+            "grouping.similarity.did_call_seer",
+            sample_rate=sample_rate,
+            tags={
+                "call_made": False,
+                "blocker": "no-module-or-filename",
+            },
+        )
+
 
 class EventContentIsSeerEligibleTest(TestCase):
     def get_eligible_event_data(self) -> dict[str, Any]: