Browse Source

fix(seer): Fix a few bugs in `get_stacktrace_string` (#69673)

This makes a few fixes to the function we use to stringify stacktraces to send to seer. Specifically:

- It reverses both the order in which we process chained exceptions, so that when we need to truncate the stacktrace, we prioritize including the most recent (outermost) exceptions.

  In order to test this change, it also adds a new test helper, `create_exception`, and modifies `create_frames` so that you can specify a generator for context lines. (This allows you to have a different pattern for context lines in each exception when there are multiple exceptions.) It also adjusts the expected output in one of the existing tests to account for the reversal.

- It removes an extra incrementation of the frame counter which happens when we fall into the `if frame_count + num_frames > MAX_FRAME_COUNT` conditional, as in that case we're ending up incrementing it both inside and outside the `if`.

- It removes the initialization of `contributing_frames` which happens prior to and outside of the `elif exception_value.get("id") == "stacktrace"` branch, since `contributing_frames` is only used inside the branch.

- It moves the check to make sure the exception contributes (and therefore whether its exception type and value and its stacktrace frames should be added into the final result) to before the exception processing, so that we don't bother to do the work if we're not going to use the results. Similarly, it adds a check to the stacktrace processing which short circuits if we already have the maximum allowable number of frames. (If we skip stacktrace processing we do still include the exception type and value.)

- Finally, it fixes the label given to each frame's function in the resulting string to be `function` rather than `line`, and adjusts tests to match.

Note that there are still a few questions about the way we format the string which we're purposely not addressing here. See https://github.com/getsentry/sentry/issues/69696.
Katie Byers 10 months ago
parent
commit
377ad5eab2

+ 11 - 14
src/sentry/api/endpoints/group_similar_issues_embeddings.py

@@ -51,20 +51,18 @@ def get_stacktrace_string(data):
 
     frame_count = 0
     stacktrace_str = ""
-    for exception in exceptions:
-        if exception.get("id") not in ["exception", "threads"]:
+    for exception in reversed(exceptions):
+        if exception.get("id") not in ["exception", "threads"] or not exception.get("contributes"):
             continue
 
-        # For each exception, extract its type, value, and stacktrace frames
+        # For each exception, extract its type, value, and up to 50 stacktrace frames
         exc_type, exc_value, frame_str = "", "", ""
         for exception_value in exception.get("values", []):
-            contributing_frames = []
             if exception_value.get("id") == "type":
                 exc_type = get_value_if_exists(exception_value)
             elif exception_value.get("id") == "value":
                 exc_value = get_value_if_exists(exception_value)
-            elif exception_value.get("id") == "stacktrace":
-                # Take the last 50 in-app and contributing frames
+            elif exception_value.get("id") == "stacktrace" and frame_count < MAX_FRAME_COUNT:
                 contributing_frames = [
                     frame
                     for frame in exception_value["values"]
@@ -74,7 +72,6 @@ def get_stacktrace_string(data):
                 if frame_count + num_frames > MAX_FRAME_COUNT:
                     remaining_frame_count = MAX_FRAME_COUNT - frame_count
                     contributing_frames = contributing_frames[-remaining_frame_count:]
-                    frame_count += remaining_frame_count
                     num_frames = remaining_frame_count
                 frame_count += num_frames
 
@@ -84,14 +81,14 @@ def get_stacktrace_string(data):
                         if frame_values.get("id") in frame_dict:
                             frame_dict[frame_values["id"]] = get_value_if_exists(frame_values)
 
-                    frame_str += f'  File "{frame_dict["filename"]}", line {frame_dict["function"]}\n    {frame_dict["context-line"]}\n'
+                    frame_str += f'  File "{frame_dict["filename"]}", function {frame_dict["function"]}\n    {frame_dict["context-line"]}\n'
 
-        # Add the exception values into the formatted string
-        if exception.get("contributes"):
-            if exception.get("id") == "exception":
-                stacktrace_str += f"{exc_type}: {exc_value}\n"
-            if frame_str:
-                stacktrace_str += frame_str
+        # Only exceptions have the type and value properties, so we don't need to handle the threads
+        # case here
+        if exception.get("id") == "exception":
+            stacktrace_str += f"{exc_type}: {exc_value}\n"
+        if frame_str:
+            stacktrace_str += frame_str
 
     return stacktrace_str.strip()
 

+ 95 - 9
tests/sentry/api/endpoints/test_group_similar_issues_embeddings.py

@@ -16,7 +16,7 @@ from sentry.testutils.cases import APITestCase
 from sentry.testutils.helpers.features import with_feature
 from sentry.utils import json
 
-EXPECTED_STACKTRACE_STRING = 'ZeroDivisionError: division by zero\n  File "python_onboarding.py", line divide_by_zero\n    divide = 1/0'
+EXPECTED_STACKTRACE_STRING = 'ZeroDivisionError: division by zero\n  File "python_onboarding.py", function divide_by_zero\n    divide = 1/0'
 BASE_APP_DATA: dict[str, Any] = {
     "app": {
         "type": "component",
@@ -391,7 +391,47 @@ class GroupSimilarIssuesEmbeddingsTest(APITestCase):
         self.path = f"/api/0/issues/{self.group.id}/similar-issues-embeddings/"
         self.similar_group = self.create_group(project=self.project)
 
-    def create_frames(self, num_frames, contributes=True, start_index=1):
+    def create_exception(
+        self, exception_type_str="Exception", exception_value="it broke", frames=None
+    ):
+        frames = frames or []
+        return {
+            "id": "exception",
+            "name": "exception",
+            "contributes": True,
+            "hint": None,
+            "values": [
+                {
+                    "id": "stacktrace",
+                    "name": "stack-trace",
+                    "contributes": True,
+                    "hint": None,
+                    "values": frames,
+                },
+                {
+                    "id": "type",
+                    "name": None,
+                    "contributes": True,
+                    "hint": None,
+                    "values": [exception_type_str],
+                },
+                {
+                    "id": "value",
+                    "name": None,
+                    "contributes": False,
+                    "hint": None,
+                    "values": [exception_value],
+                },
+            ],
+        }
+
+    def create_frames(
+        self,
+        num_frames,
+        contributes=True,
+        start_index=1,
+        context_line_factory=lambda i: f"test = {i}!",
+    ):
         frames = []
         for i in range(start_index, start_index + num_frames):
             frames.append(
@@ -420,7 +460,7 @@ class GroupSimilarIssuesEmbeddingsTest(APITestCase):
                             "name": None,
                             "contributes": contributes,
                             "hint": None,
-                            "values": ["test = " + str(i) + "!"],
+                            "values": [context_line_factory(i)],
                         },
                     ],
                 }
@@ -453,7 +493,7 @@ class GroupSimilarIssuesEmbeddingsTest(APITestCase):
 
     def test_get_stacktrace_string_simple(self):
         stacktrace_str = get_stacktrace_string(BASE_APP_DATA)
-        expected_stacktrace_str = 'ZeroDivisionError: division by zero\n  File "python_onboarding.py", line divide_by_zero\n    divide = 1/0'
+        expected_stacktrace_str = 'ZeroDivisionError: division by zero\n  File "python_onboarding.py", function divide_by_zero\n    divide = 1/0'
         assert stacktrace_str == expected_stacktrace_str
 
     def test_get_stacktrace_string_no_values(self):
@@ -486,7 +526,7 @@ class GroupSimilarIssuesEmbeddingsTest(APITestCase):
             "values"
         ] += self.create_frames(1, False)
         stacktrace_str = get_stacktrace_string(data_non_contributing_frame)
-        expected_stacktrace_str = 'ZeroDivisionError: division by zero\n  File "python_onboarding.py", line divide_by_zero\n    divide = 1/0'
+        expected_stacktrace_str = 'ZeroDivisionError: division by zero\n  File "python_onboarding.py", function divide_by_zero\n    divide = 1/0'
         assert stacktrace_str == expected_stacktrace_str
 
     def test_get_stacktrace_string_no_stacktrace(self):
@@ -497,18 +537,64 @@ class GroupSimilarIssuesEmbeddingsTest(APITestCase):
 
     def test_get_stacktrace_string_chained(self):
         stacktrace_str = get_stacktrace_string(CHAINED_APP_DATA)
-        expected_stacktrace_str = 'ZeroDivisionError: division by zero\n  File "python_onboarding.py", line divide_by_zero\n    divide = 1/0\nException: Catch divide by zero error\n  File "python_onboarding.py", line <module>\n    divide_by_zero()\n  File "python_onboarding.py", line divide_by_zero\n    raise Exception("Catch divide by zero error")'
+        expected_stacktrace_str = 'Exception: Catch divide by zero error\n  File "python_onboarding.py", function <module>\n    divide_by_zero()\n  File "python_onboarding.py", function divide_by_zero\n    raise Exception("Catch divide by zero error")\nZeroDivisionError: division by zero\n  File "python_onboarding.py", function divide_by_zero\n    divide = 1/0'
         assert stacktrace_str == expected_stacktrace_str
 
+    def test_get_stacktrace_string_chained_too_many_frames(self):
+        data_chained_exception = copy.deepcopy(CHAINED_APP_DATA)
+        data_chained_exception["app"]["component"]["values"][0]["values"] = [
+            self.create_exception(
+                exception_type_str="InnerException",
+                exception_value="nope",
+                frames=self.create_frames(
+                    num_frames=30, context_line_factory=lambda i: f"inner line {i}"
+                ),
+            ),
+            self.create_exception(
+                exception_type_str="MiddleException",
+                exception_value="un-uh",
+                frames=self.create_frames(
+                    num_frames=30, context_line_factory=lambda i: f"middle line {i}"
+                ),
+            ),
+            self.create_exception(
+                exception_type_str="OuterException",
+                exception_value="no way",
+                frames=self.create_frames(
+                    num_frames=30, context_line_factory=lambda i: f"outer line {i}"
+                ),
+            ),
+        ]
+        stacktrace_str = get_stacktrace_string(data_chained_exception)
+
+        # The stacktrace string should be:
+        #    30 frames from OuterExcepton (with lines counting up from 1 to 30), followed by
+        #    20 frames from MiddleExcepton (with lines counting up from 11 to 30), followed by
+        #    no frames from InnerExcepton (though the type and value are in there)
+        expected = "".join(
+            ["OuterException: no way"]
+            + [
+                f'\n  File "hello.py", function hello_there\n    outer line {i}'
+                for i in range(1, 31)  #
+            ]
+            + ["\nMiddleException: un-uh"]
+            + [
+                f'\n  File "hello.py", function hello_there\n    middle line {i}'
+                for i in range(11, 31)
+            ]
+            + ["\nInnerException: nope"]
+        )
+        assert stacktrace_str == expected
+
     def test_get_stacktrace_string_thread(self):
         stacktrace_str = get_stacktrace_string(MOBILE_THREAD_DATA)
-        assert stacktrace_str == 'File "", line TestHandler'
+        assert stacktrace_str == 'File "", function TestHandler'
 
     def test_get_stacktrace_string_system(self):
         data_system = copy.deepcopy(BASE_APP_DATA)
         data_system["system"] = data_system.pop("app")
         stacktrace_str = get_stacktrace_string(data_system)
-        expected_stacktrace_str = 'ZeroDivisionError: division by zero\n  File "python_onboarding.py", line divide_by_zero\n    divide = 1/0'
+        expected_stacktrace_str = 'ZeroDivisionError: division by zero\n  File "python_onboarding.py", function divide_by_zero\n    divide = 1/0'
         assert stacktrace_str == expected_stacktrace_str
 
     def test_get_stacktrace_string_app_and_system(self):
@@ -519,7 +605,7 @@ class GroupSimilarIssuesEmbeddingsTest(APITestCase):
         data.update({"system": data_system})
 
         stacktrace_str = get_stacktrace_string(data)
-        expected_stacktrace_str = 'ZeroDivisionError: division by zero\n  File "python_onboarding.py", line divide_by_zero\n    divide = 1/0'
+        expected_stacktrace_str = 'ZeroDivisionError: division by zero\n  File "python_onboarding.py", function divide_by_zero\n    divide = 1/0'
         assert stacktrace_str == expected_stacktrace_str
 
     def test_get_stacktrace_string_no_app_no_system(self):