Browse Source

fix(grouping): Clear memoized interfaces after stack trace normalization (#27129)

Previously, grouping enhancements would use a memoized version of the
frame data, so any modifications made in
normalize_stacktraces_for_grouping were not considered by the enhancer.

The impact of this bug is basically that +app/-app stack trace rules are
not considered during grouping at all, only in the grouping debug view.
It also has similarly catastrophic effects on hierarchical grouping
where all categorization is basically unapplied.
Joris Bayer 3 years ago
parent
commit
12ee98a7ca

+ 1 - 4
src/sentry/event_manager.py

@@ -71,7 +71,6 @@ from sentry.reprocessing2 import (
     save_unprocessed_event,
 )
 from sentry.signals import first_event_received, first_transaction_received, issue_unresolved
-from sentry.stacktraces.processing import normalize_stacktraces_for_grouping
 from sentry.tasks.integrations import kick_off_status_syncs
 from sentry.utils import json, metrics
 from sentry.utils.cache import cache_key_for_event
@@ -1630,9 +1629,7 @@ def _calculate_event_grouping(project, event, grouping_config) -> CalculatedHash
 
     with metrics.timer("event_manager.normalize_stacktraces_for_grouping"):
         with sentry_sdk.start_span(op="event_manager.normalize_stacktraces_for_grouping"):
-            normalize_stacktraces_for_grouping(
-                event.data.data, load_grouping_config(grouping_config)
-            )
+            event.normalize_stacktraces_for_grouping(load_grouping_config(grouping_config))
 
     with metrics.timer("event_manager.apply_server_fingerprinting"):
         # The active grouping config was put into the event in the

+ 13 - 2
src/sentry/eventstore/models.py

@@ -439,6 +439,18 @@ class Event:
 
         return filtered_hashes, tree_labels
 
+    def normalize_stacktraces_for_grouping(self, grouping_config):
+        """Normalize stacktraces and clear memoized interfaces
+
+        See stand-alone function normalize_stacktraces_for_grouping
+        """
+        from sentry.stacktraces.processing import normalize_stacktraces_for_grouping
+
+        normalize_stacktraces_for_grouping(self.data, grouping_config)
+
+        # We have modified event data, so any cached interfaces have to be reset:
+        self.__dict__.pop("interfaces", None)
+
     def get_grouping_variants(self, force_config=None, normalize_stacktraces=False):
         """
         This is similar to `get_hashes` but will instead return the
@@ -450,7 +462,6 @@ class Event:
         in place.
         """
         from sentry.grouping.api import get_grouping_variants_for_event, load_grouping_config
-        from sentry.stacktraces.processing import normalize_stacktraces_for_grouping
 
         # Forcing configs has two separate modes.  One is where just the
         # config ID is given in which case it's merged with the stored or
@@ -474,7 +485,7 @@ class Event:
             with sentry_sdk.start_span(op="grouping.normalize_stacktraces_for_grouping") as span:
                 span.set_tag("project", self.project_id)
                 span.set_tag("event_id", self.event_id)
-                normalize_stacktraces_for_grouping(self.data, config)
+                self.normalize_stacktraces_for_grouping(config)
 
         with sentry_sdk.start_span(op="grouping.get_grouping_variants") as span:
             span.set_tag("project", self.project_id)

+ 124 - 0
tests/sentry/event_manager/test_event_manager.py

@@ -1494,6 +1494,130 @@ class EventManagerTest(TestCase):
             == 1
         )
 
+    def test_category_match_in_app(self):
+        """
+        Regression test to ensure that grouping in-app enhancements work in
+        principle.
+        """
+        from sentry.grouping.enhancer import Enhancements
+
+        enhancement = Enhancements.from_config_string(
+            """
+            function:foo category=bar
+            function:foo2 category=bar
+            category:bar -app
+            """,
+        )
+
+        event = make_event(
+            platform="native",
+            exception={
+                "values": [
+                    {
+                        "type": "Hello",
+                        "stacktrace": {
+                            "frames": [
+                                {
+                                    "function": "foo",
+                                    "in_app": True,
+                                },
+                                {"function": "bar"},
+                            ]
+                        },
+                    }
+                ]
+            },
+        )
+
+        manager = EventManager(event)
+        manager.normalize()
+        manager.get_data()["grouping_config"] = {
+            "enhancements": enhancement.dumps(),
+            "id": "mobile:2021-02-12",
+        }
+        event1 = manager.save(1)
+        assert event1.data["exception"]["values"][0]["stacktrace"]["frames"][0]["in_app"] is False
+
+        event = make_event(
+            platform="native",
+            exception={
+                "values": [
+                    {
+                        "type": "Hello",
+                        "stacktrace": {
+                            "frames": [
+                                {
+                                    "function": "foo2",
+                                    "in_app": True,
+                                },
+                                {"function": "bar"},
+                            ]
+                        },
+                    }
+                ]
+            },
+        )
+
+        manager = EventManager(event)
+        manager.normalize()
+        manager.get_data()["grouping_config"] = {
+            "enhancements": enhancement.dumps(),
+            "id": "mobile:2021-02-12",
+        }
+        event2 = manager.save(1)
+        assert event2.data["exception"]["values"][0]["stacktrace"]["frames"][0]["in_app"] is False
+        assert event1.group_id == event2.group_id
+
+    def test_category_match_group(self):
+        """
+        Regression test to ensure categories are applied consistently and don't
+        produce hash mismatches.
+        """
+        from sentry.grouping.enhancer import Enhancements
+
+        enhancement = Enhancements.from_config_string(
+            """
+            function:foo category=foo_like
+            category:foo_like -group
+            """,
+        )
+
+        event = make_event(
+            platform="native",
+            exception={
+                "values": [
+                    {
+                        "type": "Hello",
+                        "stacktrace": {
+                            "frames": [
+                                {
+                                    "function": "foo",
+                                },
+                                {
+                                    "function": "bar",
+                                },
+                            ]
+                        },
+                    }
+                ]
+            },
+        )
+
+        manager = EventManager(event)
+        manager.normalize()
+
+        grouping_config = {
+            "enhancements": enhancement.dumps(),
+            "id": "mobile:2021-02-12",
+        }
+
+        manager.get_data()["grouping_config"] = grouping_config
+        event1 = manager.save(1)
+
+        event2 = Event(event1.project_id, event1.event_id, data=event1.data)
+
+        assert event1.get_hashes() == event2.get_hashes(grouping_config)
+
 
 class ReleaseIssueTest(TestCase):
     def setUp(self):

+ 56 - 0
tests/sentry/eventstore/test_models.py

@@ -4,6 +4,7 @@ import pytest
 
 from sentry.db.models.fields.node import NodeData
 from sentry.eventstore.models import Event
+from sentry.grouping.enhancer import Enhancements
 from sentry.models import Environment
 from sentry.testutils import TestCase
 from sentry.testutils.helpers.datetime import before_now, iso_format
@@ -242,6 +243,61 @@ class EventTest(TestCase):
         assert not event_from_nodestore.group_id
         assert not event_from_nodestore.group
 
+    def test_grouping_reset(self):
+        """
+        Regression test against a specific mutability bug involving grouping,
+        stacktrace normalization and memoized interfaces
+        """
+        event_data = {
+            "exception": {
+                "values": [
+                    {
+                        "type": "Hello",
+                        "stacktrace": {
+                            "frames": [
+                                {
+                                    "function": "foo",
+                                },
+                                {
+                                    "function": "bar",
+                                },
+                            ]
+                        },
+                    }
+                ]
+            },
+        }
+
+        enhancement = Enhancements.from_config_string(
+            """
+            function:foo category=foo_like
+            category:foo_like -group
+            """,
+        )
+        grouping_config = {
+            "enhancements": enhancement.dumps(),
+            "id": "mobile:2021-02-12",
+        }
+
+        event1 = Event(
+            event_id="a" * 32,
+            data=event_data,
+            project_id=self.project.id,
+        )
+        variants1 = event1.get_grouping_variants(grouping_config, normalize_stacktraces=True)
+
+        event2 = Event(
+            event_id="b" * 32,
+            data=event_data,
+            project_id=self.project.id,
+        )
+        event2.interfaces  # Populate cache
+        variants2 = event2.get_grouping_variants(grouping_config, normalize_stacktraces=True)
+
+        assert sorted(v.as_dict()["hash"] for v in variants1.values()) == sorted(
+            v.as_dict()["hash"] for v in variants2.values()
+        )
+
 
 @pytest.mark.django_db
 def test_renormalization(monkeypatch, factories, task_runner, default_project):