
chore(issue-platform): Stop relying on `group_states` in `post_process` (#63130)

`group_states` is no longer needed, because we will only ever have a single group
at a time in `post_process`. I've verified this using the metrics added in
https://github.com/getsentry/sentry/pull/62722.

Once we remove the usage here, we can also remove the code that sends
`group_states` through, and ideally the multi-group handling on `Event` as well.
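
To illustrate, here is a minimal sketch of how the call shape changes for
callers of `post_process_group`. The task is stubbed out and the IDs are
placeholders, but the keyword arguments mirror the signature in the diff below:

def post_process_group(
    is_new, is_regression, is_new_group_environment,
    cache_key, group_id=None, occurrence_id=None, project_id=None, **kwargs
):
    # Stub standing in for the Celery task in src/sentry/tasks/post_process.py.
    print(f"post-processing group {group_id} (new={is_new})")


# Before this commit: callers passed a one-element group_states list
# alongside group_id, duplicating the same information.
post_process_group(
    is_new=True,
    is_regression=False,
    is_new_group_environment=False,
    cache_key="dummykey",
    group_id=123,
    group_states=[
        {
            "id": 123,
            "is_new": True,
            "is_regression": False,
            "is_new_group_environment": False,
        }
    ],
    project_id=1,
)

# After: the single group is identified by group_id alone.
post_process_group(
    is_new=True,
    is_regression=False,
    is_new_group_environment=False,
    cache_key="dummykey",
    group_id=123,
    project_id=1,
)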
Dan Fuller · 10 months ago · commit 59570b2f78

+ 49 - 118
src/sentry/tasks/post_process.py

@@ -39,7 +39,7 @@ from sentry.utils.services import build_instance_from_options
 
 if TYPE_CHECKING:
     from sentry.eventstore.models import Event, GroupEvent
-    from sentry.eventstream.base import GroupState, GroupStates
+    from sentry.eventstream.base import GroupState
     from sentry.models.group import Group
     from sentry.models.project import Project
     from sentry.models.team import Team
@@ -536,7 +536,6 @@ def post_process_group(
     is_new_group_environment,
     cache_key,
     group_id=None,
-    group_states: GroupStates | None = None,
     occurrence_id: str | None = None,
     project_id: int | None = None,
     **kwargs,
@@ -659,89 +658,43 @@ def post_process_group(
                     event=event,
                 )
 
-        # TODO: Remove this check once we're sending all group ids as `group_states` and treat all
-        # events the same way
-        if not is_transaction_event and group_states is None:
-            # error issue
-            group_states = [
-                {
-                    "id": group_id,
-                    "is_new": is_new,
-                    "is_regression": is_regression,
-                    "is_new_group_environment": is_new_group_environment,
-                }
-            ]
+        metric_tags = {}
+        if group_id:
+            group_state: GroupState = {
+                "id": group_id,
+                "is_new": is_new,
+                "is_regression": is_regression,
+                "is_new_group_environment": is_new_group_environment,
+            }
 
-        try:
-            if group_states is not None:
-                if not is_transaction_event:
-                    if len(group_states) == 0:
-                        metrics.incr("sentry.tasks.post_process.error_empty_group_states")
-                    elif len(group_states) > 1:
-                        metrics.incr("sentry.tasks.post_process.error_too_many_group_states")
-                    elif group_id != group_states[0]["id"]:
-                        metrics.incr(
-                            "sentry.tasks.post_process.error_group_states_dont_match_group"
-                        )
-                else:
-                    if len(group_states) == 1:
-                        metrics.incr("sentry.tasks.post_process.transaction_has_group_state")
-                        if group_id != group_states[0]["id"]:
-                            metrics.incr(
-                                "sentry.tasks.post_process.transaction_group_states_dont_match_group"
-                            )
-                    if len(group_states) > 1:
-                        metrics.incr(
-                            "sentry.tasks.post_process.transaction_has_too_many_group_states"
-                        )
-        except Exception:
-            logger.exception(
-                "Error logging group_states stats. If this happens it's noisy but not critical, nothing is broken"
-            )
+            update_event_group(event, group_state)
+            bind_organization_context(event.project.organization)
+            _capture_event_stats(event)
+            if should_update_escalating_metrics(event, is_transaction_event):
+                _update_escalating_metrics(event)
 
-        update_event_groups(event, group_states)
-        bind_organization_context(event.project.organization)
-        _capture_event_stats(event)
-        if should_update_escalating_metrics(event, is_transaction_event):
-            _update_escalating_metrics(event)
-
-        group_events: Mapping[int, GroupEvent] = {
-            ge.group_id: ge for ge in list(event.build_group_events())
-        }
-        if occurrence is not None:
-            for ge in group_events.values():
-                ge.occurrence = occurrence
-
-        multi_groups = []
-        if group_states:
-            for gs in group_states:
-                gs_id = gs.get("id")
-                if gs_id:
-                    associated_event = group_events.get(gs_id)
-                    if associated_event:
-                        multi_groups.append((associated_event, gs))
-
-        group_jobs: Sequence[PostProcessJob] = [
-            {
-                "event": ge,
-                "group_state": gs,
+            group_events: Mapping[int, GroupEvent] = {
+                ge.group_id: ge for ge in list(event.build_group_events())
+            }
+            if occurrence is not None:
+                for ge in group_events.values():
+                    ge.occurrence = occurrence
+
+            group_job: PostProcessJob = {
+                "event": group_events[group_state["id"]],
+                "group_state": group_state,
                 "is_reprocessed": is_reprocessed,
-                "has_reappeared": bool(not gs["is_new"]),
+                "has_reappeared": bool(not group_state["is_new"]),
                 "has_alert": False,
                 "has_escalated": False,
             }
-            for ge, gs in multi_groups
-        ]
-
-        for job in group_jobs:
-            run_post_process_job(job)
+            run_post_process_job(group_job)
 
-        metric_tags = {}
-        if group_events:
-            # In practice, we only have one group here and will be removing the list of jobs. For now, just grab a
-            # random one
-            group_event = list(group_events.values())[0]
-            metric_tags["occurrence_type"] = group_event.group.issue_type.slug
+            if group_events:
+                # In practice, we only have one group here and will be removing the list of jobs. For now, just grab a
+                # random one
+                group_event = list(group_events.values())[0]
+                metric_tags["occurrence_type"] = group_event.group.issue_type.slug
 
         if not is_reprocessed and event.data.get("received"):
             duration = time() - event.data["received"]
@@ -842,54 +795,32 @@ def process_event(data: dict, group_id: int | None) -> Event:
     return event
 
 
-def update_event_groups(event: Event, group_states: GroupStates | None = None) -> None:
+def update_event_group(event: Event, group_state: GroupState) -> None:
     # NOTE: we must pass through the full Event object, and not an
     # event_id since the Event object may not actually have been stored
     # in the database due to sampling.
     from sentry.models.group import get_group_with_redirect
 
-    # event.group_id can be None in the case of transaction events
-    if event.group_id is not None:
-        # deprecated event.group and event.group_id usage, kept here for backwards compatibility
-        event.group, _ = get_group_with_redirect(event.group_id)
-        if event.group:
-            event.group_id = event.group.id
-            # We buffer updates to last_seen, assume its at least >= the event datetime
-            event.group.last_seen = max(event.datetime, event.group.last_seen)
-
     # Re-bind Group since we're reading the Event object
     # from cache, which may contain a stale group and project
-    group_states = group_states or (
-        [
-            {
-                "id": event.group_id,
-                "is_new": False,
-                "is_new_group_environment": False,
-                "is_regression": False,
-            }
-        ]
-        if event.group_id
-        else []
-    )
-    rebound_groups = []
-    for group_state in group_states:
-        rebound_group = get_group_with_redirect(group_state["id"])[0]
-        # We buffer updates to last_seen, assume its at least >= the event datetime
-        rebound_group.last_seen = max(event.datetime, rebound_group.last_seen)
-
-        # We fetch buffered updates to group aggregates here and populate them on the Group. This
-        # helps us avoid problems with processing group ignores and alert rules that rely on these
-        # stats.
-        with sentry_sdk.start_span(op="tasks.post_process_group.fetch_buffered_group_stats"):
-            fetch_buffered_group_stats(rebound_group)
-
-        rebound_group.project = event.project
-        rebound_group.project.set_cached_field_value("organization", event.project.organization)
-
-        group_state["id"] = rebound_group.id
-        rebound_groups.append(rebound_group)
+    rebound_group = get_group_with_redirect(group_state["id"])[0]
+    # We buffer updates to last_seen, assume it's at least >= the event datetime
+    rebound_group.last_seen = max(event.datetime, rebound_group.last_seen)
+
+    # We fetch buffered updates to group aggregates here and populate them on the Group. This
+    # helps us avoid problems with processing group ignores and alert rules that rely on these
+    # stats.
+    with sentry_sdk.start_span(op="tasks.post_process_group.fetch_buffered_group_stats"):
+        fetch_buffered_group_stats(rebound_group)
+
+    rebound_group.project = event.project
+    rebound_group.project.set_cached_field_value("organization", event.project.organization)
+    group_state["id"] = rebound_group.id
+    if event.group_id is not None:
+        # deprecated event.group and event.group_id usage, kept here for backwards compatibility
+        event.group = rebound_group
 
-    event.groups = rebound_groups
+    event.groups = [rebound_group]
 
 
 def process_inbox_adds(job: PostProcessJob) -> None:
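
To make the new single-group path concrete, here is a hypothetical,
self-contained sketch of the job construction above. `GroupState` is reduced
to the keys visible in the diff, and `build_job` is an illustrative helper,
not a function in the codebase:

from typing import TypedDict


class GroupState(TypedDict):
    id: int
    is_new: bool
    is_regression: bool
    is_new_group_environment: bool


def build_job(
    group_id: int,
    is_new: bool,
    is_regression: bool,
    is_new_group_environment: bool,
    is_reprocessed: bool,
) -> dict:
    group_state: GroupState = {
        "id": group_id,
        "is_new": is_new,
        "is_regression": is_regression,
        "is_new_group_environment": is_new_group_environment,
    }
    # One state, one job: the old code built a job per entry in group_states
    # and looped over the resulting list.
    return {
        "group_state": group_state,
        "is_reprocessed": is_reprocessed,
        # A group that is not new has been seen before, hence "reappeared".
        "has_reappeared": not group_state["is_new"],
        "has_alert": False,
        "has_escalated": False,
    }


print(build_job(123, is_new=False, is_regression=True,
                is_new_group_environment=False, is_reprocessed=False))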

+ 0 - 8
tests/sentry/mail/test_actions.py

@@ -265,14 +265,6 @@ class NotifyEmailTest(RuleTestCase, PerformanceIssueTestCase):
                 is_regression=False,
                 is_new_group_environment=False,
                 cache_key=write_event_to_cache(event),
-                group_states=[
-                    {
-                        "id": event.group_id,
-                        "is_new": True,
-                        "is_regression": False,
-                        "is_new_group_environment": False,
-                    }
-                ],
                 occurrence_id=event.occurrence_id,
                 project_id=event.group.project_id,
                 group_id=event.group_id,

+ 5 - 35
tests/sentry/tasks/test_post_process.py

@@ -2372,18 +2372,6 @@ class PostProcessGroupPerformanceTest(
     def call_post_process_group(
         self, is_new, is_regression, is_new_group_environment, event, cache_key=None
     ):
-        group_states = (
-            [
-                {
-                    "id": event.group_id,
-                    "is_new": is_new,
-                    "is_regression": is_regression,
-                    "is_new_group_environment": is_new_group_environment,
-                }
-            ]
-            if event.group_id
-            else None
-        )
         if cache_key is None:
             cache_key = write_event_to_cache(event)
         with self.feature(PerformanceNPlusOneGroupType.build_post_process_group_feature_name()):
@@ -2392,7 +2380,7 @@ class PostProcessGroupPerformanceTest(
                 is_regression=is_regression,
                 is_new_group_environment=is_new_group_environment,
                 cache_key=cache_key,
-                group_states=group_states,
+                group_id=event.group_id,
                 project_id=event.project_id,
             )
         return cache_key
@@ -2454,12 +2442,6 @@ class PostProcessGroupPerformanceTest(
     ):
         event = self.create_performance_issue()
         assert event.group
-        # cache_key = write_event_to_cache(event)
-        group_state = dict(
-            is_new=True,
-            is_regression=False,
-            is_new_group_environment=True,
-        )
 
         # TODO(jangjodi): Fix this ordering test; side_effects should be a function (lambda),
         # but because post-processing is async, this causes the assert to fail because it doesn't
@@ -2470,10 +2452,11 @@ class PostProcessGroupPerformanceTest(
         mock_process_rules.side_effect = None
 
         post_process_group(
-            **group_state,
+            is_new=True,
+            is_regression=False,
+            is_new_group_environment=True,
             cache_key="dummykey",
             group_id=event.group_id,
-            group_states=[{"id": event.group.id, **group_state}],
             occurrence_id=event.occurrence_id,
             project_id=self.project.id,
         )
@@ -2511,18 +2494,6 @@ class PostProcessGroupAggregateEventTest(
     def call_post_process_group(
         self, is_new, is_regression, is_new_group_environment, event, cache_key=None
     ):
-        group_states = (
-            [
-                {
-                    "id": event.group_id,
-                    "is_new": is_new,
-                    "is_regression": is_regression,
-                    "is_new_group_environment": is_new_group_environment,
-                }
-            ]
-            if event.group_id
-            else None
-        )
         if cache_key is None:
             cache_key = write_event_to_cache(event)
         with self.feature(
@@ -2533,7 +2504,7 @@ class PostProcessGroupAggregateEventTest(
                 is_regression=is_regression,
                 is_new_group_environment=is_new_group_environment,
                 cache_key=cache_key,
-                group_states=group_states,
+                group_id=event.group_id,
                 project_id=event.project_id,
             )
         return cache_key
@@ -2568,7 +2539,6 @@ class TransactionClustererTestCase(TestCase, SnubaTestCase):
             is_new_group_environment=False,
             cache_key=cache_key,
             group_id=None,
-            group_states=None,
         )
 
         assert mock_store_transaction_name.mock_calls == [