
ref(deletions): Refactor error events task (#78378)

This is in preparation for Issue Platform deletions (#77794).

This is also a follow-up to #78293.
Armen Zambrano G. committed 5 months ago
commit da8f3a84dc
3 changed files with 96 additions and 50 deletions:
  1. pyproject.toml (+1, -0)
  2. src/sentry/deletions/defaults/group.py (+82, -37)
  3. tests/sentry/deletions/test_group.py (+13, -13)
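In outline, the commit splits the old EventDataDeletionTask into a dataset-agnostic base class and an error-specific subclass. A condensed sketch of the resulting hierarchy, using only names that appear in the diffs below (method bodies elided; import paths inferred from the relative imports in the diff):

    from sentry.deletions.base import BaseDeletionTask
    from sentry.eventstore.models import Event
    from sentry.models.group import Group
    from sentry.snuba.dataset import Dataset

    class EventsBaseDeletionTask(BaseDeletionTask[Group]):
        # Shared event-fetching logic; each subclass picks the Snuba dataset.
        referrer = "deletions.group"
        dataset: Dataset

        def get_unfetched_events(self) -> list[Event]: ...

    class ErrorEventsDeletionTask(EventsBaseDeletionTask):
        # Error events keep the existing Snuba deletion path.
        dataset = Dataset.Events

        def chunk(self) -> bool: ...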

pyproject.toml (+1, -0)

@@ -545,6 +545,7 @@ module = [
     "sentry.web.frontend.auth_provider_login",
     "sentry.web.frontend.csv",
     "sentry_plugins.base",
+    "tests.sentry.deletions.test_group",
     "tests.sentry.event_manager.test_event_manager",
     "tests.sentry.grouping.test_fingerprinting",
     "tests.sentry.hybridcloud.*",

src/sentry/deletions/defaults/group.py (+82, -37)

@@ -2,13 +2,15 @@ from __future__ import annotations
 
 import os
 from collections import defaultdict
-from collections.abc import Sequence
+from collections.abc import Mapping, Sequence
 from typing import Any
 
 from sentry import eventstore, eventstream, models, nodestore
 from sentry.eventstore.models import Event
+from sentry.issues.grouptype import GroupCategory
 from sentry.models.group import Group, GroupStatus
 from sentry.models.rulefirehistory import RuleFireHistory
+from sentry.snuba.dataset import Dataset
 from sentry.tasks.delete_seer_grouping_records import call_delete_seer_grouping_records_by_hash
 
 from ..base import BaseDeletionTask, BaseRelation, ModelDeletionTask, ModelRelation
@@ -48,18 +50,21 @@ _GROUP_RELATED_MODELS = DIRECT_GROUP_RELATED_MODELS + (
 )
 
 
-class EventDataDeletionTask(BaseDeletionTask[Group]):
+class EventsBaseDeletionTask(BaseDeletionTask[Group]):
     """
-    Deletes nodestore data, EventAttachment and UserReports for group
+    Base class for deleting events associated with groups and their related models.
     """
 
     # Number of events fetched from eventstore per chunk() call.
     DEFAULT_CHUNK_SIZE = 10000
+    referrer = "deletions.group"
+    dataset: Dataset
 
     def __init__(
         self, manager: DeletionTaskManager, groups: Sequence[Group], **kwargs: Any
     ) -> None:
         self.groups = groups
+        # Use self.last_event to keep track of the last event processed in the chunk method.
         self.last_event: Event | None = None
         self.set_group_and_project_ids()
         super().__init__(manager, **kwargs)
@@ -73,25 +78,6 @@ class EventDataDeletionTask(BaseDeletionTask[Group]):
         self.group_ids = group_ids
         self.project_ids = list(self.project_groups.keys())
 
-    def chunk(self) -> bool:
-        """It deletes DEFAULT_CHUNK_SIZE number of events and related models.
-        It returns a boolean to say if the deletion has completed."""
-        events = self.get_unfetched_events()
-        if events:
-            self.delete_events_from_nodestore(events)
-            self.delete_dangling_attachments_and_user_reports(events)
-            # This value will be used in the next call to chunk
-            self.last_event = events[-1]
-            # As long as it returns True the task will keep iterating
-            return True
-        else:
-            # Remove all group events now that their node data has been removed.
-            for project_id, group_ids in self.project_groups.items():
-                # A message is sent to Snuba that will handle deleting the events for the given groups in the project
-                eventstream_state = eventstream.backend.start_delete_groups(project_id, group_ids)
-                eventstream.backend.end_delete_groups(eventstream_state)
-            return False
-
     def get_unfetched_events(self) -> list[Event]:
         conditions = []
         if self.last_event is not None:
@@ -110,14 +96,45 @@ class EventDataDeletionTask(BaseDeletionTask[Group]):
                 conditions=conditions, project_ids=self.project_ids, group_ids=self.group_ids
             ),
             limit=self.DEFAULT_CHUNK_SIZE,
-            referrer="deletions.group",
+            referrer=self.referrer,
             orderby=["-timestamp", "-event_id"],
-            tenant_ids=(
-                {"organization_id": self.groups[0].project.organization_id} if self.groups else None
-            ),
+            tenant_ids=self.tenant_ids,
+            dataset=self.dataset,
         )
         return events
 
+    @property
+    def tenant_ids(self) -> Mapping[str, Any]:
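+        # Forwarded to the eventstore query as Snuba tenant metadata:
+        # the referrer is always set, organization_id only when groups exist.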
+        result = {"referrer": self.referrer}
+        if self.groups:
+            result["organization_id"] = self.groups[0].project.organization_id
+        return result
+
+
+class ErrorEventsDeletionTask(EventsBaseDeletionTask):
+    """
+    Deletes nodestore data, EventAttachment and UserReports for requested groups.
+
+    This class uses the old Snuba deletion method.
+    """
+
+    dataset = Dataset.Events
+
+    def chunk(self) -> bool:
+        """This method is called to delete chunks of data. It returns a boolean to say
+        if the deletion has completed and if it needs to be called again."""
+        events = self.get_unfetched_events()
+        if events:
+            self.delete_events_from_nodestore(events)
+            self.delete_dangling_attachments_and_user_reports(events)
+            # This value will be used in the next call to chunk
+            self.last_event = events[-1]
+            # As long as it returns True the task will keep iterating
+            return True
+        else:
+            self.delete_events_from_snuba()
+            return False
+
     def delete_events_from_nodestore(self, events: Sequence[Event]) -> None:
         # Remove from nodestore
         node_ids = [Event.generate_node_id(event.project_id, event.event_id) for event in events]
@@ -135,6 +152,12 @@ class EventDataDeletionTask(BaseDeletionTask[Group]):
             event_id__in=event_ids, project_id__in=self.project_ids
         ).delete()
 
+    def delete_events_from_snuba(self) -> None:
+        # Remove all group events now that their node data has been removed.
+        for project_id, group_ids in self.project_groups.items():
+            eventstream_state = eventstream.backend.start_delete_groups(project_id, group_ids)
+            eventstream.backend.end_delete_groups(eventstream_state)
+
 
 class GroupDeletionTask(ModelDeletionTask[Group]):
     # Delete groups in blocks of 1000. Using 1000 aims to
@@ -146,31 +169,41 @@ class GroupDeletionTask(ModelDeletionTask[Group]):
         Group deletion operates as a quasi-bulk operation so that we don't flood
         snuba replacements with deletions per group.
         """
-        self.mark_deletion_in_progress(instance_list)
+        if not instance_list:
+            return True
 
-        group_ids = [group.id for group in instance_list]
+        self.mark_deletion_in_progress(instance_list)
 
+        error_group_ids = [
+            group.id for group in instance_list if group.issue_category == GroupCategory.ERROR
+        ]
         # Tell seer to delete grouping records with these group hashes
-        call_delete_seer_grouping_records_by_hash(group_ids)
+        call_delete_seer_grouping_records_by_hash(error_group_ids)
+
+        self._delete_children(instance_list)
+
+        # Remove group objects with children removed.
+        self.delete_instance_bulk(instance_list)
 
+        return False
+
+    def _delete_children(self, instance_list: Sequence[Group]) -> None:
+        group_ids = [group.id for group in instance_list]
         # Remove child relations for all groups first.
         child_relations: list[BaseRelation] = []
         for model in _GROUP_RELATED_MODELS:
             child_relations.append(ModelRelation(model, {"group_id__in": group_ids}))
 
+        error_groups, _ = separate_by_group_category(instance_list)
+
         # If this isn't a retention cleanup also remove event data.
         if not os.environ.get("_SENTRY_CLEANUP"):
-            child_relations.append(
-                BaseRelation(params={"groups": instance_list}, task=EventDataDeletionTask)
-            )
+            if error_groups:
+                params = {"groups": error_groups}
+                child_relations.append(BaseRelation(params=params, task=ErrorEventsDeletionTask))
 
         self.delete_children(child_relations)
 
-        # Remove group objects with children removed.
-        self.delete_instance_bulk(instance_list)
-
-        return False
-
     def delete_instance(self, instance: Group) -> None:
         from sentry import similarity
 
@@ -183,3 +216,15 @@ class GroupDeletionTask(ModelDeletionTask[Group]):
         Group.objects.filter(id__in=[i.id for i in instance_list]).exclude(
             status=GroupStatus.DELETION_IN_PROGRESS
         ).update(status=GroupStatus.DELETION_IN_PROGRESS, substatus=None)
+
+
+def separate_by_group_category(instance_list: Sequence[Group]) -> tuple[list[Group], list[Group]]:
+    error_groups = []
+    issue_platform_groups = []
+    for group in instance_list:
+        if group.issue_category == GroupCategory.ERROR:
+            error_groups.append(group)
+        else:
+            issue_platform_groups.append(group)
+    return error_groups, issue_platform_groups
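To make the direction of the refactor concrete: a future Issue Platform deletion task (#77794) could plug into the new base class by overriding dataset. The sketch below is hypothetical and not part of this commit; the class name and the Dataset.IssuePlatform member are assumptions:

    from sentry.snuba.dataset import Dataset


    class IssuePlatformEventsDeletionTask(EventsBaseDeletionTask):
        """Hypothetical: delete Issue Platform occurrences for the requested groups."""

        dataset = Dataset.IssuePlatform  # assumed dataset name

        def chunk(self) -> bool:
            events = self.get_unfetched_events()
            if events:
                # Nodestore cleanup could mirror ErrorEventsDeletionTask; how the
                # Snuba-side rows are removed is exactly what #77794 must decide.
                self.last_event = events[-1]
                return True
            return False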

tests/sentry/deletions/test_group.py (+13, -13)

@@ -3,10 +3,10 @@ from unittest import mock
 from uuid import uuid4
 
 from sentry import nodestore
-from sentry.deletions.defaults.group import EventDataDeletionTask
+from sentry.deletions.defaults.group import ErrorEventsDeletionTask
 from sentry.deletions.tasks.groups import delete_groups
 from sentry.eventstore.models import Event
-from sentry.issues.grouptype import ReplayDeadClickType
+from sentry.issues.grouptype import FeedbackGroup
 from sentry.models.eventattachment import EventAttachment
 from sentry.models.files.file import File
 from sentry.models.group import Group
@@ -22,7 +22,7 @@ from tests.sentry.issues.test_utils import OccurrenceTestMixin
 
 
 class DeleteGroupTest(TestCase, SnubaTestCase):
-    def setUp(self):
+    def setUp(self) -> None:
         super().setUp()
         one_minute = iso_format(before_now(minutes=1))
         group1_data = {"timestamp": one_minute, "fingerprint": ["group1"]}
@@ -61,8 +61,8 @@ class DeleteGroupTest(TestCase, SnubaTestCase):
         GroupMeta.objects.create(group=group, key="foo", value="bar")
         GroupRedirect.objects.create(group_id=group.id, previous_group_id=1)
 
-    def test_simple(self):
-        EventDataDeletionTask.DEFAULT_CHUNK_SIZE = 1  # test chunking logic
+    def test_simple(self) -> None:
+        ErrorEventsDeletionTask.DEFAULT_CHUNK_SIZE = 1  # test chunking logic
         group = self.event.group
         assert nodestore.backend.get(self.node_id)
         assert nodestore.backend.get(self.node_id2)
@@ -83,7 +83,7 @@ class DeleteGroupTest(TestCase, SnubaTestCase):
         assert nodestore.backend.get(self.keep_node_id), "Does not remove from second group"
         assert Group.objects.filter(id=self.keep_event.group_id).exists()
 
-    def test_simple_multiple_groups(self):
+    def test_simple_multiple_groups(self) -> None:
         other_event = self.store_event(
             data={"timestamp": iso_format(before_now(minutes=1)), "fingerprint": ["group3"]},
             project_id=self.project.id,
@@ -102,7 +102,7 @@ class DeleteGroupTest(TestCase, SnubaTestCase):
         assert Group.objects.filter(id=self.keep_event.group_id).exists()
         assert nodestore.backend.get(self.keep_node_id)
 
-    def test_grouphistory_relation(self):
+    def test_grouphistory_relation(self) -> None:
         other_event = self.store_event(
             data={"timestamp": iso_format(before_now(minutes=1)), "fingerprint": ["group3"]},
             project_id=self.project.id,
@@ -133,7 +133,7 @@ class DeleteGroupTest(TestCase, SnubaTestCase):
 
     @mock.patch("os.environ.get")
     @mock.patch("sentry.nodestore.delete_multi")
-    def test_cleanup(self, nodestore_delete_multi, os_environ):
+    def test_cleanup(self, nodestore_delete_multi: mock.Mock, os_environ: mock.Mock) -> None:
         os_environ.side_effect = lambda key: "1" if key == "_SENTRY_CLEANUP" else None
         group = self.event.group
 
@@ -146,8 +146,8 @@ class DeleteGroupTest(TestCase, SnubaTestCase):
         "sentry.tasks.delete_seer_grouping_records.delete_seer_grouping_records_by_hash.apply_async"
     )
     def test_delete_groups_delete_grouping_records_by_hash(
-        self, mock_delete_seer_grouping_records_by_hash_apply_async
-    ):
+        self, mock_delete_seer_grouping_records_by_hash_apply_async: mock.Mock
+    ) -> None:
         self.project.update_option("sentry:similarity_backfill_completed", int(time()))
         other_event = self.store_event(
             data={
@@ -182,13 +182,13 @@ class DeleteGroupTest(TestCase, SnubaTestCase):
 
 
 class DeleteIssuePlatformTest(TestCase, SnubaTestCase, OccurrenceTestMixin):
-    def test_issue_platform(self):
+    def test_issue_platform(self) -> None:
         event = self.store_event(data={}, project_id=self.project.id)
         issue_occurrence, group_info = self.process_occurrence(
             event_id=event.event_id,
             project_id=self.project.id,
-            # We are using ReplayDeadClickType as a representative of Issue Platform
-            type=ReplayDeadClickType.type_id,
+            # We are using FeedbackGroup as a representative of Issue Platform
+            type=FeedbackGroup.type_id,
             event_data={
                 "fingerprint": ["issue-platform-group"],
                 "timestamp": before_now(minutes=1).isoformat(),
@@ -207,7 +207,7 @@ class DeleteIssuePlatformTest(TestCase, SnubaTestCase, OccurrenceTestMixin):
         assert nodestore.backend.get(node_id)
 
         # The Issue Platform group and occurrence are deleted
-        assert issue_platform_group.issue_type == ReplayDeadClickType
+        assert issue_platform_group.issue_type == FeedbackGroup
         assert not Group.objects.filter(id=issue_platform_group.id).exists()
         node_id = Event.generate_node_id(issue_occurrence.project_id, issue_occurrence.id)
         assert not nodestore.backend.get(node_id)
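
As a usage note, the new separate_by_group_category helper is what lets _delete_children route only error groups to ErrorEventsDeletionTask, while Issue Platform groups fall through untouched until #77794 lands. A minimal sketch, with the two group fixtures assumed:

    from sentry.deletions.defaults.group import separate_by_group_category

    # error_group and feedback_group are assumed fixtures: one ERROR-category
    # group and one Issue Platform group (e.g. a FeedbackGroup).
    error_groups, issue_platform_groups = separate_by_group_category(
        [error_group, feedback_group]
    )
    assert error_groups == [error_group]
    assert issue_platform_groups == [feedback_group]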