
ref(deletions): Split chunk method into multiple functions (#78293)

This makes the code easier to understand.
Armen Zambrano G. 5 months ago
parent commit ef63fb3c61
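
For context on the boolean that the refactored chunk() returns: the deletions framework calls chunk() repeatedly and stops once it reports False. A minimal sketch of that driving pattern, assuming a task instance is already constructed (the run_deletion helper below is hypothetical and not part of this commit):

    def run_deletion(task):
        # Hypothetical driver loop: each call to chunk() deletes up to
        # DEFAULT_CHUNK_SIZE events; chunk() returns False once nothing is left.
        while task.chunk():
            pass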

+ 1 - 1
src/sentry/deletions/base.py

@@ -178,7 +178,7 @@ class BaseDeletionTask(Generic[ModelT]):
 
 
 class ModelDeletionTask(BaseDeletionTask[ModelT]):
-    DEFAULT_QUERY_LIMIT = None
+    DEFAULT_QUERY_LIMIT: int | None = None
     manager_name = "objects"
 
     def __init__(

+ 35 - 20
src/sentry/deletions/defaults/group.py

@@ -61,9 +61,38 @@ class EventDataDeletionTask(BaseDeletionTask[Group]):
     ) -> None:
         self.groups = groups
         self.last_event: Event | None = None
+        self.set_group_and_project_ids()
         super().__init__(manager, **kwargs)
 
+    def set_group_and_project_ids(self) -> None:
+        group_ids = []
+        self.project_groups = defaultdict(list)
+        for group in self.groups:
+            self.project_groups[group.project_id].append(group.id)
+            group_ids.append(group.id)
+        self.group_ids = group_ids
+        self.project_ids = list(self.project_groups.keys())
+
     def chunk(self) -> bool:
+        """Delete up to DEFAULT_CHUNK_SIZE events and their related models.
+        Return True while there are still events to process, False once deletion is complete."""
+        events = self.get_unfetched_events()
+        if events:
+            self.delete_events_from_nodestore(events)
+            self.delete_dangling_attachments_and_user_reports(events)
+            # This value will be used in the next call to chunk
+            self.last_event = events[-1]
+            # As long as it returns True the task will keep iterating
+            return True
+        else:
+            # Remove all group events now that their node data has been removed.
+            for project_id, group_ids in self.project_groups.items():
+                # Notify Snuba so it deletes the events for the given groups in this project
+                eventstream_state = eventstream.backend.start_delete_groups(project_id, group_ids)
+                eventstream.backend.end_delete_groups(eventstream_state)
+            return False
+
+    def get_unfetched_events(self) -> list[Event]:
         conditions = []
         if self.last_event is not None:
             conditions.extend(
@@ -76,16 +105,9 @@ class EventDataDeletionTask(BaseDeletionTask[Group]):
                 ]
             )
 
-        group_ids = []
-        project_groups = defaultdict(list)
-        for group in self.groups:
-            project_groups[group.project_id].append(group.id)
-            group_ids.append(group.id)
-        project_ids = list(project_groups.keys())
-
         events = eventstore.backend.get_unfetched_events(
             filter=eventstore.Filter(
-                conditions=conditions, project_ids=project_ids, group_ids=group_ids
+                conditions=conditions, project_ids=self.project_ids, group_ids=self.group_ids
             ),
             limit=self.DEFAULT_CHUNK_SIZE,
             referrer="deletions.group",
@@ -94,32 +116,25 @@ class EventDataDeletionTask(BaseDeletionTask[Group]):
                 {"organization_id": self.groups[0].project.organization_id} if self.groups else None
             ),
         )
-        if not events:
-            # Remove all group events now that their node data has been removed.
-            for project_id, group_ids in project_groups.items():
-                eventstream_state = eventstream.backend.start_delete_groups(project_id, group_ids)
-                eventstream.backend.end_delete_groups(eventstream_state)
-            return False
-
-        self.last_event = events[-1]
+        return events
 
+    def delete_events_from_nodestore(self, events: Sequence[Event]) -> None:
         # Remove from nodestore
         node_ids = [Event.generate_node_id(event.project_id, event.event_id) for event in events]
         nodestore.backend.delete_multi(node_ids)
 
+    def delete_dangling_attachments_and_user_reports(self, events: Sequence[Event]) -> None:
         # Remove EventAttachment and UserReport *again* as those may not have a
         # group ID, therefore there may be dangling ones after "regular" model
         # deletion.
         event_ids = [event.event_id for event in events]
         models.EventAttachment.objects.filter(
-            event_id__in=event_ids, project_id__in=project_ids
+            event_id__in=event_ids, project_id__in=self.project_ids
         ).delete()
         models.UserReport.objects.filter(
-            event_id__in=event_ids, project_id__in=project_ids
+            event_id__in=event_ids, project_id__in=self.project_ids
         ).delete()
 
-        return True
-
 
 class GroupDeletionTask(ModelDeletionTask[Group]):
     # Delete groups in blocks of 1000. Using 1000 aims to

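A note on the new set_group_and_project_ids() helper: it precomputes the group-ID list and the project-to-group mapping that chunk() previously rebuilt on every call. An illustrative, self-contained sketch of that grouping (FakeGroup and the IDs below are made up for the example, not Sentry code):

    from collections import defaultdict
    from dataclasses import dataclass


    @dataclass
    class FakeGroup:
        # Stand-in for sentry.models.Group; only the fields the helper reads.
        id: int
        project_id: int


    groups = [FakeGroup(1, 10), FakeGroup(2, 10), FakeGroup(3, 20)]
    project_groups = defaultdict(list)
    group_ids = []
    for group in groups:
        project_groups[group.project_id].append(group.id)
        group_ids.append(group.id)

    print(dict(project_groups))  # {10: [1, 2], 20: [3]}
    print(group_ids)             # [1, 2, 3]

These precomputed values are then reused both for the eventstore query and for the final per-project Snuba delete calls shown in the diff above.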
+ 7 - 19
tests/sentry/deletions/test_group.py

@@ -40,8 +40,7 @@ class DeleteGroupTest(TestCase, SnubaTestCase):
 
         # Group 2 event
         self.keep_event = self.store_event(data=group2_data, project_id=self.project.id)
-        self.event_id3 = self.keep_event.event_id
-        self.node_id3 = Event.generate_node_id(self.project.id, self.event_id3)
+        self.keep_node_id = Event.generate_node_id(self.project.id, self.keep_event.event_id)
 
         UserReport.objects.create(
             group_id=group.id, project_id=self.event.project_id, name="With group id"
@@ -67,7 +66,7 @@ class DeleteGroupTest(TestCase, SnubaTestCase):
         group = self.event.group
         assert nodestore.backend.get(self.node_id)
         assert nodestore.backend.get(self.node_id2)
-        assert nodestore.backend.get(self.node_id3)
+        assert nodestore.backend.get(self.keep_node_id)
 
         with self.tasks():
             delete_groups(object_ids=[group.id])
@@ -81,20 +80,15 @@ class DeleteGroupTest(TestCase, SnubaTestCase):
         assert not Group.objects.filter(id=group.id).exists()
         assert not nodestore.backend.get(self.node_id)
         assert not nodestore.backend.get(self.node_id2)
-        assert nodestore.backend.get(self.node_id3), "Does not remove from second group"
+        assert nodestore.backend.get(self.keep_node_id), "Does not remove from second group"
         assert Group.objects.filter(id=self.keep_event.group_id).exists()
 
     def test_simple_multiple_groups(self):
         other_event = self.store_event(
-            data={
-                "event_id": "d" * 32,
-                "timestamp": iso_format(before_now(minutes=1)),
-                "fingerprint": ["group3"],
-            },
+            data={"timestamp": iso_format(before_now(minutes=1)), "fingerprint": ["group3"]},
             project_id=self.project.id,
         )
         other_node_id = Event.generate_node_id(self.project.id, other_event.event_id)
-        keep_node_id = Event.generate_node_id(self.project.id, self.keep_event.event_id)
 
         group = self.event.group
         with self.tasks():
@@ -106,15 +100,11 @@ class DeleteGroupTest(TestCase, SnubaTestCase):
         assert not nodestore.backend.get(other_node_id)
 
         assert Group.objects.filter(id=self.keep_event.group_id).exists()
-        assert nodestore.backend.get(keep_node_id)
+        assert nodestore.backend.get(self.keep_node_id)
 
     def test_grouphistory_relation(self):
         other_event = self.store_event(
-            data={
-                "event_id": "d" * 32,
-                "timestamp": iso_format(before_now(minutes=1)),
-                "fingerprint": ["group3"],
-            },
+            data={"timestamp": iso_format(before_now(minutes=1)), "fingerprint": ["group3"]},
             project_id=self.project.id,
         )
         other_group = other_event.group
@@ -161,14 +151,12 @@ class DeleteGroupTest(TestCase, SnubaTestCase):
         self.project.update_option("sentry:similarity_backfill_completed", int(time()))
         other_event = self.store_event(
             data={
-                "event_id": "d" * 32,
                 "timestamp": iso_format(before_now(minutes=1)),
                 "fingerprint": ["group3"],
             },
             project_id=self.project.id,
         )
         other_node_id = Event.generate_node_id(self.project.id, other_event.event_id)
-        keep_node_id = Event.generate_node_id(self.project.id, self.keep_event.event_id)
 
         hashes = [
             grouphash.hash
@@ -186,7 +174,7 @@ class DeleteGroupTest(TestCase, SnubaTestCase):
         assert not nodestore.backend.get(other_node_id)
 
         assert Group.objects.filter(id=self.keep_event.group_id).exists()
-        assert nodestore.backend.get(keep_node_id)
+        assert nodestore.backend.get(self.keep_node_id)
 
         assert mock_delete_seer_grouping_records_by_hash_apply_async.call_args[1] == {
             "args": [group.project.id, hashes, 0]