@@ -2,13 +2,15 @@ from __future__ import annotations
 
 import os
 from collections import defaultdict
-from collections.abc import Sequence
+from collections.abc import Mapping, Sequence
 from typing import Any
 
 from sentry import eventstore, eventstream, models, nodestore
 from sentry.eventstore.models import Event
+from sentry.issues.grouptype import GroupCategory
 from sentry.models.group import Group, GroupStatus
 from sentry.models.rulefirehistory import RuleFireHistory
+from sentry.snuba.dataset import Dataset
 from sentry.tasks.delete_seer_grouping_records import call_delete_seer_grouping_records_by_hash
 
 from ..base import BaseDeletionTask, BaseRelation, ModelDeletionTask, ModelRelation
@@ -48,18 +50,21 @@ _GROUP_RELATED_MODELS = DIRECT_GROUP_RELATED_MODELS + (
 )
 
 
-class EventDataDeletionTask(BaseDeletionTask[Group]):
+class EventsBaseDeletionTask(BaseDeletionTask[Group]):
     """
-    Deletes nodestore data, EventAttachment and UserReports for group
+    Base class for deleting the events associated with groups and their related models.
"""
|
|
|
|
|
|
# Number of events fetched from eventstore per chunk() call.
|
|
|
DEFAULT_CHUNK_SIZE = 10000
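+    # `referrer` tags these Snuba queries for attribution; subclasses must set
+    # `dataset` to the Snuba dataset that stores their events.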
+    referrer = "deletions.group"
+    dataset: Dataset
 
     def __init__(
         self, manager: DeletionTaskManager, groups: Sequence[Group], **kwargs: Any
     ) -> None:
         self.groups = groups
+        # Tracks the last event processed so the next chunk() call resumes after it.
         self.last_event: Event | None = None
         self.set_group_and_project_ids()
         super().__init__(manager, **kwargs)
@@ -73,25 +78,6 @@ class EventDataDeletionTask(BaseDeletionTask[Group]):
         self.group_ids = group_ids
         self.project_ids = list(self.project_groups.keys())
 
-    def chunk(self) -> bool:
-        """It deletes DEFAULT_CHUNK_SIZE number of events and related models.
-        It returns a boolean to say if the deletion has completed."""
-        events = self.get_unfetched_events()
-        if events:
-            self.delete_events_from_nodestore(events)
-            self.delete_dangling_attachments_and_user_reports(events)
-            # This value will be used in the next call to chunk
-            self.last_event = events[-1]
-            # As long as it returns True the task will keep iterating
-            return True
-        else:
-            # Remove all group events now that their node data has been removed.
-            for project_id, group_ids in self.project_groups.items():
-                # A message is sent to Snuba that will handle deleting the events for the given groups in the project
-                eventstream_state = eventstream.backend.start_delete_groups(project_id, group_ids)
-                eventstream.backend.end_delete_groups(eventstream_state)
-            return False
-
     def get_unfetched_events(self) -> list[Event]:
         conditions = []
         if self.last_event is not None:
@@ -110,14 +96,45 @@ class EventDataDeletionTask(BaseDeletionTask[Group]):
                 conditions=conditions, project_ids=self.project_ids, group_ids=self.group_ids
             ),
             limit=self.DEFAULT_CHUNK_SIZE,
-            referrer="deletions.group",
+            referrer=self.referrer,
             orderby=["-timestamp", "-event_id"],
-            tenant_ids=(
-                {"organization_id": self.groups[0].project.organization_id} if self.groups else None
-            ),
+            tenant_ids=self.tenant_ids,
+            dataset=self.dataset,
         )
         return events
 
+    @property
+    def tenant_ids(self) -> Mapping[str, Any]:
+        result = {"referrer": self.referrer}
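+        # Snuba tenancy is keyed off the first group's organization; this assumes
+        # all groups in a single deletion belong to the same organization.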
+        if self.groups:
+            result["organization_id"] = self.groups[0].project.organization_id
+        return result
+
+
+class ErrorEventsDeletionTask(EventsBaseDeletionTask):
+    """
+    Deletes nodestore data, EventAttachment and UserReport rows for the requested groups.
+
+    This class uses the old Snuba deletion method.
+    """
+
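+    # Error events are stored in Snuba's Events dataset.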
+    dataset = Dataset.Events
+
+    def chunk(self) -> bool:
+        """Delete up to DEFAULT_CHUNK_SIZE events and their related models. Returns
+        True while there is more data to delete, and False once deletion is complete."""
+        events = self.get_unfetched_events()
+        if events:
+            self.delete_events_from_nodestore(events)
+            self.delete_dangling_attachments_and_user_reports(events)
+            # This value will be used in the next call to chunk
+            self.last_event = events[-1]
+            # As long as it returns True the task will keep iterating
+            return True
+        else:
+            self.delete_events_from_snuba()
+            return False
+
     def delete_events_from_nodestore(self, events: Sequence[Event]) -> None:
         # Remove from nodestore
         node_ids = [Event.generate_node_id(event.project_id, event.event_id) for event in events]
@@ -135,6 +152,12 @@ class EventDataDeletionTask(BaseDeletionTask[Group]):
             event_id__in=event_ids, project_id__in=self.project_ids
         ).delete()
 
+    def delete_events_from_snuba(self) -> None:
+        # Remove all group events now that their node data has been removed.
+        for project_id, group_ids in self.project_groups.items():
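+            # A message is sent to Snuba that will handle deleting the events
+            # for the given groups in the project.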
+            eventstream_state = eventstream.backend.start_delete_groups(project_id, group_ids)
+            eventstream.backend.end_delete_groups(eventstream_state)
+
 
 class GroupDeletionTask(ModelDeletionTask[Group]):
     # Delete groups in blocks of 1000. Using 1000 aims to
@@ -146,31 +169,41 @@ class GroupDeletionTask(ModelDeletionTask[Group]):
         Group deletion operates as a quasi-bulk operation so that we don't flood
         snuba replacements with deletions per group.
         """
-        self.mark_deletion_in_progress(instance_list)
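+        # With no groups to process there is nothing to do.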
+        if not instance_list:
+            return True
 
-        group_ids = [group.id for group in instance_list]
+        self.mark_deletion_in_progress(instance_list)
 
+        error_group_ids = [
+            group.id for group in instance_list if group.issue_category == GroupCategory.ERROR
+        ]
         # Tell seer to delete grouping records with these group hashes
-        call_delete_seer_grouping_records_by_hash(group_ids)
+        call_delete_seer_grouping_records_by_hash(error_group_ids)
+
+        self._delete_children(instance_list)
+
+        # Remove group objects with children removed.
+        self.delete_instance_bulk(instance_list)
 
+        return False
+
+    def _delete_children(self, instance_list: Sequence[Group]) -> None:
+        group_ids = [group.id for group in instance_list]
         # Remove child relations for all groups first.
         child_relations: list[BaseRelation] = []
         for model in _GROUP_RELATED_MODELS:
             child_relations.append(ModelRelation(model, {"group_id__in": group_ids}))
 
+        error_groups, _ = separate_by_group_category(instance_list)
+
         # If this isn't a retention cleanup also remove event data.
         if not os.environ.get("_SENTRY_CLEANUP"):
-            child_relations.append(
-                BaseRelation(params={"groups": instance_list}, task=EventDataDeletionTask)
-            )
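+            # Only error groups get their event data deleted here; issue platform
+            # groups were filtered out by separate_by_group_category above.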
+            if error_groups:
+                params = {"groups": error_groups}
+                child_relations.append(BaseRelation(params=params, task=ErrorEventsDeletionTask))
 
         self.delete_children(child_relations)
 
-        # Remove group objects with children removed.
-        self.delete_instance_bulk(instance_list)
-
-        return False
-
     def delete_instance(self, instance: Group) -> None:
         from sentry import similarity
@@ -183,3 +216,15 @@ class GroupDeletionTask(ModelDeletionTask[Group]):
         Group.objects.filter(id__in=[i.id for i in instance_list]).exclude(
             status=GroupStatus.DELETION_IN_PROGRESS
         ).update(status=GroupStatus.DELETION_IN_PROGRESS, substatus=None)
+
+
+def separate_by_group_category(instance_list: Sequence[Group]) -> tuple[list[Group], list[Group]]:
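+    """Split the given groups into error groups and issue platform groups."""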
+    error_groups = []
+    issue_platform_groups = []
+    for group in instance_list:
+        if group.issue_category == GroupCategory.ERROR:
+            error_groups.append(group)
+        else:
+            issue_platform_groups.append(group)
+    return error_groups, issue_platform_groups