Browse Source

chore(issues): Backfill UNRESOLVED groups with missing substatuses (#76082)

We need to run a backfill to fix the missing substatuses for UNRESOLVED
groups. This [redash query](https://redash.getsentry.net/queries/6888)
shows we have ~500 unresolved groups that have no substatus. We can
backfill the groups by
- setting the substatus to `NEW` if the group was first seen in the last
7 days
- setting the substatus to `REGRESSED` if there is a matching
GroupHistory row in the past 7 days
- setting the substatus to `ONGOING` if neither condition above applies. In
this case, we assume any status change is older than 7 days, in which
case the auto_transition tasks would already have marked the group as `ONGOING`.

Marking this as a post-deploy migration for safety, even though it will only
update roughly 500 groups.

https://github.com/getsentry/sentry/issues/76076
Snigdha Sharma 6 months ago
parent
commit
f36c906e6b

+ 1 - 1
migrations_lockfile.txt

@@ -10,6 +10,6 @@ hybridcloud: 0016_add_control_cacheversion
 nodestore: 0002_nodestore_no_dictfield
 remote_subscriptions: 0003_drop_remote_subscription
 replays: 0004_index_together
-sentry: 0751_grouphashmetadata_use_one_to_one_field_for_grouphash
+sentry: 0752_fix_substatus_for_unresolved_groups
 social_auth: 0002_default_auto_field
 uptime: 0007_update_detected_subscription_interval

+ 85 - 0
src/sentry/migrations/0752_fix_substatus_for_unresolved_groups.py

@@ -0,0 +1,85 @@
+# Generated by Django 5.0.7 on 2024-08-13 17:06
+
+
+from datetime import timedelta
+
+from django.apps.registry import Apps
+from django.db import migrations
+from django.db.backends.base.schema import BaseDatabaseSchemaEditor
+from django.utils import timezone
+
+from sentry.new_migrations.migrations import CheckedMigration
+
+
+# Copying constants defined in the models
+class GroupHistoryStatus:
+    """Migration-local copy of the GroupHistory status value this backfill filters on."""
+
+    REGRESSED = 7
+
+
+class GroupSubStatus:
+    """Migration-local copy of the substatus values this backfill can assign."""
+
+    # Fallback when the group is neither new nor recently regressed.
+    ONGOING = 3
+    # Group has a REGRESSED GroupHistory row from the last 7 days.
+    REGRESSED = 6
+    # Group was first seen within the last 7 days.
+    NEW = 7
+
+
+class GroupStatus:
+    """Migration-local copy of the only Group status this backfill selects."""
+
+    UNRESOLVED = 0
+
+
+# End copy
+
+
+def backfill_substatus_for_unresolved_groups(
+    apps: Apps, schema_editor: BaseDatabaseSchemaEditor
+) -> None:
+    """Assign a substatus to every UNRESOLVED group that currently has none.
+
+    The substatus is chosen by the first rule that matches:
+
+    * ``NEW`` if the group was first seen less than 7 days ago;
+    * ``REGRESSED`` if the group has a REGRESSED GroupHistory row added less
+      than 7 days ago;
+    * ``ONGOING`` otherwise.
+
+    Args:
+        apps: Historical app registry used to look up the models.
+        schema_editor: Unused; part of the RunPython callable signature.
+    """
+    Group = apps.get_model("sentry", "Group")
+    GroupHistory = apps.get_model("sentry", "GroupHistory")
+
+    seven_days_ago = timezone.now() - timedelta(days=7)
+    missing_substatus = {"status": GroupStatus.UNRESOLVED, "substatus": None}
+    recent_regressions = GroupHistory.objects.filter(
+        date_added__gt=seven_days_ago, status=GroupHistoryStatus.REGRESSED
+    )
+
+    for group in Group.objects.filter(**missing_substatus):
+        if group.first_seen > seven_days_ago:
+            new_substatus = GroupSubStatus.NEW
+        elif recent_regressions.filter(group=group).exists():
+            new_substatus = GroupSubStatus.REGRESSED
+        else:
+            new_substatus = GroupSubStatus.ONGOING
+
+        # Write only the substatus column, and only if the row still matches the
+        # selection criteria. A full save() would write every column back from the
+        # snapshot loaded above, overwriting concurrent changes and possibly
+        # attaching a substatus to a group that is no longer UNRESOLVED.
+        Group.objects.filter(pk=group.pk, **missing_substatus).update(substatus=new_substatus)
+
+
+class Migration(CheckedMigration):
+    """Data migration that backfills the substatus of UNRESOLVED groups that lack one."""
+
+    # Post-deployment migrations are not run automatically in production. The flag is
+    # only appropriate for operations that are safe to run after the code has deployed,
+    # so it should not be used for most operations that alter the schema of a table.
+    # Typical candidates:
+    # - Large data migrations, which we prefer to run manually so they can be monitored
+    #   and do not block the deploy while they run.
+    # - Adding indexes to large tables. This is a schema change, but it is safe to run
+    #   after the code has deployed and can take long enough to block a deploy.
+    # Once deployed, run these manually via: https://develop.sentry.dev/database-migrations/#migration-deployment
+
+    is_post_deployment = True
+
+    dependencies = [
+        ("sentry", "0751_grouphashmetadata_use_one_to_one_field_for_grouphash"),
+    ]
+
+    operations = [
+        migrations.RunPython(
+            code=backfill_substatus_for_unresolved_groups,
+            # Reversing is a no-op: the backfilled substatuses are left in place.
+            reverse_code=migrations.RunPython.noop,
+            hints={"tables": ["sentry_groupedmessage", "sentry_grouphistory"]},
+        ),
+    ]

+ 71 - 0
tests/sentry/migrations/test_0752_fix_substatus_for_unresolved_groups.py

@@ -0,0 +1,71 @@
+from datetime import timedelta
+
+from django.utils import timezone
+
+from sentry.models.group import Group, GroupStatus
+from sentry.models.grouphistory import GroupHistory, GroupHistoryStatus
+from sentry.models.organization import Organization
+from sentry.testutils.cases import TestMigrations
+from sentry.types.group import GroupSubStatus
+
+
+class BackfillMissingUnresolvedSubstatusTest(TestMigrations):
+    """Check that migration 0752 backfills NEW / REGRESSED / ONGOING substatuses.
+
+    Three UNRESOLVED groups without a substatus are created, one per rule of the
+    backfill, plus one group that already has a substatus and must stay untouched.
+    """
+
+    migrate_from = "0751_grouphashmetadata_use_one_to_one_field_for_grouphash"
+    migrate_to = "0752_fix_substatus_for_unresolved_groups"
+
+    def _create_group_without_substatus(self, first_seen):
+        """Create an UNRESOLVED group and strip its substatus in the database."""
+        group = Group.objects.create(
+            project=self.project,
+            status=GroupStatus.UNRESOLVED,
+            first_seen=first_seen,
+        )
+        # .update() skips calling the pre_save checks which add a substatus
+        group.update(substatus=None)
+        # Reload so the precondition is checked against the stored row rather
+        # than the in-memory instance.
+        group.refresh_from_db()
+        assert group.substatus is None
+        return group
+
+    def setup_before_migration(self, app):
+        now = timezone.now()
+        eight_days_ago = now - timedelta(days=8)
+
+        self.organization = Organization.objects.create(name="test", slug="test")
+        self.project = self.create_project(organization=self.organization)
+
+        # Already has a substatus, so the backfill must not select it.
+        self.do_not_update = Group.objects.create(
+            project=self.project,
+            status=GroupStatus.UNRESOLVED,
+            substatus=GroupSubStatus.NEW,
+        )
+
+        # Old group with no recent regression: falls through to ONGOING.
+        self.ongoing_group = self._create_group_without_substatus(eight_days_ago)
+
+        # Old group with a REGRESSED history row inside the 7-day window.
+        self.regressed_group = self._create_group_without_substatus(eight_days_ago)
+        GroupHistory.objects.create(
+            group=self.regressed_group,
+            date_added=now - timedelta(days=1),
+            organization_id=self.organization.id,
+            project_id=self.project.id,
+            status=GroupHistoryStatus.REGRESSED,
+        )
+
+        # First seen inside the 7-day window.
+        self.new_group = self._create_group_without_substatus(now)
+
+    def test(self):
+        expected_substatuses = (
+            (self.do_not_update, GroupSubStatus.NEW),
+            (self.ongoing_group, GroupSubStatus.ONGOING),
+            (self.regressed_group, GroupSubStatus.REGRESSED),
+            (self.new_group, GroupSubStatus.NEW),
+        )
+        for group, expected in expected_substatuses:
+            group.refresh_from_db()
+            assert group.substatus == expected