Browse Source

perf: Add indexes to tables that are causing sequential scans (#31843)

We added alerting on sequential scans in production, which revealed that we're doing a large number
of seq scans on `MonitorCheckIn` and `GroupEmailThread`. This change adds indexes to both tables to fix this.

Relevant queries:
For `MonitorCheckIn`:
https://github.com/getsentry/sentry/blob/709795f07b8ef66ee43d26c0ebeb036b28c53eb4/src/sentry/tasks/check_monitors.py#L63-L67
https://github.com/getsentry/sentry/blob/709795f07b8ef66ee43d26c0ebeb036b28c53eb4/src/sentry/tasks/check_monitors.py#L42-L44

For `GroupEmailThread`:
Deletions in cleanup.py: https://github.com/getsentry/sentry/blob/7301feb2b05e73dbe6a56e6f4719fae23eaf1a91/src/sentry/runner/commands/cleanup.py#L176-L180
Dan Fuller 3 years ago
parent
commit
33be603494

+ 1 - 1
migrations_lockfile.txt

@@ -6,5 +6,5 @@ To resolve this, rebase against latest master and regenerate your migration. Thi
 will then be regenerated, and you should be able to merge without conflicts.
 
 nodestore: 0002_nodestore_no_dictfield
-sentry: 0271_add_codeowners_auto_sync_setting
+sentry: 0272_seq_scan_indexes
 social_auth: 0001_initial

+ 51 - 0
src/sentry/migrations/0272_seq_scan_indexes.py

@@ -0,0 +1,51 @@
+# Generated by Django 2.2.24 on 2022-02-15 21:27
+
+from django.db import migrations, models
+
+import sentry.db.models.fields.bounded
+from sentry.new_migrations.migrations import CheckedMigration
+
+
+class Migration(CheckedMigration):
+    # This flag is used to mark that a migration shouldn't be automatically run in production. For
+    # the most part, this should only be used for operations where it's safe to run the migration
+    # after your code has deployed. So this should not be used for most operations that alter the
+    # schema of a table.
+    # Here are some things that make sense to mark as dangerous:
+    # - Large data migrations. Typically we want these to be run manually by ops so that they can
+    #   be monitored and not block the deploy for a long period of time while they run.
+    # - Adding indexes to large tables. Since this can take a long time, we'd generally prefer to
+    #   have ops run this and not block the deploy. Note that while adding an index is a schema
+    #   change, it's completely safe to run the operation after the code has deployed.
+    is_dangerous = True
+
+    # This flag is used to decide whether to run this migration in a transaction or not. Generally
+    # we don't want to run in a transaction here, since for long running operations like data
+    # back-fills this results in us locking an increasing number of rows until we finally commit.
+    atomic = False
+
+    dependencies = [
+        ("sentry", "0271_add_codeowners_auto_sync_setting"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="monitorcheckin",
+            name="status",
+            field=sentry.db.models.fields.bounded.BoundedPositiveIntegerField(
+                db_index=True, default=0
+            ),
+        ),
+        migrations.AddIndex(
+            model_name="groupemailthread",
+            index=models.Index(
+                fields=["date", "project", "id"], name="sentry_grou_date_d4eb5a_idx"
+            ),
+        ),
+        migrations.AddIndex(
+            model_name="monitorcheckin",
+            index=models.Index(
+                fields=["monitor", "date_added", "status"], name="sentry_moni_monitor_0a49ce_idx"
+            ),
+        ),
+    ]

+ 1 - 0
src/sentry/models/groupemailthread.py

@@ -26,5 +26,6 @@ class GroupEmailThread(Model):
         app_label = "sentry"
         db_table = "sentry_groupemailthread"
         unique_together = (("email", "group"), ("email", "msgid"))
+        indexes = [models.Index(fields=["date", "project", "id"])]
 
     __repr__ = sane_repr("email", "group_id", "msgid")

+ 6 - 1
src/sentry/models/monitorcheckin.py

@@ -37,7 +37,9 @@ class MonitorCheckIn(Model):
     project_id = BoundedPositiveIntegerField(db_index=True)
     monitor = FlexibleForeignKey("sentry.Monitor")
     location = FlexibleForeignKey("sentry.MonitorLocation", null=True)
-    status = BoundedPositiveIntegerField(default=0, choices=CheckInStatus.as_choices())
+    status = BoundedPositiveIntegerField(
+        default=0, choices=CheckInStatus.as_choices(), db_index=True
+    )
     config = EncryptedJsonField(default=dict)
     duration = BoundedPositiveIntegerField(null=True)
     date_added = models.DateTimeField(default=timezone.now)
@@ -47,6 +49,9 @@ class MonitorCheckIn(Model):
     class Meta:
         app_label = "sentry"
         db_table = "sentry_monitorcheckin"
+        indexes = [
+            models.Index(fields=["monitor", "date_added", "status"]),
+        ]
 
     __repr__ = sane_repr("guid", "project_id", "status")