Browse Source

perf: Fix seq scans to `GroupLink` (#31849)

We are doing sequential scans of the `GroupLink` table in this query: https://github.com/getsentry/sentry/blob/1e437a123a5439a911b6a72bd3acbf2f01000309/src/sentry/notifications/utils/__init__.py#L58-L74

Modified the query to also filter on project, and added an index on
`(project, linked_id, linked_type, group)` to support it. I didn't want to add an unqualified index
on `(linked_id, linked_type)`, since looking up by those values alone isn't properly scoped to a project.
Dan Fuller 3 years ago
parent
commit
d77db3ed0a

+ 1 - 1
migrations_lockfile.txt

@@ -6,5 +6,5 @@ To resolve this, rebase against latest master and regenerate your migration. Thi
 will then be regenerated, and you should be able to merge without conflicts.
 
 nodestore: 0002_nodestore_no_dictfield
-sentry: 0272_seq_scan_indexes
+sentry: 0273_fix_grouplink_seqscans
 social_auth: 0001_initial

+ 38 - 0
src/sentry/migrations/0273_fix_grouplink_seqscans.py

@@ -0,0 +1,38 @@
+# Generated by Django 2.2.24 on 2022-02-15 22:44
+
+from django.db import migrations, models
+
+from sentry.new_migrations.migrations import CheckedMigration
+
+
+class Migration(CheckedMigration):
+    # This flag is used to mark that a migration shouldn't be automatically run in production. For
+    # the most part, this should only be used for operations where it's safe to run the migration
+    # after your code has deployed. So this should not be used for most operations that alter the
+    # schema of a table.
+    # Here are some things that make sense to mark as dangerous:
+    # - Large data migrations. Typically we want these to be run manually by ops so that they can
+    #   be monitored and not block the deploy for a long period of time while they run.
+    # - Adding indexes to large tables. Since this can take a long time, we'd generally prefer to
+    #   have ops run this and not block the deploy. Note that while adding an index is a schema
+    #   change, it's completely safe to run the operation after the code has deployed.
+    is_dangerous = False
+
+    # This flag is used to decide whether to run this migration in a transaction or not. Generally
+    # we don't want to run in a transaction here, since for long running operations like data
+    # back-fills this results in us locking an increasing number of rows until we finally commit.
+    atomic = False
+
+    dependencies = [
+        ("sentry", "0272_seq_scan_indexes"),
+    ]
+
+    operations = [
+        migrations.AddIndex(
+            model_name="grouplink",
+            index=models.Index(
+                fields=["project", "linked_id", "linked_type", "group"],
+                name="sentry_grou_project_dd3e95_idx",
+            ),
+        ),
+    ]

+ 1 - 0
src/sentry/models/grouplink.py

@@ -77,5 +77,6 @@ class GroupLink(Model):
         app_label = "sentry"
         db_table = "sentry_grouplink"
         unique_together = (("group", "linked_type", "linked_id"),)
+        indexes = [models.Index(fields=["project", "linked_id", "linked_type", "group"])]
 
     __repr__ = sane_repr("group_id", "linked_type", "linked_id", "relationship", "datetime")

+ 1 - 0
src/sentry/notifications/utils/__init__.py

@@ -62,6 +62,7 @@ def get_group_counts_by_project(
         Group.objects.filter(
             project__in=projects,
             id__in=GroupLink.objects.filter(
+                project__in=projects,
                 linked_type=GroupLink.LinkedType.commit,
                 linked_id__in=ReleaseCommit.objects.filter(release=release).values_list(
                     "commit_id", flat=True