Browse Source

ref(eventattachment): Add type column to EventAttachment model (#20779)

* ref(eventattachment): Add type column to EventAttachment model

First step in optimizing query executed for each event as part of `save_event`
task. The new column will mirror the `sentry.File.type` column, will be indexed
and used for `EventAttachment` query which would otherwise need join in the
`File` table.

* ref(eventattachment): Add backfill megration for EventAttachment.type

* ref(eventattachment): Fill the type column when creating EventAttachment
Michal Kuffa 4 years ago
parent
commit
329ee0ea57

+ 1 - 1
migrations_lockfile.txt

@@ -10,7 +10,7 @@ auth: 0008_alter_user_username_max_length
 contenttypes: 0002_remove_content_type_name
 jira_ac: 0001_initial
 nodestore: 0001_initial
-sentry: 0099_fix_project_platforms
+sentry: 0101_backfill_file_type_on_event_attachment
 sessions: 0001_initial
 sites: 0002_alter_domain_unique
 social_auth: 0001_initial

+ 1 - 0
src/sentry/event_manager.py

@@ -1301,6 +1301,7 @@ def save_attachment(
         group_id=group_id,
         name=attachment.name,
         file=file,
+        type=attachment.type,
     )
 
     track_outcome(

+ 36 - 0
src/sentry/migrations/0100_file_type_on_event_attachment.py

@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11.29 on 2020-09-15 08:00
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    # This flag is used to mark that a migration shouldn't be automatically run in
+    # production. We set this to True for operations that we think are risky and want
+    # someone from ops to run manually and monitor.
+    # General advice is that if in doubt, mark your migration as `is_dangerous`.
+    # Some things you should always mark as dangerous:
+    # - Large data migrations. Typically we want these to be run manually by ops so that
+    #   they can be monitored. Since data migrations will now hold a transaction open
+    #   this is even more important.
+    # - Adding columns to highly active tables, even ones that are NULL.
+    is_dangerous = False
+
+    # This flag is used to decide whether to run this migration in a transaction or not.
+    # By default we prefer to run in a transaction, but for migrations where you want
+    # to `CREATE INDEX CONCURRENTLY` this needs to be set to False. Typically you'll
+    # want to create an index concurrently when adding one to an existing table.
+    atomic = False
+
+    dependencies = [
+        ("sentry", "0099_fix_project_platforms"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="eventattachment",
+            name="type",
+            field=models.CharField(db_index=True, max_length=64, null=True),
+        ),
+    ]

+ 47 - 0
src/sentry/migrations/0101_backfill_file_type_on_event_attachment.py

@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11.27 on 2020-01-23 19:07
+from __future__ import unicode_literals
+
+import logging
+
+from django.db import migrations
+
+from sentry import eventstore
+from sentry.utils.query import RangeQuerySetWrapper
+
+logger = logging.getLogger(__name__)
+
+
+def backfill_file_type(apps, schema_editor):
+    """
+    Fill the new EventAttachment.type column with values from EventAttachment.file.type.
+    """
+    EventAttachment = apps.get_model("sentry", "EventAttachment")
+    all_event_attachments = EventAttachment.objects.select_related("file").all()
+    for event_attachment in RangeQuerySetWrapper(all_event_attachments, step=1000):
+        event_attachment.type = event_attachment.file.type
+        event_attachment.save(update_fields=["type"])
+
+
+class Migration(migrations.Migration):
+    # This flag is used to mark that a migration shouldn't be automatically run in
+    # production. We set this to True for operations that we think are risky and want
+    # someone from ops to run manually and monitor.
+    # General advice is that if in doubt, mark your migration as `is_dangerous`.
+    # Some things you should always mark as dangerous:
+    # - Adding indexes to large tables. These indexes should be created concurrently,
+    #   unfortunately we can't run migrations outside of a transaction until Django
+    #   1.10. So until then these should be run manually.
+    # - Large data migrations. Typically we want these to be run manually by ops so that
+    #   they can be monitored. Since data migrations will now hold a transaction open
+    #   this is even more important.
+    # - Adding columns to highly active tables, even ones that are NULL.
+    is_dangerous = True
+
+    dependencies = [
+        ("sentry", "0100_file_type_on_event_attachment"),
+    ]
+
+    operations = [
+        migrations.RunPython(backfill_file_type, migrations.RunPython.noop),
+    ]

+ 1 - 0
src/sentry/models/eventattachment.py

@@ -27,6 +27,7 @@ class EventAttachment(Model):
     group_id = BoundedBigIntegerField(null=True, db_index=True)
     event_id = models.CharField(max_length=32, db_index=True)
     file = FlexibleForeignKey("sentry.File")
+    type = models.CharField(max_length=64, db_index=True, null=True)
     name = models.TextField()
     date_added = models.DateTimeField(default=timezone.now, db_index=True)
 

+ 5 - 1
src/sentry/testutils/factories.py

@@ -571,7 +571,11 @@ class Factories(object):
             )
 
         return EventAttachment.objects.create(
-            project_id=event.project_id, event_id=event.event_id, file=file, **kwargs
+            project_id=event.project_id,
+            event_id=event.event_id,
+            file=file,
+            type=file.type,
+            **kwargs
         )
 
     @staticmethod

+ 1 - 0
tests/sentry/api/endpoints/test_event_attachment_details.py

@@ -28,6 +28,7 @@ class CreateAttachmentMixin(object):
             event_id=self.event.event_id,
             project_id=self.event.project_id,
             file=self.file,
+            type=self.file.type,
             name="hello.png",
         )
 

+ 3 - 2
tests/sentry/api/endpoints/test_event_attachments.py

@@ -25,11 +25,12 @@ class EventAttachmentsTest(APITestCase):
             file=File.objects.create(name="hello.png", type="image/png"),
             name="hello.png",
         )
-
+        file = File.objects.create(name="hello.png", type="image/png")
         EventAttachment.objects.create(
             event_id=event2.event_id,
             project_id=event2.project_id,
-            file=File.objects.create(name="hello.png", type="image/png"),
+            file=file,
+            type=file.type,
             name="hello.png",
         )
 

+ 1 - 0
tests/sentry/api/endpoints/test_group_attachments.py

@@ -20,6 +20,7 @@ class GroupEventAttachmentsTest(APITestCase):
             project_id=self.event.project_id,
             group_id=self.group.id,
             file=self.file,
+            type=self.file.type,
             name="hello.png",
         )
 

+ 3 - 1
tests/sentry/deletions/test_group.py

@@ -65,10 +65,12 @@ class DeleteGroupTest(TestCase, SnubaTestCase):
         UserReport.objects.create(
             event_id=self.event.event_id, project_id=self.event.project_id, name="With event id"
         )
+        file = File.objects.create(name="hello.png", type="image/png")
         EventAttachment.objects.create(
             event_id=self.event.event_id,
             project_id=self.event.project_id,
-            file=File.objects.create(name="hello.png", type="image/png"),
+            file=file,
+            type=file.type,
             name="hello.png",
         )
         GroupAssignee.objects.create(group=group, project=self.project, user=self.user)

Some files were not shown because too many files changed in this diff