
ref(fileblobowner): Remove fk constraint from fileblobowner to organization (#25915)

Michal Kuffa · 3 years ago · commit 6f669805d1

migrations_lockfile.txt (+1 -1)

@@ -7,5 +7,5 @@ will then be regenerated, and you should be able to merge without conflicts.
 
 jira_ac: 0001_initial
 nodestore: 0002_nodestore_no_dictfield
-sentry: 0191_make_externalactor_integration_id_not_null
+sentry: 0192_remove_fileblobowner_org_fk
 social_auth: 0001_initial
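
The lockfile tracks one "head" migration per app, which is what makes it a merge tripwire: any two branches that each add a sentry migration both rewrite the "sentry:" line, so the merge conflicts in this one small file instead of failing later at migration time. A sketch of the conflict two concurrent 0192 migrations would produce (the second migration name is hypothetical):

    <<<<<<< HEAD
    sentry: 0192_remove_fileblobowner_org_fk
    =======
    sentry: 0192_add_some_other_field
    >>>>>>> feature-branch

As the preamble excerpt in the hunk header says, renumbering the later migration and regenerating this file resolves the conflict.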

src/sentry/api/endpoints/debug_files.py (+1 -1)

@@ -260,7 +260,7 @@ def find_missing_chunks(organization, chunks):
     """Returns a list of chunks which are missing for an org."""
     owned = set(
         FileBlobOwner.objects.filter(
-            blob__checksum__in=chunks, organization=organization
+            blob__checksum__in=chunks, organization_id=organization.id
         ).values_list("blob__checksum", flat=True)
     )
     return list(set(chunks) - owned)
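
This one-keyword change is forced by the model change below: once FileBlobOwner.organization is replaced by a plain organization_id integer, the ORM rejects the related-object keyword at query-build time, while the SQL it generates stays the same. A minimal before/after sketch, assuming an Organization instance named organization and a list of SHA1 hex digests named chunks:

    # Hedged sketch; names follow the diff above.
    from sentry.models import FileBlobOwner

    # Old spelling, invalid once the FK field is removed from the model:
    #   FileBlobOwner.objects.filter(organization=organization, ...)
    #   -> FieldError: Cannot resolve keyword 'organization' into field
    #
    # New spelling; compiles to the same WHERE organization_id = %s clause
    # the FK lookup produced, so behavior is unchanged:
    owned = set(
        FileBlobOwner.objects.filter(
            blob__checksum__in=chunks, organization_id=organization.id
        ).values_list("blob__checksum", flat=True)
    )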

src/sentry/migrations/0192_remove_fileblobowner_org_fk.py (+62 -0)

@@ -0,0 +1,62 @@
+# Generated by Django 1.11.29 on 2021-05-06 10:03
+
+from django.db import migrations
+
+import sentry.db.models.fields.bounded
+
+
+class Migration(migrations.Migration):
+    # This flag is used to mark that a migration shouldn't be automatically run in
+    # production. We set this to True for operations that we think are risky and want
+    # someone from ops to run manually and monitor.
+    # General advice is that if in doubt, mark your migration as `is_dangerous`.
+    # Some things you should always mark as dangerous:
+    # - Large data migrations. Typically we want these to be run manually by ops so that
+    #   they can be monitored. Since data migrations will now hold a transaction open
+    #   this is even more important.
+    # - Adding columns to highly active tables, even ones that are NULL.
+    is_dangerous = True
+
+    # This flag is used to decide whether to run this migration in a transaction or not.
+    # By default we prefer to run in a transaction, but for migrations where you want
+    # to `CREATE INDEX CONCURRENTLY` this needs to be set to False. Typically you'll
+    # want to create an index concurrently when adding one to an existing table.
+    # You'll also usually want to set this to `False` if you're writing a data
+    # migration, since we don't want the entire migration to run in one long-running
+    # transaction.
+    atomic = False
+
+    dependencies = [
+        ("sentry", "0191_make_externalactor_integration_id_not_null"),
+    ]
+
+    operations = [
+        migrations.SeparateDatabaseAndState(
+            database_operations=[
+                migrations.AlterField(
+                    model_name="fileblobowner",
+                    name="organization",
+                    field=sentry.db.models.fields.foreignkey.FlexibleForeignKey(
+                        to="sentry.Organization",
+                        db_constraint=False,
+                    ),
+                ),
+            ],
+            state_operations=[
+                migrations.AddField(
+                    model_name="fileblobowner",
+                    name="organization_id",
+                    field=sentry.db.models.fields.bounded.BoundedBigIntegerField(db_index=True),
+                    preserve_default=False,
+                ),
+                migrations.RemoveField(
+                    model_name="fileblobowner",
+                    name="organization",
+                ),
+                migrations.AlterUniqueTogether(
+                    name="fileblobowner",
+                    unique_together={("blob", "organization_id")},
+                ),
+            ],
+        )
+    ]
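
The SeparateDatabaseAndState split above is the whole trick: only database_operations emit SQL, and the single statement they emit drops the foreign-key constraint (roughly ALTER TABLE "sentry_fileblobowner" DROP CONSTRAINT ... on Postgres; with atomic = False it runs outside the usual per-migration transaction). The state_operations generate no DDL at all, because a ForeignKey named organization was already backed by an organization_id column; they merely realign Django's model state with the unchanged table. A hedged sketch of verifying the drop afterwards from a Django shell, using Django's introspection API:

    # Minimal sketch, assuming a Postgres default connection.
    from django.db import connection

    with connection.cursor() as cursor:
        constraints = connection.introspection.get_constraints(
            cursor, "sentry_fileblobowner"
        )

    # Before 0192 one entry carries foreign_key=("sentry_organization", "id");
    # afterwards only the index on organization_id and the
    # (blob_id, organization_id) unique constraint should remain.
    print([name for name, c in constraints.items() if c["foreign_key"]])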

src/sentry/models/file.py (+12 -6)

@@ -12,11 +12,17 @@ from django.conf import settings
 from django.core.files.base import ContentFile
 from django.core.files.base import File as FileObj
 from django.core.files.storage import get_storage_class
-from django.db import IntegrityError, models, transaction
+from django.db import IntegrityError, models, router, transaction
 from django.utils import timezone
 
 from sentry.app import locks
-from sentry.db.models import BoundedPositiveIntegerField, FlexibleForeignKey, JSONField, Model
+from sentry.db.models import (
+    BoundedBigIntegerField,
+    BoundedPositiveIntegerField,
+    FlexibleForeignKey,
+    JSONField,
+    Model,
+)
 from sentry.tasks.files import delete_file as delete_file_task
 from sentry.tasks.files import delete_unreferenced_blobs
 from sentry.utils import metrics
@@ -153,8 +159,8 @@ class FileBlob(Model):
             if organization is None:
                 return
             try:
-                with transaction.atomic():
-                    FileBlobOwner.objects.create(organization=organization, blob=blob)
+                with transaction.atomic(using=router.db_for_write(FileBlobOwner)):
+                    FileBlobOwner.objects.create(organization_id=organization.id, blob=blob)
             except IntegrityError:
                 pass
 
@@ -624,12 +630,12 @@ class FileBlobOwner(Model):
     __core__ = False
 
     blob = FlexibleForeignKey("sentry.FileBlob")
-    organization = FlexibleForeignKey("sentry.Organization")
+    organization_id = BoundedBigIntegerField(db_index=True)
 
     class Meta:
         app_label = "sentry"
         db_table = "sentry_fileblobowner"
-        unique_together = (("blob", "organization"),)
+        unique_together = (("blob", "organization_id"),)
 
 
 def clear_cached_files(cache_path):
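
Two details in the write path deserve a note. Creating the ownership row from organization.id matches the new integer field, and pinning the transaction with router.db_for_write(FileBlobOwner) matters in multi-database deployments: a bare transaction.atomic() always opens the transaction on the "default" alias, which may not be the connection the INSERT actually uses. A minimal sketch of the pattern, with a hypothetical helper name:

    # Sketch only; ensure_blob_ownership is a hypothetical wrapper around the
    # logic shown in the diff above.
    from django.db import IntegrityError, router, transaction

    from sentry.models import FileBlobOwner

    def ensure_blob_ownership(blob, organization_id):
        # db_for_write consults settings.DATABASE_ROUTERS, so the atomic
        # block and the INSERT are guaranteed to share one connection.
        try:
            with transaction.atomic(using=router.db_for_write(FileBlobOwner)):
                FileBlobOwner.objects.create(
                    organization_id=organization_id, blob=blob
                )
        except IntegrityError:
            # Even without the FK constraint, unique_together on
            # ("blob", "organization_id") still raises on duplicates.
            pass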

tests/sentry/api/endpoints/test_dif_assemble.py (+4 -4)

@@ -83,7 +83,7 @@ class DifAssembleEndpoint(APITestCase):
         # Now we add ownership to the blob
         blobs = FileBlob.objects.all()
         for blob in blobs:
-            FileBlobOwner.objects.create(blob=blob, organization=self.organization)
+            FileBlobOwner.objects.create(blob=blob, organization_id=self.organization.id)
 
         # The request will start the job to assemble the file
         response = self.client.post(
@@ -149,9 +149,9 @@ class DifAssembleEndpoint(APITestCase):
 
         # The order here is on purpose because we check for the order of checksums
         blob1 = FileBlob.from_file(fileobj1)
-        FileBlobOwner.objects.get_or_create(organization=self.organization, blob=blob1)
+        FileBlobOwner.objects.get_or_create(organization_id=self.organization.id, blob=blob1)
         blob3 = FileBlob.from_file(fileobj3)
-        FileBlobOwner.objects.get_or_create(organization=self.organization, blob=blob3)
+        FileBlobOwner.objects.get_or_create(organization_id=self.organization.id, blob=blob3)
         blob2 = FileBlob.from_file(fileobj2)
 
         # we make a request now but we are missing ownership for chunk 2
@@ -165,7 +165,7 @@ class DifAssembleEndpoint(APITestCase):
         assert response.data[total_checksum]["missingChunks"] == [checksum2]
 
         # we add ownership to chunk 2
-        FileBlobOwner.objects.get_or_create(organization=self.organization, blob=blob2)
+        FileBlobOwner.objects.get_or_create(organization_id=self.organization.id, blob=blob2)
 
         # new request, ownership for all chunks is there but file does not exist yet
         response = self.client.post(

tests/sentry/api/endpoints/test_organization_release_assemble.py (+1 -1)

@@ -55,7 +55,7 @@ class OrganizationReleaseAssembleTest(APITestCase):
         total_checksum = sha1(bundle_file).hexdigest()
 
         blob1 = FileBlob.from_file(ContentFile(bundle_file))
-        FileBlobOwner.objects.get_or_create(organization=self.organization, blob=blob1)
+        FileBlobOwner.objects.get_or_create(organization_id=self.organization.id, blob=blob1)
 
         response = self.client.post(
             self.url,

tests/sentry/tasks/test_assemble.py (+2 -2)

@@ -89,7 +89,7 @@ class AssembleDifTest(BaseAssembleTest):
             blob = FileBlob.objects.get(checksum=checksum)
             ref_bytes = reference.getvalue()
             assert blob.getfile().read(len(ref_bytes)) == ref_bytes
-            FileBlobOwner.objects.filter(blob=blob, organization=self.organization).get()
+            FileBlobOwner.objects.filter(blob=blob, organization_id=self.organization.id).get()
 
         rv = assemble_file(
             AssembleTask.DIF,
@@ -138,7 +138,7 @@ class AssembleDifTest(BaseAssembleTest):
             blob = FileBlob.objects.get(checksum=checksum)
             ref_bytes = reference.getvalue()
             assert blob.getfile().read(len(ref_bytes)) == ref_bytes
-            FileBlobOwner.objects.filter(blob=blob, organization=self.organization).get()
+            FileBlobOwner.objects.filter(blob=blob, organization_id=self.organization.id).get()
 
         rv = assemble_file(
             AssembleTask.DIF,