Browse Source

Truncate FlatFileIndex tables (#54440)

- Deletes all the associated `File`s.
- Truncates the index state and flat file index.
Arpad Borsos 1 year ago
parent
commit
bc7abaccbf

+ 1 - 1
migrations_lockfile.txt

@@ -7,5 +7,5 @@ will then be regenerated, and you should be able to merge without conflicts.
 
 nodestore: 0002_nodestore_no_dictfield
 replays: 0003_add_size_to_recording_segment
-sentry: 0527_backfill_next_checkin_latest
+sentry: 0528_truncate_flat_index
 social_auth: 0002_default_auto_field

+ 3 - 5
src/sentry/api/endpoints/artifact_lookup.py

@@ -73,11 +73,9 @@ class ProjectArtifactLookupEndpoint(ProjectEndpoint):
             )
             metrics.incr("sourcemaps.download.release_file")
         elif ty == "bundle_index":
-            file = (
-                ArtifactBundleFlatFileIndex.objects.filter(id=ty_id, project_id=project.id)
-                .select_related("flat_file_index")
-                .first()
-            )
+            file = ArtifactBundleFlatFileIndex.objects.filter(
+                id=ty_id, project_id=project.id
+            ).first()
             metrics.incr("sourcemaps.download.flat_file_index")
 
             if file is not None and (data := file.load_flat_file_index()):

+ 5 - 9
src/sentry/debug_files/artifact_bundle_indexing.py

@@ -250,15 +250,11 @@ def update_artifact_bundle_index(
 
     lock = identifier.get_lock()
     with TimedRetryPolicy(60)(lock.acquire):
-        flat_file_index = (
-            ArtifactBundleFlatFileIndex.objects.select_related("flat_file_index")
-            .filter(
-                project_id=identifier.project_id,
-                release_name=identifier.release,
-                dist_name=identifier.dist,
-            )
-            .first()
-        )
+        flat_file_index = ArtifactBundleFlatFileIndex.objects.filter(
+            project_id=identifier.project_id,
+            release_name=identifier.release,
+            dist_name=identifier.dist,
+        ).first()
 
         index = FlatFileIndex()
         # Load the index from the file if it exists

+ 56 - 0
src/sentry/migrations/0528_truncate_flat_index.py

@@ -0,0 +1,56 @@
+# Generated by Django 3.2.20 on 2023-08-09 08:09
+
+from django.db import migrations
+
+from sentry.new_migrations.migrations import CheckedMigration
+from sentry.utils.query import RangeQuerySetWrapperWithProgressBar
+
+
+def delete_flat_files(apps, schema_editor):
+    FlatFileIndexState = apps.get_model("sentry", "FlatFileIndexState")
+    ArtifactBundleFlatFileIndex = apps.get_model("sentry", "ArtifactBundleFlatFileIndex")
+
+    # step 1: truncate all of `FlatFileIndexState`:
+    FlatFileIndexState.objects.raw("TRUNCATE sentry_flatfileindexstate")
+
+    # step 2: delete all the associated `File`s,
+    # and delete all the `ArtifactBundleFlatFileIndex` records:
+    for obj in RangeQuerySetWrapperWithProgressBar(
+        ArtifactBundleFlatFileIndex.objects.select_related("flat_file_index").all()
+    ):
+        # NOTE: this data migration only works as long as `flat_file_index` still
+        # exists on the Model.
+        if obj.flat_file_index:
+            obj.flat_file_index.delete()
+        obj.delete()
+
+
+class Migration(CheckedMigration):
+    # This flag is used to mark that a migration shouldn't be automatically run in production. For
+    # the most part, this should only be used for operations where it's safe to run the migration
+    # after your code has deployed. So this should not be used for most operations that alter the
+    # schema of a table.
+    # Here are some things that make sense to mark as dangerous:
+    # - Large data migrations. Typically we want these to be run manually by ops so that they can
+    #   be monitored and not block the deploy for a long period of time while they run.
+    # - Adding indexes to large tables. Since this can take a long time, we'd generally prefer to
+    #   have ops run this and not block the deploy. Note that while adding an index is a schema
+    #   change, it's completely safe to run the operation after the code has deployed.
+    is_dangerous = True
+
+    dependencies = [
+        ("sentry", "0527_backfill_next_checkin_latest"),
+    ]
+
+    operations = [
+        migrations.RunPython(
+            delete_flat_files,
+            migrations.RunPython.noop,
+            hints={
+                "tables": [
+                    "sentry_flatfileindexstate",
+                    "sentry_artifactbundleflatfileindex",
+                ]
+            },
+        ),
+    ]

+ 5 - 8
src/sentry/models/artifactbundle.py

@@ -132,10 +132,12 @@ class ArtifactBundleFlatFileIndex(Model):
     project_id = BoundedBigIntegerField(db_index=True)
     release_name = models.CharField(max_length=250)
     dist_name = models.CharField(max_length=64, default=NULL_STRING)
-    # This association is nullable since we need it for a correct durable implementation of the `FlatFileIndexState`.
-    flat_file_index = FlexibleForeignKey("sentry.File", null=True)
     date_added = models.DateTimeField(default=timezone.now)
 
+    # TODO: This column is in the process of being removed.
+    # For now, it still exists only to facilitate deleting all existing files.
+    flat_file_index = FlexibleForeignKey("sentry.File", null=True)
+
     class Meta:
         app_label = "sentry"
         db_table = "sentry_artifactbundleflatfileindex"
@@ -147,12 +149,7 @@ class ArtifactBundleFlatFileIndex(Model):
 
     def update_flat_file_index(self, data: str):
         indexstore.set_bytes(self._indexstore_id(), data.encode())
-
-        current_file = self.flat_file_index
-        if current_file:
-            current_file.delete()
-
-        self.update(flat_file_index=None, date_added=timezone.now())
+        self.update(date_added=timezone.now())
 
     def load_flat_file_index(self) -> Optional[bytes]:
         return indexstore.get_bytes(self._indexstore_id())

+ 0 - 1
tests/sentry/lang/native/test_sources.py

@@ -18,7 +18,6 @@ def _mock_flat_file_index(
         project_id=project_id,
         release_name=release or "",
         dist_name=dist or "",
-        flat_file_index=None,
     )
     index.update_flat_file_index("{}")
 

+ 39 - 0
tests/sentry/migrations/test_0528_truncate_flat_index.py

@@ -0,0 +1,39 @@
+import uuid
+
+from sentry.models.artifactbundle import (
+    ArtifactBundle,
+    ArtifactBundleFlatFileIndex,
+    ArtifactBundleIndexingState,
+    FlatFileIndexState,
+)
+from sentry.models.file import File
+from sentry.testutils.cases import TestMigrations
+
+
+class RemoveFlatFileIndexFiles(TestMigrations):
+    migrate_from = "0527_backfill_next_checkin_latest"
+    migrate_to = "0528_truncate_flat_index"
+
+    def setup_before_migration(self, apps):
+        artifact_bundle = ArtifactBundle.objects.create(
+            organization_id=self.organization.id,
+            bundle_id=uuid.uuid4().hex,
+            file=File.objects.create(name="bundle.zip", type="application/octet-stream"),
+            artifact_count=10,
+        )
+        flat_file_index = ArtifactBundleFlatFileIndex.objects.create(
+            project_id=self.project.id,
+            release_name="foo",
+            flat_file_index=File.objects.create(name="flat_file_index"),
+        )
+        FlatFileIndexState.objects.create(
+            flat_file_index=flat_file_index,
+            artifact_bundle=artifact_bundle,
+            indexing_state=ArtifactBundleIndexingState.NOT_INDEXED.value,
+        )
+
+    def test_deletes_linked_files(self):
+        assert ArtifactBundleFlatFileIndex.objects.count() == 0
+        assert FlatFileIndexState.objects.count() == 0
+
+        assert File.objects.count() == 1