
Revert "feat(releases): Persist artifact count (#26448)" (#26711)

Made counting SQL queries too slow

This reverts commit ea14d9fa26c796af6cb6a19fe4a22b22b45c4e7c.
Joris Bayer, 3 years ago
Parent
Commit
20355f5c59

+ 1 - 1
migrations_lockfile.txt

@@ -7,5 +7,5 @@ will then be regenerated, and you should be able to merge without conflicts.
 
 jira_ac: 0001_initial
 nodestore: 0002_nodestore_no_dictfield
-sentry: 0211_add_artifact_count
+sentry: 0210_backfill_project_transaction_thresholds
 social_auth: 0001_initial

+ 7 - 3
src/sentry/api/endpoints/debug_files.py

@@ -4,7 +4,7 @@ import re
 
 import jsonschema
 from django.db import transaction
-from django.db.models import Q
+from django.db.models import Count, Q
 from django.http import Http404, HttpResponse, StreamingHttpResponse
 from rest_framework.response import Response
 from symbolic import SymbolicError, normalize_debug_id
@@ -428,8 +428,12 @@ class SourceMapsEndpoint(ProjectEndpoint):
             }
 
         def serialize_results(results):
-            file_counts = Release.with_artifact_counts(id__in=[r["id"] for r in results])
-            file_count_map = {r.id: r.count for r in file_counts}
+            file_counts = (
+                ReleaseFile.public_objects.filter(release_id__in=[r["id"] for r in results])
+                .values("release_id")
+                .annotate(count=Count("id"))
+            )
+            file_count_map = {r["release_id"]: r["count"] for r in file_counts}
             return serialize(
                 [expose_release(r, file_count_map.get(r["id"], 0)) for r in results], request.user
             )
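
For context, the restored query above counts files per release with a single GROUP BY instead of reading a persisted artifact_count. A minimal sketch of the same aggregation, assuming the ReleaseFile model and public_objects manager shown in the diff (the SQL comment is approximate and ignores whatever extra filtering public_objects applies):

from django.db.models import Count

def release_file_counts(release_ids):
    # Groups ReleaseFile rows by release and counts them, roughly:
    #   SELECT release_id, COUNT(id) FROM sentry_releasefile
    #   WHERE release_id IN (...) GROUP BY release_id
    rows = (
        ReleaseFile.public_objects.filter(release_id__in=release_ids)
        .values("release_id")
        .annotate(count=Count("id"))
    )
    # Releases with no files get no row; callers fall back to 0.
    return {row["release_id"]: row["count"] for row in rows}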

+ 11 - 2
src/sentry/api/endpoints/organization_release_meta.py

@@ -5,7 +5,14 @@ from rest_framework.response import Response
 from sentry.api.bases.organization import OrganizationReleasesBaseEndpoint
 from sentry.api.exceptions import ResourceDoesNotExist
 from sentry.api.serializers.models.release import expose_version_info
-from sentry.models import CommitFileChange, ProjectPlatform, Release, ReleaseCommit, ReleaseProject
+from sentry.models import (
+    CommitFileChange,
+    ProjectPlatform,
+    Release,
+    ReleaseCommit,
+    ReleaseFile,
+    ReleaseProject,
+)
 
 
 class OrganizationReleaseMetaEndpoint(OrganizationReleasesBaseEndpoint):
@@ -70,6 +77,8 @@ class OrganizationReleaseMetaEndpoint(OrganizationReleasesBaseEndpoint):
             for pr in project_releases
         ]
 
+        release_file_count = ReleaseFile.public_objects.filter(release=release).count()
+
         return Response(
             {
                 "version": release.version,
@@ -80,6 +89,6 @@ class OrganizationReleaseMetaEndpoint(OrganizationReleasesBaseEndpoint):
                 "commitCount": release.commit_count,
                 "released": release.date_released or release.date_added,
                 "commitFilesChanged": commit_files_changed,
-                "releaseFileCount": release.count_artifacts(),
+                "releaseFileCount": release_file_count,
             }
         )
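
With the persisted count gone, the meta endpoint pays for one extra aggregate query per request. A hedged sketch of what that added line does, with the manager name taken from the diff:

# .count() compiles to a single aggregate query with the release filter
# applied, roughly SELECT COUNT(*) FROM sentry_releasefile
# WHERE release_id = %s (plus whatever filtering public_objects adds).
release_file_count = ReleaseFile.public_objects.filter(release=release).count()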

+ 0 - 52
src/sentry/migrations/0211_add_artifact_count.py

@@ -1,52 +0,0 @@
-# Generated by Django 1.11.29 on 2021-06-08 08:24
-
-from django.db import migrations
-
-import sentry.db.models.fields.bounded
-
-
-class Migration(migrations.Migration):
-    # This flag is used to mark that a migration shouldn't be automatically run in
-    # production. We set this to True for operations that we think are risky and want
-    # someone from ops to run manually and monitor.
-    # General advice is that if in doubt, mark your migration as `is_dangerous`.
-    # Some things you should always mark as dangerous:
-    # - Large data migrations. Typically we want these to be run manually by ops so that
-    #   they can be monitored. Since data migrations will now hold a transaction open
-    #   this is even more important.
-    # - Adding columns to highly active tables, even ones that are NULL.
-    is_dangerous = True
-
-    # This flag is used to decide whether to run this migration in a transaction or not.
-    # By default we prefer to run in a transaction, but for migrations where you want
-    # to `CREATE INDEX CONCURRENTLY` this needs to be set to False. Typically you'll
-    # want to create an index concurrently when adding one to an existing table.
-    # You'll also usually want to set this to `False` if you're writing a data
-    # migration, since we don't want the entire migration to run in one long-running
-    # transaction.
-    atomic = True
-
-    dependencies = [
-        ("sentry", "0210_backfill_project_transaction_thresholds"),
-    ]
-
-    operations = [
-        migrations.SeparateDatabaseAndState(
-            database_operations=[
-                migrations.AddField(
-                    model_name="releasefile",
-                    name="artifact_count",
-                    field=sentry.db.models.fields.bounded.BoundedPositiveIntegerField(null=True),
-                ),
-            ],
-            state_operations=[
-                migrations.AddField(
-                    model_name="releasefile",
-                    name="artifact_count",
-                    field=sentry.db.models.fields.bounded.BoundedPositiveIntegerField(
-                        null=True, default=1
-                    ),
-                ),
-            ],
-        )
-    ]
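
The deleted migration used SeparateDatabaseAndState so the database only gained a nullable column (no table rewrite on a hot table) while Django's model state carried default=1. The revert simply deletes the unapplied file; an environment that had already applied 0211 would need a manual rollback instead. A purely illustrative sketch of such a follow-up migration, not part of this commit (the numbering and the Sentry-specific is_dangerous flag are assumptions copied from the file above):

from django.db import migrations


class Migration(migrations.Migration):
    # Hypothetical follow-up that drops the column again.
    is_dangerous = True
    atomic = True

    dependencies = [
        ("sentry", "0211_add_artifact_count"),
    ]

    operations = [
        migrations.RemoveField(model_name="releasefile", name="artifact_count"),
    ]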

+ 1 - 22
src/sentry/models/release.py

@@ -5,7 +5,7 @@ from time import time
 
 import sentry_sdk
 from django.db import IntegrityError, models, transaction
-from django.db.models import Case, F, Func, Q, Sum, Value, When
+from django.db.models import Case, F, Func, Q, Value, When
 from django.utils import timezone
 from django.utils.functional import cached_property
 from django.utils.translation import ugettext_lazy as _
@@ -928,24 +928,3 @@ class Release(Model):
             releasefile.file.delete()
             releasefile.delete()
         self.delete()
-
-    @classmethod
-    def with_artifact_counts(cls, *args, **kwargs):
-        # Have to exclude Releases without release files because COALESCE would
-        # give them an artifact count of 1
-        return (
-            cls.objects.filter(*args, **kwargs)
-            .exclude(releasefile__isnull=True)
-            .annotate(count=Sum(Func(F("releasefile__artifact_count"), 1, function="COALESCE")))
-        )
-
-    def count_artifacts(self):
-        """Sum the artifact_counts of all release files.
-
-        An artifact count of NULL is interpreted as 1.
-        """
-        qs = Release.with_artifact_counts(pk=self.pk)
-        try:
-            return qs[0].count
-        except IndexError:
-            return 0
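
The removed classmethod is the query the commit message blames: a per-release SUM over joined releasefile rows, with COALESCE so that legacy rows whose artifact_count is NULL still count as one file, and with empty releases excluded so COALESCE cannot invent a count of 1. An equivalent sketch using django.db.models.functions.Coalesce instead of a raw Func (illustrative only, not Sentry's API):

from django.db.models import F, Sum, Value
from django.db.models.functions import Coalesce


def with_artifact_counts(release_queryset):
    # NULL artifact_count marks pre-backfill uploads and is treated as 1 file;
    # releases with no ReleaseFile rows are excluded up front so the
    # aggregate cannot report a spurious count for them.
    return release_queryset.exclude(releasefile__isnull=True).annotate(
        count=Sum(Coalesce(F("releasefile__artifact_count"), Value(1)))
    )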

+ 2 - 17
src/sentry/models/releasefile.py

@@ -63,12 +63,6 @@ class ReleaseFile(Model):
     name = models.TextField()
     dist = FlexibleForeignKey("sentry.Distribution", null=True)
 
-    #: For classic file uploads, this field is 1.
-    #: For release archives, this field is 0.
-    #: For artifact indexes, this field is the number of artifacts contained
-    #: in the index.
-    artifact_count = BoundedPositiveIntegerField(null=True, default=1)
-
     __repr__ = sane_repr("release", "ident")
 
     objects = models.Manager()  # The default manager.
@@ -223,10 +217,6 @@ class _ArtifactIndexData:
         """Meant to be read-only"""
         return self._data
 
-    @property
-    def num_files(self):
-        return len(self._data.get("files", {}))
-
     def get(self, filename: str):
         return self._data.get("files", {}).get(filename, None)
 
@@ -299,14 +289,10 @@ class _ArtifactIndexGuard:
 
                 target_file.putfile(BytesIO(json.dumps(index_data.data).encode()))
 
-                artifact_count = index_data.num_files
-                if created:
-                    # NOTE: could pass this to into get_or_create instead
-                    releasefile.update(artifact_count=artifact_count)
-                else:
+                if not created:
                     # Update and clean existing
                     old_file = releasefile.file
-                    releasefile.update(file=target_file, artifact_count=artifact_count)
+                    releasefile.update(file=target_file)
                     old_file.delete()
 
     def _get_or_create_releasefile(self):
@@ -359,7 +345,6 @@ def update_artifact_index(release: Release, dist: Optional[Distribution], archiv
         organization_id=release.organization_id,
         dist=dist,
         file=archive_file,
-        artifact_count=0,  # Artifacts will be counted with artifact index
     )
 
     files_out = {}
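
For reference, the removed num_files property simply measured the artifact-index payload, which (per _ArtifactIndexData above) has the shape {"files": {url: entry, ...}}; before the revert that length was written back into ReleaseFile.artifact_count. A tiny sketch under that payload assumption:

def count_index_entries(index_data: dict) -> int:
    # One entry per artifact URL in the index manifest.
    return len(index_data.get("files", {}))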

+ 11 - 28
src/sentry/tasks/assemble.py

@@ -159,49 +159,36 @@ class AssembleArtifactsError(Exception):
     pass
 
 
-def _simple_update(
-    release_file: ReleaseFile, new_file: File, new_archive: ReleaseArchive, additional_fields: dict
-) -> bool:
+def _simple_update(release_file: ReleaseFile, new_file: File, new_archive: ReleaseArchive):
     """Update function used in _upsert_release_file"""
     old_file = release_file.file
-    release_file.update(file=new_file, **additional_fields)
+    release_file.update(file=new_file)
     old_file.delete()
 
-    return True
 
-
-def _upsert_release_file(
-    file: File, archive: ReleaseArchive, update_fn, key_fields, additional_fields
-) -> bool:
-    success = False
+def _upsert_release_file(file: File, archive: ReleaseArchive, update_fn, **kwargs):
     release_file = None
 
     # Release files must have unique names within their release
     # and dist. If a matching file already exists, replace its
     # file with the new one; otherwise create it.
     try:
-        release_file = ReleaseFile.objects.get(**key_fields)
+        release_file = ReleaseFile.objects.get(**kwargs)
     except ReleaseFile.DoesNotExist:
         try:
             with transaction.atomic():
-                release_file = ReleaseFile.objects.create(
-                    file=file, **dict(key_fields, **additional_fields)
-                )
+                release_file = ReleaseFile.objects.create(file=file, **kwargs)
         except IntegrityError:
             # NB: This indicates a race, where another assemble task or
             # file upload job has just created a conflicting file. Since
             # we're upserting here anyway, yield to the faster actor and
             # do not try again.
             file.delete()
-        else:
-            success = True
     else:
-        success = update_fn(release_file, file, archive, additional_fields)
+        update_fn(release_file, file, archive)
 
-    return success
 
-
-def _store_single_files(archive: ReleaseArchive, meta: dict, count_as_artifacts: bool):
+def _store_single_files(archive: ReleaseArchive, meta: dict):
     try:
         temp_dir = archive.extract()
     except BaseException:
@@ -222,8 +209,7 @@ def _store_single_files(archive: ReleaseArchive, meta: dict, count_as_artifacts:
                 file.putfile(fp, logger=logger)
 
             kwargs = dict(meta, name=artifact_url)
-            extra_fields = {"artifact_count": 1 if count_as_artifacts else 0}
-            _upsert_release_file(file, None, _simple_update, kwargs, extra_fields)
+            _upsert_release_file(file, None, _simple_update, **kwargs)
 
 
 @instrumented_task(name="sentry.tasks.assemble.assemble_artifacts", queue="assemble")
@@ -283,21 +269,19 @@ def assemble_artifacts(org_id, version, checksum, chunks, **kwargs):
             if dist_name:
                 dist = release.add_dist(dist_name)
 
-            num_files = len(manifest.get("files", {}))
-
             meta = {  # Required for release file creation
                 "organization_id": organization.id,
                 "release": release,
                 "dist": dist,
             }
 
-            saved_as_archive = False
+            num_files = len(manifest.get("files", {}))
+
             if options.get("processing.save-release-archives"):
                 min_size = options.get("processing.release-archive-min-files")
                 if num_files >= min_size:
                     try:
                         update_artifact_index(release, dist, bundle)
-                        saved_as_archive = True
                     except BaseException as exc:
                         logger.error("Unable to update artifact index", exc_info=exc)
 
@@ -305,8 +289,7 @@ def assemble_artifacts(org_id, version, checksum, chunks, **kwargs):
             # rolling back from release archives. Once release archives run
             # smoothely, this call can be removed / only called when feature
             # flag is off.
-            count_as_artifacts = not saved_as_archive
-            _store_single_files(archive, meta, count_as_artifacts)
+            _store_single_files(archive, meta)
 
             # Count files extracted, to compare them to release files endpoint
             metrics.incr("tasks.assemble.extracted_files", amount=num_files)
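
With the extra counting fields gone, _upsert_release_file is back to a plain get-or-create that tolerates races between concurrent assemble tasks and uploads. A self-contained sketch of that pattern with illustrative names (the real code passes ReleaseFile and its name/release/dist key fields):

from django.db import IntegrityError, transaction


def upsert_with_yield(model, file, update_fn, **key_fields):
    # `model` is assumed to have a unique constraint on `key_fields`; when a
    # concurrent writer wins the race, the later writer discards its file
    # and yields instead of retrying.
    try:
        obj = model.objects.get(**key_fields)
    except model.DoesNotExist:
        try:
            with transaction.atomic():
                model.objects.create(file=file, **key_fields)
        except IntegrityError:
            file.delete()
    else:
        update_fn(obj, file)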

+ 0 - 5
tests/sentry/api/endpoints/test_organization_release_file_details.py

@@ -123,8 +123,6 @@ class ReleaseFileDeleteTest(APITestCase):
         )
         release.add_project(project)
 
-        assert release.count_artifacts() == 0
-
         releasefile = ReleaseFile.objects.create(
             organization_id=project.organization_id,
             release=release,
@@ -132,8 +130,6 @@ class ReleaseFileDeleteTest(APITestCase):
             name="http://example.com/application.js",
         )
 
-        assert release.count_artifacts() == 1
-
         url = reverse(
             "sentry-api-0-organization-release-file-details",
             kwargs={
@@ -149,4 +145,3 @@ class ReleaseFileDeleteTest(APITestCase):
 
         assert not ReleaseFile.objects.filter(id=releasefile.id).exists()
         assert not File.objects.filter(id=releasefile.file.id).exists()
-        assert release.count_artifacts() == 0

+ 0 - 4
tests/sentry/api/endpoints/test_organization_release_files.py

@@ -40,8 +40,6 @@ class ReleaseFileCreateTest(APITestCase):
         release = Release.objects.create(organization_id=project.organization_id, version="1")
         release.add_project(project)
 
-        assert release.count_artifacts() == 0
-
         url = reverse(
             "sentry-api-0-organization-release-files",
             kwargs={"organization_slug": project.organization.slug, "version": release.version},
@@ -61,8 +59,6 @@ class ReleaseFileCreateTest(APITestCase):
             format="multipart",
         )
 
-        assert release.count_artifacts() == 1
-
         assert response.status_code == 201, response.content
 
         releasefile = ReleaseFile.objects.get(release=release)

+ 0 - 5
tests/sentry/api/endpoints/test_project_release_file_details.py

@@ -146,8 +146,6 @@ class ReleaseFileDeleteTest(APITestCase):
         )
         release.add_project(project)
 
-        assert release.count_artifacts() == 0
-
         releasefile = ReleaseFile.objects.create(
             organization_id=project.organization_id,
             release=release,
@@ -155,8 +153,6 @@ class ReleaseFileDeleteTest(APITestCase):
             name="http://example.com/application.js",
         )
 
-        assert release.count_artifacts() == 1
-
         url = reverse(
             "sentry-api-0-project-release-file-details",
             kwargs={
@@ -173,4 +169,3 @@ class ReleaseFileDeleteTest(APITestCase):
 
         assert not ReleaseFile.objects.filter(id=releasefile.id).exists()
         assert not File.objects.filter(id=releasefile.file.id).exists()
-        assert release.count_artifacts() == 0

Some files were not shown because too many files changed in this diff.