Browse Source

ref(hybrid-cloud): JIT migration of control silo avatars (#49866)

This adds control_file_id references to control silo avatars (user
avatars, sentry app avatars, doc integration avatars). It also will
trigger copying those files to the new control silo on access as soon 
as the new filestore configurations are merged.

Once this is in, we can merge
https://github.com/getsentry/getsentry/pull/10734
https://github.com/getsentry/ops/pull/6990
https://github.com/getsentry/getsentry/pull/10681

---------

Co-authored-by: Anton Ovchinnikov <anton@tonyo.info>
Mike Ihbe 1 year ago
parent
commit
7b06ff0f6a

+ 1 - 1
migrations_lockfile.txt

@@ -6,5 +6,5 @@ To resolve this, rebase against latest master and regenerate your migration. Thi
 will then be regenerated, and you should be able to merge without conflicts.
 
 nodestore: 0002_nodestore_no_dictfield
-sentry: 0476_convert_unresolved_to_set_escalating_activitytype
+sentry: 0477_control_avatars
 social_auth: 0001_initial

+ 3 - 0
src/sentry/conf/server.py

@@ -762,6 +762,7 @@ CELERY_QUEUES = [
         "events.symbolicate_js_event_low_priority",
         routing_key="events.symbolicate_js_event_low_priority",
     ),
+    Queue("files.copy", routing_key="files.copy"),
     Queue("files.delete", routing_key="files.delete"),
     Queue(
         "group_owners.process_suspect_commits", routing_key="group_owners.process_suspect_commits"
@@ -3430,3 +3431,5 @@ MAX_ENVIRONMENTS_PER_MONITOR = 1000
 # Raise schema validation errors and make the indexer crash (only useful in
 # tests)
 SENTRY_METRICS_INDEXER_RAISE_VALIDATION_ERRORS = False
+
+SENTRY_FILE_COPY_ROLLOUT_RATE = 0.01

+ 42 - 0
src/sentry/migrations/0477_control_avatars.py

@@ -0,0 +1,42 @@
+# Generated by Django 2.2.28 on 2023-05-26 17:00
+
+from django.db import migrations
+
+import sentry.db.models.fields.bounded
+from sentry.new_migrations.migrations import CheckedMigration
+
+
+class Migration(CheckedMigration):
+    # This flag is used to mark that a migration shouldn't be automatically run in production. For
+    # the most part, this should only be used for operations where it's safe to run the migration
+    # after your code has deployed. So this should not be used for most operations that alter the
+    # schema of a table.
+    # Here are some things that make sense to mark as dangerous:
+    # - Large data migrations. Typically we want these to be run manually by ops so that they can
+    #   be monitored and not block the deploy for a long period of time while they run.
+    # - Adding indexes to large tables. Since this can take a long time, we'd generally prefer to
+    #   have ops run this and not block the deploy. Note that while adding an index is a schema
+    #   change, it's completely safe to run the operation after the code has deployed.
+    is_dangerous = False
+
+    dependencies = [
+        ("sentry", "0476_convert_unresolved_to_set_escalating_activitytype"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="docintegrationavatar",
+            name="control_file_id",
+            field=sentry.db.models.fields.bounded.BoundedBigIntegerField(null=True, unique=True),
+        ),
+        migrations.AddField(
+            model_name="sentryappavatar",
+            name="control_file_id",
+            field=sentry.db.models.fields.bounded.BoundedBigIntegerField(null=True, unique=True),
+        ),
+        migrations.AddField(
+            model_name="useravatar",
+            name="control_file_id",
+            field=sentry.db.models.fields.bounded.BoundedBigIntegerField(null=True, unique=True),
+        ),
+    ]

+ 2 - 0
src/sentry/models/avatars/__init__.py

@@ -1,4 +1,5 @@
 from .base import AvatarBase
+from .control_base import ControlAvatarBase
 from .doc_integration_avatar import DocIntegrationAvatar
 from .organization_avatar import OrganizationAvatar
 from .project_avatar import ProjectAvatar
@@ -8,6 +9,7 @@ from .user_avatar import UserAvatar
 
 __all__ = (
     "AvatarBase",
+    "ControlAvatarBase",
     "DocIntegrationAvatar",
     "OrganizationAvatar",
     "ProjectAvatar",

+ 37 - 12
src/sentry/models/avatars/base.py

@@ -1,11 +1,14 @@
 from io import BytesIO
 from uuid import uuid4
 
+from django.core.exceptions import ObjectDoesNotExist
 from django.db import models, router
 from django.utils.encoding import force_bytes
 from PIL import Image
 
 from sentry.db.models import BoundedBigIntegerField, Model
+from sentry.models.files.file import File
+from sentry.tasks.files import copy_file_to_control_and_update_model
 from sentry.utils.cache import cache
 from sentry.utils.db import atomic_transaction
 
@@ -35,17 +38,31 @@ class AvatarBase(Model):
         return super().save(*args, **kwargs)
 
     def get_file(self):
-        from sentry.models import File
-
-        if self.file_id is None:
-            return None
+        # Favor control_file_id if it exists and is set.
+        # Otherwise fallback to file_id. If still None, return.
+        file_class = self.file_class()
+        file_id = getattr(self, self.file_fk())
+        if file_id is None:
+            file_id = self.file_id
+            file_class = File
+            if file_id is None:
+                return None
+            copy_file_to_control_and_update_model.apply_async(
+                kwargs={
+                    "app_name": "sentry",
+                    "model_name": type(self).__name__,
+                    "model_id": self.id,
+                    "file_id": file_id,
+                }
+            )
 
         try:
-            return File.objects.get(pk=self.file_id)
-        except File.DoesNotExist:
+            return file_class.objects.get(pk=file_id)
+        except ObjectDoesNotExist:
             # Best effort replication of previous behaviour with foreign key
             # which was set with on_delete=models.SET_NULL
-            self.update(file_id=None)
+            update = {self.file_fk(): None}
+            self.update(**update)
             return None
 
     def delete(self, *args, **kwargs):
@@ -79,12 +96,20 @@ class AvatarBase(Model):
         return photo
 
     @classmethod
-    def save_avatar(cls, relation, type, avatar=None, filename=None, color=None):
+    def file_class(cls):
         from sentry.models import File
 
+        return File
+
+    @classmethod
+    def file_fk(cls) -> str:
+        return "file_id"
+
+    @classmethod
+    def save_avatar(cls, relation, type, avatar=None, filename=None, color=None):
         if avatar:
-            with atomic_transaction(using=router.db_for_write(File)):
-                photo = File.objects.create(name=filename, type=cls.FILE_TYPE)
+            with atomic_transaction(using=router.db_for_write(cls.file_class())):
+                photo = cls.file_class().objects.create(name=filename, type=cls.FILE_TYPE)
                 # XXX: Avatar may come in as a string instance in python2
                 # if it's not wrapped in BytesIO.
                 if isinstance(avatar, str):
@@ -99,7 +124,7 @@ class AvatarBase(Model):
         with atomic_transaction(
             using=(
                 router.db_for_write(cls),
-                router.db_for_write(File),
+                router.db_for_write(cls.file_class()),
             )
         ):
             if relation.get("sentry_app") and color is not None:
@@ -111,7 +136,7 @@ class AvatarBase(Model):
                 file.delete()
 
             if photo:
-                instance.file_id = photo.id
+                setattr(instance, cls.file_fk(), photo.id)
                 instance.ident = uuid4().hex
 
             instance.avatar_type = [i for i, n in cls.AVATAR_TYPES if n == type][0]

+ 24 - 0
src/sentry/models/avatars/control_base.py

@@ -0,0 +1,24 @@
+from sentry.db.models import BoundedBigIntegerField
+from sentry.models.avatars.base import AvatarBase
+from sentry.models.files import ControlFileBlob, File
+
+
+class ControlAvatarBase(AvatarBase):
+    control_file_id = BoundedBigIntegerField(unique=True, null=True)
+
+    class Meta:
+        abstract = True
+
+    @classmethod
+    def file_class(cls):
+        from sentry.models import ControlFile
+
+        if ControlFileBlob._storage_config():
+            return ControlFile
+        return File
+
+    @classmethod
+    def file_fk(cls) -> str:
+        if ControlFileBlob._storage_config():
+            return "control_file_id"
+        return "file_id"

+ 2 - 2
src/sentry/models/avatars/doc_integration_avatar.py

@@ -3,11 +3,11 @@ from django.utils import timezone
 
 from sentry.db.models import FlexibleForeignKey, control_silo_only_model
 
-from . import AvatarBase
+from . import ControlAvatarBase
 
 
 @control_silo_only_model
-class DocIntegrationAvatar(AvatarBase):
+class DocIntegrationAvatar(ControlAvatarBase):
     """
     A DocIntegrationAvatar associates a DocIntegration with a logo photo File.
     """

+ 2 - 2
src/sentry/models/avatars/sentry_app_avatar.py

@@ -9,7 +9,7 @@ from django.db import models
 from sentry.db.models import FlexibleForeignKey, control_silo_only_model
 from sentry.db.models.manager import BaseManager
 
-from . import AvatarBase
+from . import ControlAvatarBase
 
 if TYPE_CHECKING:
     from sentry.models import SentryApp
@@ -37,7 +37,7 @@ class SentryAppAvatarManager(BaseManager):
 
 
 @control_silo_only_model
-class SentryAppAvatar(AvatarBase):
+class SentryAppAvatar(ControlAvatarBase):
     """
     A SentryAppAvatar associates a SentryApp with a logo photo File
     and specifies which type of logo it is.

+ 2 - 2
src/sentry/models/avatars/user_avatar.py

@@ -2,11 +2,11 @@ from django.db import models
 
 from sentry.db.models import BaseManager, FlexibleForeignKey, control_silo_only_model
 
-from . import AvatarBase
+from . import ControlAvatarBase
 
 
 @control_silo_only_model
-class UserAvatar(AvatarBase):
+class UserAvatar(ControlAvatarBase):
     """
     A UserAvatar associates a User with their avatar photo File
     and contains their preferences for avatar type.

+ 3 - 3
src/sentry/models/files/abstractfileblob.py

@@ -66,7 +66,7 @@ class AbstractFileBlob(Model):
             )
             blob = cls(size=size, checksum=checksum)
             blob.path = cls.generate_unique_path()
-            storage = get_storage()
+            storage = get_storage(cls._storage_config())
             storage.save(blob.path, fileobj)
             blobs_to_save.append((blob, lock))
             metrics.timing("filestore.blob-size", size, tags={"function": "from_files"})
@@ -174,7 +174,7 @@ class AbstractFileBlob(Model):
 
             blob = cls(size=size, checksum=checksum)
             blob.path = cls.generate_unique_path()
-            storage = get_storage()
+            storage = get_storage(cls._storage_config())
             storage.save(blob.path, fileobj)
             blob.save()
 
@@ -229,5 +229,5 @@ class AbstractFileBlob(Model):
         """
         assert self.path
 
-        storage = get_storage()
+        storage = get_storage(self._storage_config())
         return storage.open(self.path)

Some files were not shown because too many files changed in this diff