Browse Source

ref(hybrid-cloud): Denormalize user avatars 1/3 (#45685)

Denormalizing user avatars for hybrid cloud. This allows us to only run
the filestore service on region silos and avoids cross region queries.

Broken out from https://github.com/getsentry/sentry/pull/45115.

Process will be:
* Add migration and model changes. Dual write to new columns. <-- We are
here
* Backfill
* Change serializer to new model

---------

Co-authored-by: Mike Ihbe <mike.ihbe@sentry.io>
Mike Ihbe 2 years ago
parent
commit
ef1cce9ec8

+ 1 - 1
migrations_lockfile.txt

@@ -6,5 +6,5 @@ To resolve this, rebase against latest master and regenerate your migration. Thi
 will then be regenerated, and you should be able to merge without conflicts.
 
 nodestore: 0002_nodestore_no_dictfield
-sentry: 0382_add_installation_id_to_service_hook
+sentry: 0383_mv_user_avatar
 social_auth: 0001_initial

+ 19 - 6
src/sentry/api/bases/avatar.py

@@ -1,9 +1,13 @@
+from typing import Any, Tuple
+
 from rest_framework import serializers, status
 from rest_framework.request import Request
 from rest_framework.response import Response
 
 from sentry.api.fields import AvatarField
 from sentry.api.serializers import serialize
+from sentry.api.serializers.base import Serializer
+from sentry.models.avatars.base import AvatarBase
 
 
 class AvatarSerializer(serializers.Serializer):
@@ -16,8 +20,12 @@ class AvatarSerializer(serializers.Serializer):
         attrs = super().validate(attrs)
         if attrs.get("avatar_type") == "upload":
             model_type = self.context["type"]
+            kwargs_copy = self.context["kwargs"].copy()
+            if "user" in kwargs_copy:
+                user = kwargs_copy.pop("user")
+                kwargs_copy["user_id"] = user.id
             has_existing_file = model_type.objects.filter(
-                file_id__isnull=False, **self.context["kwargs"]
+                file_id__isnull=False, **kwargs_copy
             ).exists()
             if not has_existing_file and not attrs.get("avatar_photo"):
                 raise serializers.ValidationError(
@@ -41,19 +49,18 @@ class AvatarMixin:
     def get_avatar_filename(self, obj):
         return f"{obj.id}.png"
 
-    def put(self, request: Request, **kwargs) -> Response:
+    def parse(self, request: Request, **kwargs) -> Tuple[Any, Serializer]:
         obj = kwargs.pop(self.object_type, None)
 
         serializer = self.serializer_cls(
             data=request.data, context=self.get_serializer_context(obj)
         )
+        return (obj, serializer)
 
-        if not serializer.is_valid():
-            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
-
+    def save_avatar(self, obj: Any, serializer: Serializer, **kwargs) -> AvatarBase:
         result = serializer.validated_data
 
-        self.model.save_avatar(
+        return self.model.save_avatar(
             relation={self.object_type: obj},
             type=result["avatar_type"],
             avatar=result.get("avatar_photo"),
@@ -61,4 +68,10 @@ class AvatarMixin:
             color=result.get("color"),
         )
 
+    def put(self, request: Request, **kwargs) -> Response:
+        obj, serializer = self.parse(request, **kwargs)
+        if not serializer.is_valid():
+            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
+        self.save_avatar(obj, serializer)
+        obj = kwargs.pop(self.object_type, None)  # serialize doesn't like these params
         return Response(serialize(obj, request.user, **kwargs))

+ 32 - 4
src/sentry/api/endpoints/avatar/user.py

@@ -2,20 +2,48 @@ from rest_framework import status
 from rest_framework.request import Request
 from rest_framework.response import Response
 
-from sentry.api.base import control_silo_endpoint
+from sentry.api.base import region_silo_endpoint
 from sentry.api.bases.avatar import AvatarMixin
 from sentry.api.bases.user import UserEndpoint
 from sentry.models import UserAvatar
+from sentry.services.hybrid_cloud.user import user_service
+from sentry.types.region import get_local_region
 
 
-@control_silo_endpoint
+# This implementation deviates a bit from the typical AvatarMixin because of hybrid cloud
+# We have the usual problems with user->user_id translation, but also need to handle synchronizing
+# attributes across silos to avoid fetching user avatars from user serializers.
+# Despite updating a user attribute, user avatar FILEs live on regions. This allows us to avoid
+# running file stores on the control silo. So we simply update the user with the new attributes.
+@region_silo_endpoint
 class UserAvatarEndpoint(AvatarMixin, UserEndpoint):
     object_type = "user"
     model = UserAvatar
 
+    def get(self, request: Request, **kwargs) -> Response:
+        user = kwargs.pop(self.object_type, None)
+        serialized_user = user_service.serialize_many(
+            filter=dict(user_ids=[user.id]),
+            as_user=user,
+        )[0]
+        return Response(serialized_user)
+
     def put(self, request: Request, **kwargs) -> Response:
         user = kwargs["user"]
-        if user != request.user:
+        if user.id != request.user.id:
             return Response(status=status.HTTP_403_FORBIDDEN)
 
-        return super().put(request, **kwargs)
+        obj, serializer = super().parse(request, **kwargs)
+        if not serializer.is_valid():
+            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
+        user_avatar = super().save_avatar(obj, serializer)
+
+        # Update the corresponding user attributes
+        avatar_url = None
+        if user_avatar.get_avatar_type_display() == "upload":
+            avatar_url = get_local_region().to_url(f"/avatar/{user_avatar.ident}/")
+        serialized_user = user_service.update_user(
+            user_id=request.user.id,
+            attrs=dict(avatar_url=avatar_url, avatar_type=user_avatar.avatar_type),
+        )
+        return Response(serialized_user)

+ 60 - 0
src/sentry/migrations/0383_mv_user_avatar.py

@@ -0,0 +1,60 @@
+# Generated by Django 2.2.28 on 2023-02-25 07:21
+
+from django.db import migrations, models
+
+from sentry.new_migrations.migrations import CheckedMigration
+
+
+class Migration(CheckedMigration):
+    # This flag is used to mark that a migration shouldn't be automatically run in production. For
+    # the most part, this should only be used for operations where it's safe to run the migration
+    # after your code has deployed. So this should not be used for most operations that alter the
+    # schema of a table.
+    # Here are some things that make sense to mark as dangerous:
+    # - Large data migrations. Typically we want these to be run manually by ops so that they can
+    #   be monitored and not block the deploy for a long period of time while they run.
+    # - Adding indexes to large tables. Since this can take a long time, we'd generally prefer to
+    #   have ops run this and not block the deploy. Note that while adding an index is a schema
+    #   change, it's completely safe to run the operation after the code has deployed.
+    is_dangerous = False
+
+    dependencies = [
+        ("sentry", "0382_add_installation_id_to_service_hook"),
+    ]
+
+    operations = [
+        migrations.SeparateDatabaseAndState(
+            database_operations=[
+                migrations.RunSQL(
+                    """
+                    ALTER TABLE "auth_user" ADD COLUMN "avatar_type" smallint NOT NULL DEFAULT 0;
+                    """,
+                    reverse_sql="""
+                    ALTER TABLE "auth_user" DROP COLUMN "avatar_type";
+                    """,
+                    hints={"tables": ["auth_user"]},
+                ),
+                migrations.RunSQL(
+                    """
+                    ALTER TABLE "auth_user" ADD COLUMN "avatar_url" varchar(120) DEFAULT NULL;
+                    """,
+                    reverse_sql="""
+                    ALTER TABLE "auth_user" DROP COLUMN "avatar_url";
+                    """,
+                    hints={"tables": ["auth_user"]},
+                ),
+            ],
+            state_operations=[
+                migrations.AddField(
+                    model_name="user",
+                    name="avatar_type",
+                    field=models.PositiveSmallIntegerField(default=0),
+                ),
+                migrations.AddField(
+                    model_name="user",
+                    name="avatar_url",
+                    field=models.CharField(max_length=120, null=True),
+                ),
+            ],
+        )
+    ]

+ 5 - 5
src/sentry/models/user.py

@@ -24,7 +24,7 @@ from sentry.db.models import (
     sane_repr,
 )
 from sentry.db.postgres.roles import in_test_psql_role_override
-from sentry.models import LostPasswordHash
+from sentry.models import LostPasswordHash, UserAvatar
 from sentry.models.authenticator import Authenticator
 from sentry.models.outbox import ControlOutbox, OutboxCategory, OutboxScope, find_regions_for_user
 from sentry.services.hybrid_cloud.user import RpcUser
@@ -181,6 +181,9 @@ class User(BaseModel, AbstractBaseUser):
     date_joined = models.DateTimeField(_("date joined"), default=timezone.now)
     last_active = models.DateTimeField(_("last active"), default=timezone.now, null=True)
 
+    avatar_type = models.PositiveSmallIntegerField(default=0, choices=UserAvatar.AVATAR_TYPES)
+    avatar_url = models.CharField(_("avatar url"), max_length=120, null=True)
+
     objects = UserManager(cache_fields=["pk"])
 
     USERNAME_FIELD = "username"
@@ -262,10 +265,7 @@ class User(BaseModel, AbstractBaseUser):
         return first_name.capitalize()
 
     def get_avatar_type(self):
-        avatar = self.avatar.first()
-        if avatar:
-            return avatar.get_avatar_type_display()
-        return "letter_avatar"
+        return self.get_avatar_type_display()
 
     def send_confirm_email_singular(self, email, is_new_user=False):
         from sentry import options

+ 15 - 0
src/sentry/services/hybrid_cloud/user/__init__.py

@@ -113,6 +113,11 @@ class UserFilterArgs(TypedDict, total=False):
     emails: List[str]
 
 
+class UserUpdateArgs(TypedDict, total=False):
+    avatar_url: str
+    avatar_type: int
+
+
 class UserService(
     FilterQueryInterface[UserFilterArgs, RpcUser, UserSerializeType], InterfaceWithLifecycle
 ):
@@ -158,6 +163,16 @@ class UserService(
     def get_by_actor_ids(self, *, actor_ids: List[int]) -> List[RpcUser]:
         pass
 
+    @abstractmethod
+    def update_user(
+        self,
+        *,
+        user_id: int,
+        attrs: UserUpdateArgs,
+    ) -> Any:
+        # Returns a serialized user
+        pass
+
     def get_user(self, user_id: int) -> Optional[RpcUser]:
         """
         This method returns a User object given an ID

+ 11 - 0
src/sentry/services/hybrid_cloud/user/impl.py

@@ -24,6 +24,7 @@ from sentry.services.hybrid_cloud.user import (
     UserFilterArgs,
     UserSerializeType,
     UserService,
+    UserUpdateArgs,
 )
 
 
@@ -121,6 +122,16 @@ class DatabaseBackedUserService(
     def get_by_actor_ids(self, *, actor_ids: List[int]) -> List[RpcUser]:
         return [self._serialize_rpc(u) for u in self._base_query().filter(actor_id__in=actor_ids)]
 
+    def update_user(
+        self,
+        *,
+        user_id: int,
+        attrs: UserUpdateArgs,
+    ) -> Any:
+        if len(attrs):
+            User.objects.filter(id=user_id).update(**attrs)
+        return self.serialize_many(filter=dict(user_ids=[user_id]))[0]
+
     def close(self) -> None:
         pass
 

+ 13 - 5
src/sentry/types/region.py

@@ -4,6 +4,7 @@ import functools
 from dataclasses import dataclass
 from enum import Enum
 from typing import TYPE_CHECKING, Iterable
+from urllib.parse import urljoin
 
 from sentry.silo import SiloMode
 
@@ -34,7 +35,7 @@ class Region:
     """The address of the region's silo.
 
     Represent a region's hostname or subdomain in a production environment
-    (e.g., "eu.sentry.io"), and addresses such as "localhost:8001" in a dev
+    (e.g., "https://eu.sentry.io"), and addresses such as "http://localhost:8001" in a dev
     environment.
 
     (This attribute is a placeholder. Please update this docstring when its
@@ -45,18 +46,16 @@ class Region:
     """The region's category."""
 
     def __post_init__(self) -> None:
-        from sentry.utils.snowflake import NULL_REGION_ID, REGION_ID
+        from sentry.utils.snowflake import REGION_ID
 
         REGION_ID.validate(self.id)
-        if self.id == NULL_REGION_ID:
-            raise ValueError(f"Region ID {NULL_REGION_ID} is reserved for non-multi-region systems")
 
     def to_url(self, path: str) -> str:
         """Resolve a path into a URL on this region's silo.
 
         (This method is a placeholder. See the `address` attribute.)
         """
-        return self.address + path
+        return urljoin(self.address, path)
 
 
 class RegionResolutionError(Exception):
@@ -126,6 +125,15 @@ def get_local_region() -> Region:
     """
     from django.conf import settings
 
+    if SiloMode.get_current_mode() == SiloMode.MONOLITH:
+        # This is a dummy value used to make region.to_url work
+        return Region(
+            name="monolith",
+            id=0,
+            address="/",
+            category=RegionCategory.MULTI_TENANT,
+        )
+
     if SiloMode.get_current_mode() != SiloMode.REGION:
         raise RegionContextError("Not a region silo")
 

+ 7 - 2
tests/sentry/types/test_region.py

@@ -49,5 +49,10 @@ class RegionMappingTest(TestCase):
                 assert get_local_region() == regions[0]
 
             with override_settings(SILO_MODE=SiloMode.MONOLITH):
-                with pytest.raises(RegionContextError):
-                    get_local_region()
+                # The relative address and the 0 id are the only important parts of this region value
+                assert get_local_region() == Region(
+                    name="monolith",
+                    id=0,
+                    address="/",
+                    category=RegionCategory.MULTI_TENANT,
+                )