Browse Source

feat(backup): User import sanitization (#55067)

Importing User models onto SaaS could be dangerous: the imported user
might have overpowered flags (`is_staff`, `is_superuser`, etc),
excessive `UserPermission`s, or naughty `UserRole`s assigned. These
changes modify the import logic remove sanitize those potentially bad
inputs.

Such sanitization only needs to happen sometimes, though: if you are
using this tool to restore a full self-hosted instance, you actually DO
want all of this potentially dangerous data to be imported unchanged
from your own exports. To resolve this, this change introduces the
concept of `ImportScope`s, which maps very closely to `RelocationScope`.
Using `import_in_global_scope` therefore does not perform sanitization,
while the other, narrower `User` and `Organization` scopes do.

Issue: getsentry/team-ospo#166
Issue: getsentry/team-ospo#181
Alex Zaslavsky 1 year ago
parent
commit
36691696ed

+ 99 - 0
fixtures/backup/user-with-maximum-privileges.json

@@ -0,0 +1,99 @@
+[
+    {
+        "model": "sentry.user",
+        "pk": 1,
+        "fields": {
+            "password": "pbkdf2_sha256$150000$iEvdIknqYjTr$+QsGn0tfIJ1FZLxQI37mVU1gL2KbL/wqjMtG/dFhsMA=",
+            "last_login": null,
+            "username": "testing@example.com",
+            "name": "",
+            "email": "testing@example.com",
+            "is_staff": true,
+            "is_active": true,
+            "is_superuser": true,
+            "is_managed": false,
+            "is_sentry_app": null,
+            "is_password_expired": false,
+            "last_password_change": "2023-06-22T22:59:57.023Z",
+            "flags": "0",
+            "session_nonce": null,
+            "date_joined": "2023-06-22T22:59:55.488Z",
+            "last_active": "2023-06-22T22:59:55.489Z",
+            "avatar_type": 0,
+            "avatar_url": null
+        }
+    },
+    {
+        "model": "sentry.authenticator",
+        "pk": 1,
+        "fields": {
+            "user": 1,
+            "created_at": "2023-07-27T16:30:53.325Z",
+            "last_used_at": null,
+            "type": 1,
+            "config": "\"\""
+        }
+    },
+    {
+        "model": "sentry.useremail",
+        "pk": 1,
+        "fields": {
+            "user": 1,
+            "email": "testing@example.com",
+            "validation_hash": "mCnWesSVvYQcq7qXQ36AZHwosAd6cghE",
+            "date_hash_added": "2023-06-22T22:59:55.521Z",
+            "is_verified": true
+        }
+    },
+    {
+        "model": "sentry.userip",
+        "pk": 1,
+        "fields": {
+            "user": 1,
+            "ip_address": "127.0.0.2",
+            "country_code": null,
+            "region_code": null,
+            "first_seen": "2012-04-05T03:29:45.000Z",
+            "last_seen": "2012-04-05T03:29:45.000Z"
+        }
+    },
+    {
+        "model": "sentry.useroption",
+        "pk": 1,
+        "fields": {
+            "user": 1,
+            "project_id": null,
+            "organization_id": null,
+            "key": "timezone",
+            "value": "\"Europe/Vienna\""
+        }
+    },
+    {
+        "model": "sentry.userpermission",
+        "pk": 1,
+        "fields": {
+            "user": 1,
+            "permission": "users.admin"
+        }
+    },
+    {
+        "model": "sentry.userrole",
+        "pk": 1,
+        "fields": {
+            "date_updated": "2023-06-22T23:00:00.123Z",
+            "date_added": "2023-06-22T22:54:27.960Z",
+            "name": "Super Admin",
+            "permissions": "['broadcasts.admin', 'users.admin', 'options.admin']"
+        }
+    },
+    {
+        "model": "sentry.userroleuser",
+        "pk": 1,
+        "fields": {
+            "date_updated": "2023-06-22T23:00:00.123Z",
+            "date_added": "2023-06-22T22:59:57.000Z",
+            "user": 1,
+            "role": 1
+        }
+    }
+]

+ 71 - 0
fixtures/backup/user-with-minimum-privileges.json

@@ -0,0 +1,71 @@
+[
+    {
+        "model": "sentry.user",
+        "pk": 1,
+        "fields": {
+            "password": "pbkdf2_sha256$150000$iEvdIknqYjTr$+QsGn0tfIJ1FZLxQI37mVU1gL2KbL/wqjMtG/dFhsMA=",
+            "last_login": null,
+            "username": "testing@example.com",
+            "name": "",
+            "email": "testing@example.com",
+            "is_staff": false,
+            "is_active": true,
+            "is_superuser": false,
+            "is_managed": false,
+            "is_sentry_app": null,
+            "is_password_expired": false,
+            "last_password_change": "2023-06-22T22:59:57.023Z",
+            "flags": "0",
+            "session_nonce": null,
+            "date_joined": "2023-06-22T22:59:55.488Z",
+            "last_active": "2023-06-22T22:59:55.489Z",
+            "avatar_type": 0,
+            "avatar_url": null
+        }
+    },
+    {
+        "model": "sentry.authenticator",
+        "pk": 1,
+        "fields": {
+            "user": 1,
+            "created_at": "2023-07-27T16:30:53.325Z",
+            "last_used_at": null,
+            "type": 1,
+            "config": "\"\""
+        }
+    },
+    {
+        "model": "sentry.useremail",
+        "pk": 1,
+        "fields": {
+            "user": 1,
+            "email": "testing@example.com",
+            "validation_hash": "mCnWesSVvYQcq7qXQ36AZHwosAd6cghE",
+            "date_hash_added": "2023-06-22T22:59:55.521Z",
+            "is_verified": true
+        }
+    },
+    {
+        "model": "sentry.userip",
+        "pk": 1,
+        "fields": {
+            "user": 1,
+            "ip_address": "127.0.0.2",
+            "country_code": null,
+            "region_code": null,
+            "first_seen": "2012-04-05T03:29:45.000Z",
+            "last_seen": "2012-04-05T03:29:45.000Z"
+        }
+    },
+    {
+        "model": "sentry.useroption",
+        "pk": 1,
+        "fields": {
+            "user": 1,
+            "project_id": null,
+            "organization_id": null,
+            "key": "timezone",
+            "value": "\"Europe/Vienna\""
+        }
+    }
+]

+ 45 - 5
src/sentry/backup/imports.py

@@ -10,6 +10,15 @@ from django.db import IntegrityError, connection, transaction
 
 from sentry.backup.dependencies import PrimaryKeyMap, normalize_model_name
 from sentry.backup.helpers import EXCLUDED_APPS
+from sentry.backup.scopes import ImportScope
+from sentry.silo import unguarded_write
+
+__all__ = (
+    "OldImportConfig",
+    "import_in_user_scope",
+    "import_in_organization_scope",
+    "import_in_global_scope",
+)
 
 
 class OldImportConfig(NamedTuple):
@@ -27,12 +36,19 @@ class OldImportConfig(NamedTuple):
     use_natural_foreign_keys: bool = False
 
 
-def imports(src, old_config: OldImportConfig, printer=click.echo):
-    """Imports core data for the Sentry installation."""
+def _import(src, scope: ImportScope, old_config: OldImportConfig, printer=click.echo):
+    """
+    Imports core data for a Sentry installation.
+
+    It is generally preferable to avoid calling this function directly, as there are certain combinations of input parameters that should not be used together. Instead, use one of the other wrapper functions in this file, named `import_in_XXX_scope()`.
+    """
 
     try:
         # Import / export only works in monolith mode with a consolidated db.
-        with transaction.atomic("default"):
+        # TODO(getsentry/team-ospo#185): the `unguarded_write` is temporary until we get and RPC
+        # service up for writing to control silo models.
+        with unguarded_write(using="default"), transaction.atomic("default"):
+            allowed_relocation_scopes = scope.value
             pk_map = PrimaryKeyMap()
             for obj in serializers.deserialize(
                 "json", src, stream=True, use_natural_keys=old_config.use_natural_foreign_keys
@@ -43,9 +59,9 @@ def imports(src, old_config: OldImportConfig, printer=click.echo):
                     # to roll out the new API to self-hosted.
                     if old_config.use_update_instead_of_create:
                         obj.save()
-                    else:
+                    elif o.get_relocation_scope() in allowed_relocation_scopes:
                         o = obj.object
-                        written = o.write_relocation_import(pk_map, obj)
+                        written = o.write_relocation_import(pk_map, obj, scope)
                         if written is not None:
                             old_pk, new_pk = written
                             model_name = normalize_model_name(o)
@@ -72,3 +88,27 @@ def imports(src, old_config: OldImportConfig, printer=click.echo):
 
     with connection.cursor() as cursor:
         cursor.execute(sequence_reset_sql.getvalue())
+
+
+def import_in_user_scope(src, printer=click.echo):
+    """
+    Perform an import in the `User` scope, meaning that only models with `RelocationScope.User` will be imported from the provided `src` file.
+    """
+    return _import(src, ImportScope.User, OldImportConfig(), printer)
+
+
+def import_in_organization_scope(src, printer=click.echo):
+    """
+    Perform an import in the `Organization` scope, meaning that only models with `RelocationScope.User` or `RelocationScope.Organization` will be imported from the provided `src` file.
+    """
+    return _import(src, ImportScope.Organization, OldImportConfig(), printer)
+
+
+def import_in_global_scope(src, printer=click.echo):
+    """
+    Perform an import in the `Global` scope, meaning that all models will be imported from the
+    provided source file. Because a `Global` import is really only useful when restoring to a fresh
+    Sentry instance, some behaviors in this scope are different from the others. In particular,
+    superuser privileges are not sanitized.
+    """
+    return _import(src, ImportScope.Global, OldImportConfig(), printer)

+ 27 - 0
src/sentry/backup/mixins.py

@@ -0,0 +1,27 @@
+from __future__ import annotations
+
+from typing import Optional, Tuple
+
+from django.core.serializers.base import DeserializedObject
+
+from sentry.backup.dependencies import PrimaryKeyMap
+from sentry.backup.scopes import ImportScope
+
+
+class SanitizeUserImportsMixin:
+    """
+    The only realistic reason to do a `Global`ly-scoped import is when restoring some full-instance
+    backup to a clean install. In this case, one may want to import so-called "superusers": users
+    with powerful various instance-wide permissions generally reserved for admins and instance
+    maintainers. Thus, for security reasons, running this import in any `ImportScope` other than
+    `Global` will sanitize user imports by ignoring imports of the `UserPermission`, `UserRole`, and
+    `UserRoleUser` models.
+    """
+
+    def write_relocation_import(
+        self, pk_map: PrimaryKeyMap, obj: DeserializedObject, scope: ImportScope
+    ) -> Optional[Tuple[int, int]]:
+        if scope != ImportScope.Global:
+            return None
+
+        return super().write_relocation_import(pk_map, obj, scope)  # type: ignore[misc]

+ 16 - 2
src/sentry/backup/scopes.py

@@ -13,9 +13,23 @@ class RelocationScope(Enum):
     # to a specific user.
     Global = auto()
 
+    # For all models that transitively depend on either `User` or `Organization` root models, and
+    # nothing else.
+    Organization = auto()
+
     # Any `Control`-silo model that is either a `User*` model, or directly owner by one, is in this
     # scope.
     User = auto()
 
-    # For all `Region`-siloed models tied to a specific `Organization`.
-    Organization = auto()
+
+@unique
+class ImportScope(Enum):
+    """
+    When executing the `sentry import` command, these scopes specify which of the above
+    `RelocationScope`s should be included in the final import. The basic idea is that each of these
+    scopes is inclusive of its predecessor in terms of which `RelocationScope`s it accepts.
+    """
+
+    User = {RelocationScope.User}
+    Organization = {RelocationScope.User, RelocationScope.Organization}
+    Global = {RelocationScope.User, RelocationScope.Organization, RelocationScope.Global}

+ 4 - 4
src/sentry/db/models/base.py

@@ -9,7 +9,7 @@ from django.db.models import signals
 from django.utils import timezone
 
 from sentry.backup.dependencies import PrimaryKeyMap, dependencies, normalize_model_name
-from sentry.backup.scopes import RelocationScope
+from sentry.backup.scopes import ImportScope, RelocationScope
 from sentry.silo import SiloLimit, SiloMode
 
 from .fields.bounded import BoundedBigAutoField
@@ -108,7 +108,7 @@ class BaseModel(models.Model):
 
         return self.__relocation_scope__
 
-    def _normalize_before_relocation_import(self, pk_map: PrimaryKeyMap) -> int:
+    def _normalize_before_relocation_import(self, pk_map: PrimaryKeyMap, _: ImportScope) -> int:
         """
         A helper function that normalizes a deserialized model. Note that this modifies the model in place, so it should generally be done inside of the companion `write_relocation_import` method, to avoid data skew or corrupted local state.
 
@@ -136,13 +136,13 @@ class BaseModel(models.Model):
         return old_pk
 
     def write_relocation_import(
-        self, pk_map: PrimaryKeyMap, obj: DeserializedObject
+        self, pk_map: PrimaryKeyMap, obj: DeserializedObject, scope: ImportScope
     ) -> Optional[Tuple[int, int]]:
         """
         Writes a deserialized model to the database. If this write is successful, this method will return a tuple of the old and new `pk`s.
         """
 
-        old_pk = self._normalize_before_relocation_import(pk_map)
+        old_pk = self._normalize_before_relocation_import(pk_map, scope)
         obj.save(force_insert=True)
         return (old_pk, self.pk)
 

+ 3 - 3
src/sentry/models/actor.py

@@ -10,7 +10,7 @@ from django.db.models.signals import post_save
 from rest_framework import serializers
 
 from sentry.backup.dependencies import PrimaryKeyMap
-from sentry.backup.scopes import RelocationScope
+from sentry.backup.scopes import ImportScope, RelocationScope
 from sentry.db.models import Model, region_silo_only_model
 from sentry.db.models.fields.foreignkey import FlexibleForeignKey
 from sentry.db.models.fields.hybrid_cloud_foreign_key import HybridCloudForeignKey
@@ -143,8 +143,8 @@ class Actor(Model):
         return self.get_actor_tuple().get_actor_identifier()
 
     # TODO(hybrid-cloud): actor refactor. Remove this method when done.
-    def _normalize_before_relocation_import(self, pk_map: PrimaryKeyMap) -> int:
-        old_pk = super()._normalize_before_relocation_import(pk_map)
+    def _normalize_before_relocation_import(self, pk_map: PrimaryKeyMap, scope: ImportScope) -> int:
+        old_pk = super()._normalize_before_relocation_import(pk_map, scope)
 
         # `Actor` and `Team` have a direct circular dependency between them for the time being due
         # to an ongoing refactor (that is, `Actor` foreign keys directly into `Team`, and `Team`

+ 3 - 3
src/sentry/models/team.py

@@ -12,7 +12,7 @@ from django.utils.translation import gettext_lazy as _
 
 from sentry.app import env
 from sentry.backup.dependencies import PrimaryKeyMap
-from sentry.backup.scopes import RelocationScope
+from sentry.backup.scopes import ImportScope, RelocationScope
 from sentry.constants import ObjectStatus
 from sentry.db.models import (
     BaseManager,
@@ -372,9 +372,9 @@ class Team(Model, SnowflakeIdMixin):
 
     # TODO(hybrid-cloud): actor refactor. Remove this method when done.
     def write_relocation_import(
-        self, pk_map: PrimaryKeyMap, obj: DeserializedObject
+        self, pk_map: PrimaryKeyMap, obj: DeserializedObject, scope: ImportScope
     ) -> Optional[Tuple[int, int]]:
-        written = super().write_relocation_import(pk_map, obj)
+        written = super().write_relocation_import(pk_map, obj, scope)
         if written is not None:
             (_, new_pk) = written
 

+ 12 - 1
src/sentry/models/user.py

@@ -15,7 +15,8 @@ from django.utils.translation import gettext_lazy as _
 
 from bitfield import TypedClassBitField
 from sentry.auth.authenticators import available_authenticators
-from sentry.backup.scopes import RelocationScope
+from sentry.backup.dependencies import PrimaryKeyMap
+from sentry.backup.scopes import ImportScope, RelocationScope
 from sentry.db.models import (
     BaseManager,
     BaseModel,
@@ -369,6 +370,16 @@ class User(BaseModel, AbstractBaseUser):
     def clear_lost_passwords(self):
         LostPasswordHash.objects.filter(user=self).delete()
 
+    def _normalize_before_relocation_import(self, pk_map: PrimaryKeyMap, scope: ImportScope) -> int:
+        old_pk = super()._normalize_before_relocation_import(pk_map, scope)
+        if scope != ImportScope.Global:
+            self.is_staff = False
+            self.is_superuser = False
+
+        # TODO(getsentry/team-ospo#181): Handle usernames that already exist.
+
+        return old_pk
+
 
 # HACK(dcramer): last_login needs nullable for Django 1.8
 User._meta.get_field("last_login").null = True

+ 3 - 3
src/sentry/models/useremail.py

@@ -12,7 +12,7 @@ from django.utils import timezone
 from django.utils.translation import gettext_lazy as _
 
 from sentry.backup.dependencies import PrimaryKeyMap
-from sentry.backup.scopes import RelocationScope
+from sentry.backup.scopes import ImportScope, RelocationScope
 from sentry.db.models import (
     BaseManager,
     FlexibleForeignKey,
@@ -88,9 +88,9 @@ class UserEmail(Model):
     # with `sentry.user` simultaneously? Will need to make more robust user handling logic, and to
     # test what happens when a UserEmail already exists.
     def write_relocation_import(
-        self, pk_map: PrimaryKeyMap, _: DeserializedObject
+        self, pk_map: PrimaryKeyMap, obj: DeserializedObject, scope: ImportScope
     ) -> Optional[Tuple[int, int]]:
-        old_pk = super()._normalize_before_relocation_import(pk_map)
+        old_pk = super()._normalize_before_relocation_import(pk_map, scope)
         (useremail, _) = self.__class__.objects.get_or_create(
             user=self.user, email=self.email, defaults=model_to_dict(self)
         )

Some files were not shown because too many files changed in this diff