Browse Source

Revert "feat(backup): Enable import over RPC (#56689)"

This reverts commit 57686ae0756bca8a30048b403e812b0ca161de89.

Co-authored-by: azaslavsky <3709945+azaslavsky@users.noreply.github.com>
getsentry-bot 1 year ago
parent
commit
05f116b371

+ 32 - 0
fixtures/backup/model_dependencies/detailed.json

@@ -5851,6 +5851,38 @@
     "table_name": "sentry_userrole_users",
     "uniques": []
   },
+  "sessions.session": {
+    "dangling": false,
+    "foreign_keys": {},
+    "model": "sessions.session",
+    "relocation_dependencies": [],
+    "relocation_scope": "Excluded",
+    "silos": [
+      "Monolith"
+    ],
+    "table_name": "django_session",
+    "uniques": [
+      [
+        "session_key"
+      ]
+    ]
+  },
+  "sites.site": {
+    "dangling": false,
+    "foreign_keys": {},
+    "model": "sites.site",
+    "relocation_dependencies": [],
+    "relocation_scope": "Excluded",
+    "silos": [
+      "Monolith"
+    ],
+    "table_name": "django_site",
+    "uniques": [
+      [
+        "domain"
+      ]
+    ]
+  },
   "social_auth.usersocialauth": {
     "dangling": false,
     "foreign_keys": {

+ 2 - 0
fixtures/backup/model_dependencies/flat.json

@@ -807,6 +807,8 @@
     "sentry.user",
     "sentry.userrole"
   ],
+  "sessions.session": [],
+  "sites.site": [],
   "social_auth.usersocialauth": [
     "sentry.user"
   ]

+ 2 - 0
fixtures/backup/model_dependencies/sorted.json

@@ -49,6 +49,8 @@
   "sentry.userpermission",
   "sentry.userrole",
   "sentry.userroleuser",
+  "sessions.session",
+  "sites.site",
   "social_auth.usersocialauth",
   "sentry.savedsearch",
   "sentry.release",

+ 2 - 0
fixtures/backup/model_dependencies/truncate.json

@@ -49,6 +49,8 @@
   "sentry_userpermission",
   "sentry_userrole",
   "sentry_userrole_users",
+  "django_session",
+  "django_site",
   "social_auth_usersocialauth",
   "sentry_savedsearch",
   "sentry_release",

+ 1 - 0
pyproject.toml

@@ -604,6 +604,7 @@ module = [
     "sentry.services.smtp",
     "sentry.shared_integrations.client.base",
     "sentry.shared_integrations.client.proxy",
+    "sentry.silo.base",
     "sentry.similarity.backends.dummy",
     "sentry.similarity.features",
     "sentry.snuba.discover",

+ 0 - 5
src/sentry/backup/dependencies.py

@@ -379,11 +379,6 @@ def dependencies() -> dict[NormalizedModelName, ModelRelations]:
         model_iterator = app_config.get_models()
 
         for model in model_iterator:
-            # Ignore some native Django models, since other models don't reference them and we don't
-            # really use them for business logic.
-            if model._meta.app_label in {"sessions", "sites"}:
-                continue
-
             foreign_keys: dict[str, ForeignField] = dict()
             uniques: set[frozenset[str]] = {
                 frozenset(combo) for combo in model._meta.unique_together

+ 8 - 2
src/sentry/backup/exports.py

@@ -1,9 +1,10 @@
 from __future__ import annotations
 
 import io
-from typing import BinaryIO
+from typing import BinaryIO, Type
 
 import click
+from django.db.models.base import Model
 
 from sentry.backup.dependencies import (
     PrimaryKeyMap,
@@ -92,6 +93,11 @@ def _export(
         else:
             raise ValueError("Filter arguments must only apply to `Organization` or `User` models")
 
+    def get_exporter_for_model(model: Type[Model]):
+        if SiloMode.CONTROL in model._meta.silo_limit.modes:  # type: ignore
+            return import_export_service.export_by_model
+        return ImportExportService.get_local_implementation().export_by_model  # type: ignore
+
     # TODO(getsentry/team-ospo#190): Another optimization opportunity to use a generator with ijson # to print the JSON objects in a streaming manner.
     for model in sorted_dependencies():
         from sentry.db.models.base import BaseModel
@@ -110,7 +116,7 @@ def _export(
             continue
 
         dep_models = {get_model_name(d) for d in model_relations.get_dependencies_for_relocation()}
-        export_by_model = ImportExportService.get_exporter_for_model(model)
+        export_by_model = get_exporter_for_model(model)
         result = export_by_model(
             model_name=str(model_name),
             scope=RpcExportScope.into_rpc(scope),

+ 82 - 61
src/sentry/backup/imports.py

@@ -3,34 +3,20 @@ from __future__ import annotations
 from typing import BinaryIO, Iterator, Optional, Tuple, Type
 
 import click
+from django.conf import settings
 from django.core import serializers
-from django.db import transaction
+from django.core.exceptions import ValidationError as DjangoValidationError
+from django.db import IntegrityError, connections, router, transaction
 from django.db.models.base import Model
+from rest_framework.serializers import ValidationError as DjangoRestFrameworkValidationError
 
-from sentry.backup.dependencies import (
-    NormalizedModelName,
-    PrimaryKeyMap,
-    dependencies,
-    get_model_name,
-)
-from sentry.backup.helpers import Filter, ImportFlags, decrypt_encrypted_tarball
+from sentry.backup.dependencies import NormalizedModelName, PrimaryKeyMap, get_model, get_model_name
+from sentry.backup.helpers import EXCLUDED_APPS, Filter, ImportFlags, decrypt_encrypted_tarball
 from sentry.backup.scopes import ImportScope
-from sentry.services.hybrid_cloud.import_export.model import (
-    RpcFilter,
-    RpcImportError,
-    RpcImportErrorKind,
-    RpcImportFlags,
-    RpcImportScope,
-    RpcPrimaryKeyMap,
-)
-from sentry.services.hybrid_cloud.import_export.service import ImportExportService
-from sentry.silo.base import SiloMode
-from sentry.silo.safety import unguarded_write
+from sentry.silo import unguarded_write
 from sentry.utils import json
-from sentry.utils.env import is_split_db
 
 __all__ = (
-    "ImportingError",
     "import_in_user_scope",
     "import_in_organization_scope",
     "import_in_config_scope",
@@ -38,11 +24,6 @@ __all__ = (
 )
 
 
-class ImportingError(Exception):
-    def __init__(self, context: RpcImportError) -> None:
-        self.context = context
-
-
 def _import(
     src: BinaryIO,
     scope: ImportScope,
@@ -64,11 +45,6 @@ def _import(
     from sentry.models.organizationmember import OrganizationMember
     from sentry.models.user import User
 
-    if SiloMode.get_current_mode() == SiloMode.CONTROL:
-        errText = "Imports must be run in REGION or MONOLITH instances only"
-        printer(errText, err=True)
-        raise RuntimeError(errText)
-
     flags = flags if flags is not None else ImportFlags()
     user_model_name = get_model_name(User)
     org_model_name = get_model_name(Organization)
@@ -178,38 +154,83 @@ def _import(
     # of how we do atomicity: on a per-model (if using multiple dbs) or global (if using a single
     # db) basis.
     def do_write():
+        allowed_relocation_scopes = scope.value
         pk_map = PrimaryKeyMap()
-        for model_name, json_data in yield_json_models(src):
-            model_relations = dependencies().get(model_name)
-            if not model_relations:
-                continue
-
-            dep_models = {
-                get_model_name(d) for d in model_relations.get_dependencies_for_relocation()
-            }
-            import_by_model = ImportExportService.get_importer_for_model(model_relations.model)
-            result = import_by_model(
-                model_name=str(model_name),
-                scope=RpcImportScope.into_rpc(scope),
-                flags=RpcImportFlags.into_rpc(flags),
-                filter_by=[RpcFilter.into_rpc(f) for f in filters],
-                pk_map=RpcPrimaryKeyMap.into_rpc(pk_map.partition(dep_models)),
-                json_data=json_data,
-            )
-
-            if isinstance(result, RpcImportError):
-                printer(result.pretty(), err=True)
-                if result.get_kind() == RpcImportErrorKind.IntegrityError:
-                    warningText = ">> Are you restoring from a backup of the same version of Sentry?\n>> Are you restoring onto a clean database?\n>> If so then this IntegrityError might be our fault, you can open an issue here:\n>> https://github.com/getsentry/sentry/issues/new/choose"
-                    printer(warningText, err=True)
-                raise ImportingError(result)
-            pk_map.extend(result.mapped_pks)
-
-    if SiloMode.get_current_mode() == SiloMode.MONOLITH and not is_split_db():
-        with unguarded_write(using="default"), transaction.atomic(using="default"):
+        for (batch_model_name, batch) in yield_json_models(src):
+            model = get_model(batch_model_name)
+            if model is None:
+                raise ValueError("Unknown model name")
+
+            using = router.db_for_write(model)
+            with transaction.atomic(using=using):
+                count = 0
+                for obj in serializers.deserialize("json", batch, use_natural_keys=False):
+                    o = obj.object
+                    if o._meta.app_label not in EXCLUDED_APPS or o:
+                        if o.get_possible_relocation_scopes() & allowed_relocation_scopes:
+                            o = obj.object
+                            model_name = get_model_name(o)
+                            for f in filters:
+                                if f.model == type(o) and getattr(o, f.field, None) not in f.values:
+                                    break
+                            else:
+                                # We can only be sure `get_relocation_scope()` will be correct if it
+                                # is fired AFTER normalization, as some `get_relocation_scope()`
+                                # methods rely on being able to correctly resolve foreign keys,
+                                # which is only possible after normalization.
+                                old_pk = o.normalize_before_relocation_import(pk_map, scope, flags)
+                                if old_pk is None:
+                                    continue
+
+                                # Now that the model has been normalized, we can ensure that this
+                                # particular instance has a `RelocationScope` that permits
+                                # importing.
+                                if not o.get_relocation_scope() in allowed_relocation_scopes:
+                                    continue
+
+                                written = o.write_relocation_import(scope, flags)
+                                if written is None:
+                                    continue
+
+                                new_pk, import_kind = written
+                                slug = getattr(o, "slug", None)
+                                pk_map.insert(model_name, old_pk, new_pk, import_kind, slug)
+                                count += 1
+
+                # If we wrote at least one model, make sure to update the sequences too.
+                if count > 0:
+                    table = o._meta.db_table
+                    seq = f"{table}_id_seq"
+                    with connections[using].cursor() as cursor:
+                        cursor.execute(f"SELECT setval(%s, (SELECT MAX(id) FROM {table}))", [seq])
+
+    try:
+        if len(settings.DATABASES) == 1:
+            # TODO(getsentry/team-ospo#185): This is currently untested in single-db mode. Fix ASAP!
+            with unguarded_write(using="default"), transaction.atomic("default"):
+                do_write()
+        else:
             do_write()
-    else:
-        do_write()
+
+    # For all database integrity errors, let's warn users to follow our
+    # recommended backup/restore workflow before reraising exception. Most of
+    # these errors come from restoring on a different version of Sentry or not restoring
+    # on a clean install.
+    except IntegrityError as e:
+        warningText = ">> Are you restoring from a backup of the same version of Sentry?\n>> Are you restoring onto a clean database?\n>> If so then this IntegrityError might be our fault, you can open an issue here:\n>> https://github.com/getsentry/sentry/issues/new/choose"
+        printer(
+            warningText,
+            err=True,
+        )
+        raise (e)
+
+    # Calls to `write_relocation_import` may fail validation and throw either a
+    # `DjangoValidationError` when a call to `.full_clean()` failed, or a
+    # `DjangoRestFrameworkValidationError` when a call to a custom DRF serializer failed. This
+    # exception catcher converts instances of the former to the latter.
+    except DjangoValidationError as e:
+        errs = {field: error for field, error in e.message_dict.items()}
+        raise DjangoRestFrameworkValidationError(errs) from e
 
 
 def import_in_user_scope(

+ 6 - 8
src/sentry/runner/commands/backup.py

@@ -5,6 +5,12 @@ import click
 from sentry.backup.comparators import get_default_comparators
 from sentry.backup.findings import FindingJSONEncoder
 from sentry.backup.helpers import ImportFlags
+from sentry.backup.imports import (
+    import_in_config_scope,
+    import_in_global_scope,
+    import_in_organization_scope,
+    import_in_user_scope,
+)
 from sentry.backup.validate import validate
 from sentry.runner.decorators import configuration
 from sentry.utils import json
@@ -130,8 +136,6 @@ def import_users(src, decrypt_with, filter_usernames, merge_users, silent):
     Import the Sentry users from an exported JSON file.
     """
 
-    from sentry.backup.imports import import_in_user_scope
-
     import_in_user_scope(
         src,
         decrypt_with=decrypt_with,
@@ -169,8 +173,6 @@ def import_organizations(src, decrypt_with, filter_org_slugs, merge_users, silen
     Import the Sentry organizations, and all constituent Sentry users, from an exported JSON file.
     """
 
-    from sentry.backup.imports import import_in_organization_scope
-
     import_in_organization_scope(
         src,
         decrypt_with=decrypt_with,
@@ -206,8 +208,6 @@ def import_config(src, decrypt_with, merge_users, overwrite_configs, silent):
     Import all configuration and administrator accounts needed to set up this Sentry instance.
     """
 
-    from sentry.backup.imports import import_in_config_scope
-
     import_in_config_scope(
         src,
         decrypt_with=decrypt_with,
@@ -236,8 +236,6 @@ def import_global(src, decrypt_with, silent, overwrite_configs):
     Import all Sentry data from an exported JSON file.
     """
 
-    from sentry.backup.imports import import_in_global_scope
-
     import_in_global_scope(
         src,
         decrypt_with=decrypt_with,

+ 4 - 185
src/sentry/services/hybrid_cloud/import_export/impl.py

@@ -5,12 +5,8 @@
 
 from typing import List, Optional, Set
 
-from django.core.exceptions import ValidationError as DjangoValidationError
-from django.core.serializers import deserialize, serialize
-from django.core.serializers.base import DeserializationError
-from django.db import DatabaseError, IntegrityError, connections, router, transaction
+from django.core.serializers import serialize
 from django.db.models import Q
-from rest_framework.serializers import ValidationError as DjangoRestFrameworkValidationError
 
 from sentry.backup.dependencies import (
     ImportKind,
@@ -21,7 +17,7 @@ from sentry.backup.dependencies import (
     get_model_name,
 )
 from sentry.backup.findings import InstanceID
-from sentry.backup.helpers import EXCLUDED_APPS, DatetimeSafeDjangoJSONEncoder, Filter
+from sentry.backup.helpers import DatetimeSafeDjangoJSONEncoder, Filter
 from sentry.backup.scopes import ExportScope
 from sentry.models.user import User
 from sentry.models.userpermission import UserPermission
@@ -33,12 +29,6 @@ from sentry.services.hybrid_cloud.import_export.model import (
     RpcExportResult,
     RpcExportScope,
     RpcFilter,
-    RpcImportError,
-    RpcImportErrorKind,
-    RpcImportFlags,
-    RpcImportOk,
-    RpcImportResult,
-    RpcImportScope,
     RpcPrimaryKeyMap,
 )
 from sentry.services.hybrid_cloud.import_export.service import ImportExportService
@@ -60,175 +50,6 @@ class UniversalImportExportService(ImportExportService):
             ImportExportService.get_local_implementation().export_by_model(...)
     """
 
-    def import_by_model(
-        self,
-        *,
-        model_name: str,
-        scope: Optional[RpcImportScope] = None,
-        flags: RpcImportFlags,
-        filter_by: List[RpcFilter],
-        pk_map: RpcPrimaryKeyMap,
-        json_data: str,
-    ) -> RpcImportResult:
-        import_flags = flags.from_rpc()
-        batch_model_name = NormalizedModelName(model_name)
-        model = get_model(batch_model_name)
-        if model is None:
-            return RpcImportError(
-                kind=RpcImportErrorKind.UnknownModel,
-                on=InstanceID(model_name),
-                reason=f"The model `{model_name}` could not be found",
-            )
-
-        silo_mode = SiloMode.get_current_mode()
-        model_modes = model._meta.silo_limit.modes  # type: ignore
-        if silo_mode != SiloMode.MONOLITH and silo_mode not in model_modes:
-            return RpcImportError(
-                kind=RpcImportErrorKind.IncorrectSiloModeForModel,
-                on=InstanceID(model_name),
-                reason=f"The model `{model_name}` was forwarded to the incorrect silo (it cannot be imported from the {silo_mode} silo)",
-            )
-
-        if scope is None:
-            return RpcImportError(
-                kind=RpcImportErrorKind.UnspecifiedScope,
-                on=InstanceID(model_name),
-                reason="The RPC was called incorrectly, please set an `ImportScope` parameter",
-            )
-
-        import_scope = scope.from_rpc()
-        in_pk_map = pk_map.from_rpc()
-        filters: List[Filter] = []
-        for fb in filter_by:
-            if NormalizedModelName(fb.model_name) == batch_model_name:
-                filters.append(fb.from_rpc())
-
-        try:
-            using = router.db_for_write(model)
-            with transaction.atomic(using=using):
-                ok_relocation_scopes = import_scope.value
-                out_pk_map = PrimaryKeyMap()
-                max_pk = 0
-                counter = 0
-                for deserialized_object in deserialize("json", json_data, use_natural_keys=False):
-                    counter += 1
-                    model_instance = deserialized_object.object
-                    if model_instance._meta.app_label not in EXCLUDED_APPS or model_instance:
-                        if model_instance.get_possible_relocation_scopes() & ok_relocation_scopes:
-                            inst_model_name = get_model_name(model_instance)
-                            if inst_model_name != batch_model_name:
-                                return RpcImportError(
-                                    kind=RpcImportErrorKind.UnexpectedModel,
-                                    on=InstanceID(model=str(inst_model_name), ordinal=1),
-                                    left_pk=model_instance.pk,
-                                    reason=f"Received model of kind `{str(inst_model_name)}` when `{str(batch_model_name)}` was expected",
-                                )
-
-                            for f in filters:
-                                if getattr(model_instance, f.field, None) not in f.values:
-                                    break
-                            else:
-                                try:
-                                    # We can only be sure `get_relocation_scope()` will be correct
-                                    # if it is fired AFTER normalization, as some
-                                    # `get_relocation_scope()` methods rely on being able to
-                                    # correctly resolve foreign keys, which is only possible after
-                                    # normalization.
-                                    old_pk = model_instance.normalize_before_relocation_import(
-                                        in_pk_map, import_scope, import_flags
-                                    )
-                                    if old_pk is None:
-                                        continue
-
-                                    # Now that the model has been normalized, we can ensure that
-                                    # this particular instance has a `RelocationScope` that permits
-                                    # importing.
-                                    if (
-                                        not model_instance.get_relocation_scope()
-                                        in ok_relocation_scopes
-                                    ):
-                                        continue
-
-                                    # Perform the actual database write.
-                                    written = model_instance.write_relocation_import(
-                                        import_scope, import_flags
-                                    )
-                                    if written is None:
-                                        continue
-
-                                    # For models that may have circular references to themselves
-                                    # (unlikely), keep track of the new pk in the input map as well.
-                                    new_pk, import_kind = written
-                                    slug = getattr(model_instance, "slug", None)
-                                    in_pk_map.insert(
-                                        inst_model_name, old_pk, new_pk, import_kind, slug
-                                    )
-                                    out_pk_map.insert(
-                                        inst_model_name, old_pk, new_pk, import_kind, slug
-                                    )
-                                    if new_pk > max_pk:
-                                        max_pk = new_pk
-
-                                except DjangoValidationError as e:
-                                    errs = {field: error for field, error in e.message_dict.items()}
-                                    return RpcImportError(
-                                        kind=RpcImportErrorKind.ValidationError,
-                                        on=InstanceID(model_name, ordinal=counter),
-                                        left_pk=model_instance.pk,
-                                        reason=f"Django validation error encountered: {errs}",
-                                    )
-
-                                except DjangoRestFrameworkValidationError as e:
-                                    return RpcImportError(
-                                        kind=RpcImportErrorKind.ValidationError,
-                                        on=InstanceID(model_name, ordinal=counter),
-                                        left_pk=model_instance.pk,
-                                        reason=str(e),
-                                    )
-
-            # If we wrote at least one model, make sure to update the sequences too.
-            if counter > 0:
-                table = model_instance._meta.db_table
-                seq = f"{table}_id_seq"
-                with connections[using].cursor() as cursor:
-                    cursor.execute(f"SELECT setval(%s, (SELECT MAX(id) FROM {table}))", [seq])
-
-            return RpcImportOk(
-                mapped_pks=RpcPrimaryKeyMap.into_rpc(out_pk_map),
-                max_pk=max_pk,
-                num_imported=counter,
-            )
-
-        except DeserializationError:
-            return RpcImportError(
-                kind=RpcImportErrorKind.DeserializationFailed,
-                on=InstanceID(model_name),
-                reason="The submitted JSON could not be deserialized into Django model instances",
-            )
-
-        # Catch `IntegrityError` before `DatabaseError`, since the former is a subclass of the
-        # latter.
-        except IntegrityError as e:
-            return RpcImportError(
-                kind=RpcImportErrorKind.IntegrityError,
-                on=InstanceID(model_name),
-                reason=str(e),
-            )
-
-        except DatabaseError as e:
-            return RpcImportError(
-                kind=RpcImportErrorKind.DatabaseError,
-                on=InstanceID(model_name),
-                reason=str(e),
-            )
-
-        except Exception as e:
-            return RpcImportError(
-                kind=RpcImportErrorKind.Unknown,
-                on=InstanceID(model_name),
-                reason=f"Unknown internal error occurred: {e}",
-            )
-
     def export_by_model(
         self,
         *,
@@ -273,7 +94,7 @@ class UniversalImportExportService(ImportExportService):
             allowed_relocation_scopes = export_scope.value
             possible_relocation_scopes = model.get_possible_relocation_scopes()
             includable = possible_relocation_scopes & allowed_relocation_scopes
-            if not includable:
+            if not includable or model._meta.proxy:
                 return RpcExportError(
                     kind=RpcExportErrorKind.UnexportableModel,
                     on=InstanceID(model_name),
@@ -321,9 +142,7 @@ class UniversalImportExportService(ImportExportService):
                             # For models that may have circular references to themselves (unlikely),
                             # keep track of the new pk in the input map as well.
                             nonlocal max_pk
-                            if item.pk > max_pk:
-                                max_pk = item.pk
-
+                            max_pk = item.pk
                             in_pk_map.insert(model_name, item.pk, item.pk, ImportKind.Inserted)
                             out_pk_map.insert(model_name, item.pk, item.pk, ImportKind.Inserted)
                             yield item

Some files were not shown because too many files changed in this diff