1 year ago · c7d1cc6928
--- a/fixtures/backup/single-integration.json
+++ b/fixtures/backup/single-integration.json
@@ -0,0 +1,14 @@
 
				+ [
			
 
				+    {
			
 
				+        "model": "sentry.integration",
			
 
				+        "pk": 1,
			
 
				+        "fields": {
			
 
				+            "date_added": "2023-06-22T00:00:00.000Z",
			
 
				+            "date_updated": "2023-06-23T00:00:00.000Z",
			
 
				+            "external_id": "slack:test-org",
			
 
				+            "name": "Slack for test-org",
			
 
				+            "provider": "slack",
			
 
				+            "status": 0
			
 
				+        }
			
 
				+    }
			
 
				+]
			
--- a/src/sentry/backup/findings.py
+++ b/src/sentry/backup/findings.py
@@ -37,6 +37,9 @@ class ComparatorFindingKind(FindingKind):
 
				     # The instances of a particular model did not maintain total ordering of pks (that is, pks did not appear in ascending order, or appear multiple times).
			
 
				     UnorderedInput = auto()
			
 
				 
			
 
				+    # Multiple instances of the same custom ordinal signature exist in the input.
			
 
				+    DuplicateCustomOrdinal = auto()
			
 
				+
			
 
				     # The number of instances of a particular model on the left and right side of the input were not
			
 
				     # equal.
			
 
				     UnequalCounts = auto()
			
--- a/src/sentry/backup/mixins.py
+++ b/src/sentry/backup/mixins.py
@@ -9,23 +9,25 @@ from sentry.backup.scopes import ImportScope, RelocationScope
 
				 
			
 
				 class OverwritableConfigMixin:
			
 
				     """
			
 
				-    Handles the `ImportFlags.overwrite_configs` setting when it's piped through to a `RelocationScope.Config` model with at least one `unique=True` field, thereby handling the collision in the manner the importer requested.
			
 
				+    Handles the `ImportFlags.overwrite_configs` setting when it's piped through to a
			
 
				+    `RelocationScope.Config` model with at least one `unique=True` field, thereby handling the
			
 
				+    collision in the manner the importer requested.
			
 
				     """
			
 
				 
			
 
				+    # TODO(getsentry/team-ospo#190): Clean up the type checking in this method.
			
 
				     def write_relocation_import(
			
 
				         self, scope: ImportScope, flags: ImportFlags
			
 
				     ) -> Optional[Tuple[int, ImportKind]]:
			
 
				-        # TODO(getsentry/team-ospo#190): Clean up the type checking here.
			
 
				-        if self.get_relocation_scope() == RelocationScope.Config:  # type: ignore
			
 
				-            # Get all unique sets that will potentially cause collisions.
			
 
				-            uniq_sets = dependencies()[get_model_name(self)].get_uniques_without_foreign_keys()  # type: ignore
			
 
				+        # Get all unique sets that will potentially cause collisions.
			
 
				+        uniq_sets = dependencies()[get_model_name(self)].get_uniques_without_foreign_keys()  # type: ignore
			
 
				+
			
 
				+        # Don't use this mixin for models with multiple unique sets; write custom logic instead.
			
 
				+        assert len(uniq_sets) <= 1
			
 
				 
			
 
				-            # Don't use this mixin for models with multiple unique sets; write custom logic instead.
			
 
				-            if len(uniq_sets) > 1:
			
 
				-                raise ValueError(
			
 
				-                    "Cannot use `OverwritableConfigMixin` on model with multiple unique sets"
			
 
				-                )
			
 
				+        # Must set `__relocation_custom_ordinal__` on models that use this mixin.
			
 
				+        assert getattr(self.__class__, "__relocation_custom_ordinal__", None) is not None
			
 
				 
			
 
				+        if self.get_relocation_scope() == RelocationScope.Config:  # type: ignore
			
 
				             if len(uniq_sets) == 1:
			
 
				                 uniq_set = uniq_sets[0]
			
 
				                 query = dict()
			
--- a/src/sentry/backup/validate.py
+++ b/src/sentry/backup/validate.py
@@ -1,12 +1,13 @@
 
				 from __future__ import annotations
			
 
				 
			
 
				+from collections import OrderedDict as ordereddict
			
 
				 from collections import defaultdict
			
 
				 from copy import deepcopy
			
 
				 from difflib import unified_diff
			
 
				-from typing import Dict, Tuple
			
 
				+from typing import Dict, OrderedDict, Tuple
			
 
				 
			
 
				 from sentry.backup.comparators import ComparatorMap, ForeignKeyComparator, get_default_comparators
			
 
				-from sentry.backup.dependencies import ImportKind, NormalizedModelName, PrimaryKeyMap
			
 
				+from sentry.backup.dependencies import ImportKind, NormalizedModelName, PrimaryKeyMap, get_model
			
 
				 from sentry.backup.findings import (
			
 
				     ComparatorFinding,
			
 
				     ComparatorFindingKind,
			
@@ -33,53 +34,111 @@ def validate(
 
				     class OrdinalCounter:
			
 
				         """Keeps track of the next ordinal to be assigned for a given model kind."""
			
 
				 
			
 
				-        max_seen_pk: int
			
 
				+        # The `value` being tracked is either the custom ordering tuple for this model (see: `BaseModel::get_relocation_ordinal_fields()` method), or otherwise just the pk.
			
 
				+        max_seen_ordinal_value: int | tuple | None
			
 
				         next_ordinal: int
			
 
				 
			
 
				         def __init__(self):
			
 
				-            self.max_seen_pk = -1
			
 
				+            self.max_seen_ordinal_value = None
			
 
				             self.next_ordinal = 1
			
 
				 
			
 
				-        def assign(self, obj: JSONData, side: Side) -> Tuple[int, list[ComparatorFinding]]:
			
 
				+        def assign(
			
 
				+            self, obj: JSONData, ordinal_value: int | tuple, side: Side
			
 
				+        ) -> Tuple[InstanceID, list[ComparatorFinding]]:
			
 
				             """Assigns the next available ordinal to the supplied `obj` model."""
			
 
				 
			
 
				             pk = obj["pk"]
			
 
				             model_name = NormalizedModelName(obj["model"])
			
 
				             findings = []
			
 
				-            if pk > self.max_seen_pk:
			
 
				-                self.max_seen_pk = pk
			
 
				+            if (
			
 
				+                self.max_seen_ordinal_value is None
			
 
				+                or ordinal_value > self.max_seen_ordinal_value  # type: ignore
			
 
				+            ):
			
 
				+                self.max_seen_ordinal_value = ordinal_value
			
 
				             else:
			
 
				+                # Only `pk`-based collisions are reported here; collisions for custom ordinals are
			
 
				+                # caught earlier.
			
 
				+                assert not isinstance(self.max_seen_ordinal_value, tuple)
			
 
				+
			
 
				                 findings.append(
			
 
				                     ComparatorFinding(
			
 
				                         kind=ComparatorFindingKind.UnorderedInput,
			
 
				                         on=InstanceID(str(model_name), self.next_ordinal),
			
 
				                         left_pk=pk if side == Side.left else None,
			
 
				                         right_pk=pk if side == Side.right else None,
			
 
				-                        reason=f"""instances not listed in ascending `pk` order; `pk` {pk} is less than or equal to {self.max_seen_pk} which precedes it""",
			
 
				+                        reason=f"""instances not listed in ascending `pk` order; `pk` {pk} is less than or equal to {self.max_seen_ordinal_value} which precedes it""",
			
 
				                     )
			
 
				                 )
			
 
				 
			
 
				             obj["ordinal"] = self.next_ordinal
			
 
				             self.next_ordinal += 1
			
 
				-            return (obj["ordinal"], findings if findings else [])
			
 
				+
			
 
				+            return (InstanceID(str(model_name), obj["ordinal"]), findings if findings else [])
			
 
				 
			
 
				     OrdinalCounters = Dict[NormalizedModelName, OrdinalCounter]
			
 
				-    ModelMap = Dict[InstanceID, JSONData]
			
 
				+    ModelMap = Dict[NormalizedModelName, OrderedDict[InstanceID, JSONData]]
			
 
				 
			
 
				     def build_model_map(
			
 
				         models: JSONData, side: Side, findings: ComparatorFindings
			
 
				     ) -> Tuple[ModelMap, OrdinalCounters]:
			
 
				         """Does two things in tandem: builds a map of InstanceID -> JSON model, and simultaneously builds a map of model name -> number of ordinals assigned."""
			
 
				 
			
 
				-        model_map: ModelMap = {}
			
 
				+        from sentry.db.models import BaseModel
			
 
				+        from sentry.models.user import User
			
 
				+
			
 
				+        model_map: ModelMap = defaultdict(ordereddict)
			
 
				         ordinal_counters: OrdinalCounters = defaultdict(OrdinalCounter)
			
 
				+        need_ordering: dict[NormalizedModelName, Dict[tuple, JSONData]] = defaultdict(dict)
			
 
				+        pks_to_usernames: dict[int, str] = dict()
			
 
				+
			
 
				         for model in models:
			
 
				+            pk = model["pk"]
			
 
				             model_name = NormalizedModelName(model["model"])
			
 
				-            counter = ordinal_counters[model_name]
			
 
				-            ordinal, found = counter.assign(model, side)
			
 
				-            findings.extend(found)
			
 
				-            id = InstanceID(str(model_name), ordinal)
			
 
				-            model_map[id] = model
			
 
				+            model_type = get_model(model_name)
			
 
				+            if model_type is None or not issubclass(model_type, BaseModel):
			
 
				+                raise RuntimeError("Unknown model class")
			
 
				+
			
 
				+            if model_type == User:
			
 
				+                pks_to_usernames[pk] = model["fields"]["username"]
			
 
				+
			
 
				+            custom_ordinal_fields = model_type.get_relocation_ordinal_fields()
			
 
				+            if custom_ordinal_fields is None:
			
 
				+                id, found = ordinal_counters[model_name].assign(model, pk, side)
			
 
				+                findings.extend(found)
			
 
				+                model_map[model_name][id] = model
			
 
				+                continue
			
 
				+
			
 
				+            custom_ordinal_parts = []
			
 
				+            for field in custom_ordinal_fields:
			
 
				+                # Special case: for `user` pks, look through the user to the `username` instead.
			
 
				+                if field == "user" or field == "user_id":
			
 
				+                    custom_ordinal_parts.append(pks_to_usernames[model["fields"][field]])
			
 
				+                else:
			
 
				+                    custom_ordinal_parts.append(model["fields"][field])
			
 
				+
			
 
				+            ordinal = tuple(custom_ordinal_parts)
			
 
				+            if need_ordering[model_name].get(ordinal) is not None:
			
 
				+                findings.append(
			
 
				+                    ComparatorFinding(
			
 
				+                        kind=ComparatorFindingKind.DuplicateCustomOrdinal,
			
 
				+                        on=InstanceID(str(model_name), None),
			
 
				+                        left_pk=pk if side == Side.left else None,
			
 
				+                        right_pk=pk if side == Side.right else None,
			
 
				+                        reason=f"""custom ordinal value `{ordinal}` appears multiple times""",
			
 
				+                    )
			
 
				+                )
			
 
				+
			
 
				+            need_ordering[model_name][ordinal] = model
			
 
				+
			
 
				+        for model_name, models in need_ordering.items():
			
 
				+            # Sort the models by key, which is a tuple of ordered custom ordinal field values,
			
 
				+            # specific to the model in question.
			
 
				+            ordered_models = dict(sorted(models.items()))
			
 
				+            for ordinal_value, model in ordered_models.items():
			
 
				+                id, found = ordinal_counters[model_name].assign(model, ordinal_value, side)
			
 
				+                findings.extend(found)
			
 
				+                model_map[model_name][id] = model
			
 
				+
			
 
				         return (model_map, ordinal_counters)
			
 
				 
			
 
				     def json_lines(obj: JSONData) -> list[str]:
			
@@ -134,59 +193,63 @@ def validate(
 
				 
			
 
				     # Save the pk -> ordinal mapping on both sides, so that we can decode foreign keys into this
			
 
				     # model that we encounter later.
			
 
				-    for id, right in right_models.items():
			
 
				-        if id.ordinal is None:
			
 
				-            raise RuntimeError("all InstanceIDs used for comparisons must have their ordinal set")
			
 
				-
			
 
				-        left = left_models[id]
			
 
				-        left_pk_map.insert(
			
 
				-            NormalizedModelName(id.model), left_models[id]["pk"], id.ordinal, ImportKind.Inserted
			
 
				-        )
			
 
				-        right_pk_map.insert(
			
 
				-            NormalizedModelName(id.model), right["pk"], id.ordinal, ImportKind.Inserted
			
 
				-        )
			
 
				+    for model_name, models in right_models.items():
			
 
				+        for id, right in models.items():
			
 
				+            assert id.ordinal is not None
			
 
				+
			
 
				+            left = left_models[model_name][id]
			
 
				+            left_pk_map.insert(
			
 
				+                NormalizedModelName(id.model),
			
 
				+                left_models[model_name][id]["pk"],
			
 
				+                id.ordinal,
			
 
				+                ImportKind.Inserted,
			
 
				+            )
			
 
				+            right_pk_map.insert(
			
 
				+                NormalizedModelName(id.model), right["pk"], id.ordinal, ImportKind.Inserted
			
 
				+            )
			
 
				 
			
 
				     # We only perform custom comparisons and JSON diffs on non-duplicate entries that exist in both
			
 
				     # outputs.
			
 
				-    for id, right in right_models.items():
			
 
				-        if id.ordinal is None:
			
 
				-            raise RuntimeError("all InstanceIDs used for comparisons must have their ordinal set")
			
 
				-
			
 
				-        # Try comparators applicable for this specific model.
			
 
				-        left = left_models[id]
			
 
				-        if id.model in comparators:
			
 
				-            # We take care to run ALL of the `compare()` methods on each comparator before calling
			
 
				-            # any `scrub()` methods. This ensures that, in cases where a single model uses multiple
			
 
				-            # comparators that touch the same fields, one comparator does not accidentally scrub the
			
 
				-            # inputs for its follower. If `compare()` functions are well-behaved (that is, they
			
 
				-            # don't mutate their inputs), this should be sufficient to ensure that the order in
			
 
				-            # which comparators are applied does not change the final output.
			
 
				-            for cmp in comparators[id.model]:
			
 
				-                ex = cmp.existence(id, left, right)
			
 
				-                if ex:
			
 
				-                    findings.extend(ex)
			
 
				-                    continue
			
 
				-
			
 
				-                if isinstance(cmp, ForeignKeyComparator):
			
 
				-                    cmp.set_primary_key_maps(left_pk_map, right_pk_map)
			
 
				-
			
 
				-                res = cmp.compare(id, left, right)
			
 
				-                if res:
			
 
				-                    findings.extend(res)
			
 
				-            for cmp in comparators[id.model]:
			
 
				-                cmp.scrub(left, right)
			
 
				-
			
 
				-        # Finally, perform a diff on the remaining JSON.
			
 
				-        diff = list(unified_diff(json_lines(left["fields"]), json_lines(right["fields"]), n=15))
			
 
				-        if diff:
			
 
				-            findings.append(
			
 
				-                ComparatorFinding(
			
 
				-                    kind=ComparatorFindingKind.UnequalJSON,
			
 
				-                    on=id,
			
 
				-                    left_pk=left["pk"],
			
 
				-                    right_pk=right["pk"],
			
 
				-                    reason="\n    " + "\n    ".join(diff),
			
 
				+    for model_name, models in right_models.items():
			
 
				+        for id, right in models.items():
			
 
				+            assert id.ordinal is not None
			
 
				+
			
 
				+            # Try comparators applicable for this specific model.
			
 
				+            left = left_models[model_name][id]
			
 
				+            if id.model in comparators:
			
 
				+                # We take care to run ALL of the `compare()` methods on each comparator before
			
 
				+                # calling any `scrub()` methods. This ensures that, in cases where a single model
			
 
				+                # uses multiple comparators that touch the same fields, one comparator does not
			
 
				+                # accidentally scrub the inputs for its follower. If `compare()` functions are
			
 
				+                # well-behaved (that is, they don't mutate their inputs), this should be sufficient
			
 
				+                # to ensure that the order in which comparators are applied does not change the
			
 
				+                # final output.
			
 
				+                for cmp in comparators[id.model]:
			
 
				+                    ex = cmp.existence(id, left, right)
			
 
				+                    if ex:
			
 
				+                        findings.extend(ex)
			
 
				+                        continue
			
 
				+
			
 
				+                    if isinstance(cmp, ForeignKeyComparator):
			
 
				+                        cmp.set_primary_key_maps(left_pk_map, right_pk_map)
			
 
				+
			
 
				+                    res = cmp.compare(id, left, right)
			
 
				+                    if res:
			
 
				+                        findings.extend(res)
			
 
				+                for cmp in comparators[id.model]:
			
 
				+                    cmp.scrub(left, right)
			
 
				+
			
 
				+            # Finally, perform a diff on the remaining JSON.
			
 
				+            diff = list(unified_diff(json_lines(left["fields"]), json_lines(right["fields"]), n=15))
			
 
				+            if diff:
			
 
				+                findings.append(
			
 
				+                    ComparatorFinding(
			
 
				+                        kind=ComparatorFindingKind.UnequalJSON,
			
 
				+                        on=id,
			
 
				+                        left_pk=left["pk"],
			
 
				+                        right_pk=right["pk"],
			
 
				+                        reason="\n    " + "\n    ".join(diff),
			
 
				+                    )
			
 
				                 )
			
 
				-            )
			
 
				 
			
 
				     return findings
			
--- a/src/sentry/db/models/base.py
+++ b/src/sentry/db/models/base.py
@@ -50,6 +50,11 @@ class BaseModel(models.Model):
 
				     __relocation_scope__: RelocationScope | set[RelocationScope]
			
 
				     __relocation_dependencies__: set[str]
			
 
				 
			
 
				+    # Some models have a globally unique identifier, like a UUID. This should be a set of one or
			
 
				+    # more fields, none of which are foreign keys, that are `unique=True` or `unique_together` for
			
 
				+    # an entire Sentry instance.
			
 
				+    __relocation_custom_ordinal__: list[str] | None = None
			
 
				+
			
 
				     objects: ClassVar[BaseManager[Self]] = BaseManager()
			
 
				 
			
 
				     update = update
			
@@ -123,6 +128,21 @@ class BaseModel(models.Model):
 
				 
			
 
				         return self.__relocation_scope__
			
 
				 
			
 
				+    @classmethod
			
 
				+    def get_relocation_ordinal_fields(self) -> None | list[str]:
			
 
				+        """
			
 
				+        Retrieves the custom ordinal fields for models that may be re-used at import time (that is,
			
 
				+        the `write_relocation_import()` method may return an `ImportKind` besides
			
 
				+        `ImportKind.Inserted`). In such cases, we want an ordering of models by a globally unique
			
 
				+        value that is not the `pk`, to ensure that merged and inserted models are still ordered
			
 
				+        correctly with respect to one another.
			
 
				+        """
			
 
				+
			
 
				+        if self.__relocation_custom_ordinal__ is None:
			
 
				+            return None
			
 
				+
			
 
				+        return self.__relocation_custom_ordinal__
			
 
				+
			
 
				     @classmethod
			
 
				     def get_possible_relocation_scopes(cls) -> set[RelocationScope]:
			
 
				         """
			
--- a/src/sentry/models/email.py
+++ b/src/sentry/models/email.py
@@ -22,6 +22,7 @@ class Email(Model):
 
				 
			
 
				     __relocation_scope__ = RelocationScope.User
			
 
				     __relocation_dependencies__ = {"sentry.User"}
			
 
				+    __relocation_custom_ordinal__ = ["email"]
			
 
				 
			
 
				     email = CIEmailField(_("email address"), unique=True, max_length=75)
			
 
				     date_added = models.DateTimeField(default=timezone.now)
			
--- a/src/sentry/models/options/option.py
+++ b/src/sentry/models/options/option.py
@@ -32,6 +32,7 @@ class BaseOption(OverwritableConfigMixin, Model):
 
				 
			
 
				     # Subclasses should overwrite the relocation scope as appropriate.
			
 
				     __relocation_scope__ = RelocationScope.Excluded
			
 
				+    __relocation_custom_ordinal__ = ["key"]
			
 
				 
			
 
				     key = models.CharField(max_length=128, unique=True)
			
 
				     last_updated = models.DateTimeField(default=timezone.now)
			
--- a/src/sentry/models/relay.py
+++ b/src/sentry/models/relay.py
@@ -11,6 +11,7 @@ from sentry.db.models import Model, region_silo_only_model
 
				 @region_silo_only_model
			
 
				 class RelayUsage(OverwritableConfigMixin, Model):
			
 
				     __relocation_scope__ = RelocationScope.Config
			
 
				+    __relocation_custom_ordinal__ = ["relay_id", "version"]
			
 
				 
			
 
				     relay_id = models.CharField(max_length=64)
			
 
				     version = models.CharField(max_length=32, default="0.0.1")
			
@@ -27,6 +28,7 @@ class RelayUsage(OverwritableConfigMixin, Model):
 
				 @region_silo_only_model
			
 
				 class Relay(OverwritableConfigMixin, Model):
			
 
				     __relocation_scope__ = RelocationScope.Config
			
 
				+    __relocation_custom_ordinal__ = ["relay_id"]
			
 
				 
			
 
				     relay_id = models.CharField(max_length=64, unique=True)
			
 
				     public_key = models.CharField(max_length=200)
			
--- a/src/sentry/models/user.py
+++ b/src/sentry/models/user.py
@@ -86,6 +86,8 @@ class UserManager(BaseManager["User"], DjangoUserManager):
 
				 @control_silo_only_model
			
 
				 class User(BaseModel, AbstractBaseUser):
			
 
				     __relocation_scope__ = RelocationScope.User
			
 
				+    __relocation_custom_ordinal__ = ["username"]
			
 
				+
			
 
				     replication_version: int = 2
			
 
				 
			
 
				     id = BoundedBigAutoField(primary_key=True)
			
--- a/src/sentry/models/useremail.py
+++ b/src/sentry/models/useremail.py
@@ -46,6 +46,7 @@ class UserEmailManager(BaseManager["UserEmail"]):
 
				 class UserEmail(ControlOutboxProducingModel):
			
 
				     __relocation_scope__ = RelocationScope.User
			
 
				     __relocation_dependencies__ = {"sentry.Email"}
			
 
				+    __relocation_custom_ordinal__ = ["user", "email"]
			
 
				 
			
 
				     user = FlexibleForeignKey(settings.AUTH_USER_MODEL, related_name="emails")
			
 
				     email = models.EmailField(_("email address"), max_length=75)
			
@@ -102,7 +103,7 @@ class UserEmail(ControlOutboxProducingModel):
 
				         if old_pk is None:
			
 
				             return None
			
 
				 
			
 
				-        # If we are merging users, ignore the imported email and use the merged user's email
			
 
				+        # If we are merging users, ignore the imported email and use the existing user's email
			
 
				         # instead.
			
 
				         if pk_map.get_kind(get_model_name(User), old_user_id) == ImportKind.Existing:
			
 
				             return None