@@ -1,12 +1,13 @@
from __future__ import annotations
+from collections import OrderedDict as ordereddict
from collections import defaultdict
from copy import deepcopy
from difflib import unified_diff
-from typing import Dict, Tuple
+from typing import Dict, OrderedDict, Tuple
from sentry.backup.comparators import ComparatorMap, ForeignKeyComparator, get_default_comparators
-from sentry.backup.dependencies import ImportKind, NormalizedModelName, PrimaryKeyMap
+from sentry.backup.dependencies import ImportKind, NormalizedModelName, PrimaryKeyMap, get_model
from sentry.backup.findings import (
@@ -33,53 +34,111 @@ def validate(
class OrdinalCounter:
"""Keeps track of the next ordinal to be assigned for a given model kind."""
- max_seen_pk: int
+ # The value being tracked is either this model's custom ordinal tuple (see the `BaseModel.get_relocation_ordinal_fields()` method) or, when the model defines none, just the `pk`.
+ max_seen_ordinal_value: int | tuple | None
next_ordinal: int
def __init__(self):
- self.max_seen_pk = -1
+ self.max_seen_ordinal_value = None
self.next_ordinal = 1
- def assign(self, obj: JSONData, side: Side) -> Tuple[int, list[ComparatorFinding]]:
+ def assign(
+ self, obj: JSONData, ordinal_value: int | tuple, side: Side
+ ) -> Tuple[InstanceID, list[ComparatorFinding]]:
"""Assigns the next available ordinal to the supplied `obj` model."""
pk = obj["pk"]
model_name = NormalizedModelName(obj["model"])
findings = []
- if pk > self.max_seen_pk:
- self.max_seen_pk = pk
+ if (
+ self.max_seen_ordinal_value is None
+ or ordinal_value > self.max_seen_ordinal_value # type: ignore
+ ):
+ self.max_seen_ordinal_value = ordinal_value
+ # Only `pk`-based collisions are reported here; collisions for custom ordinals are
+ # caught earlier.
+ assert not isinstance(self.max_seen_ordinal_value, tuple)
on=InstanceID(str(model_name), self.next_ordinal),
left_pk=pk if side == Side.left else None,
right_pk=pk if side == Side.right else None,
- reason=f"""instances not listed in ascending `pk` order; `pk` {pk} is less than or equal to {self.max_seen_pk} which precedes it""",
+ reason=f"""instances not listed in ascending `pk` order; `pk` {pk} is less than or equal to {self.max_seen_ordinal_value} which precedes it""",
obj["ordinal"] = self.next_ordinal
self.next_ordinal += 1
- return (obj["ordinal"], findings if findings else [])
+ return (InstanceID(str(model_name), obj["ordinal"]), findings if findings else [])
OrdinalCounters = Dict[NormalizedModelName, OrdinalCounter]
- ModelMap = Dict[InstanceID, JSONData]
+ ModelMap = Dict[NormalizedModelName, OrderedDict[InstanceID, JSONData]]
def build_model_map(
models: JSONData, side: Side, findings: ComparatorFindings
) -> Tuple[ModelMap, OrdinalCounters]:
"""Does two things in tandem: builds a map of InstanceID -> JSON model, and simultaneously builds a map of model name -> number of ordinals assigned."""
- model_map: ModelMap = {}
+ from sentry.db.models import BaseModel
+ from sentry.models.user import User
+ model_map: ModelMap = defaultdict(ordereddict)
ordinal_counters: OrdinalCounters = defaultdict(OrdinalCounter)
+ need_ordering: dict[NormalizedModelName, Dict[tuple, JSONData]] = defaultdict(dict)
+ pks_to_usernames: dict[int, str] = dict()
for model in models:
+ pk = model["pk"]
model_name = NormalizedModelName(model["model"])
- counter = ordinal_counters[model_name]
- ordinal, found = counter.assign(model, side)
- findings.extend(found)
- id = InstanceID(str(model_name), ordinal)
- model_map[id] = model
+ model_type = get_model(model_name)
+ if model_type is None or not issubclass(model_type, BaseModel):
+ raise RuntimeError("Unknown model class")
+ if model_type == User:
+ pks_to_usernames[pk] = model["fields"]["username"]
+ custom_ordinal_fields = model_type.get_relocation_ordinal_fields()
+ if custom_ordinal_fields is None:
+ id, found = ordinal_counters[model_name].assign(model, pk, side)
+ findings.extend(found)
+ model_map[model_name][id] = model
+ continue
+ custom_ordinal_parts = []
+ for field in custom_ordinal_fields:
+ # Special case: `user`/`user_id` fields hold a user `pk`, so order by that user's `username` (recorded in `pks_to_usernames`) instead.
+ if field == "user" or field == "user_id":
+ custom_ordinal_parts.append(pks_to_usernames[model["fields"][field]])
+ else:
+ custom_ordinal_parts.append(model["fields"][field])
+ ordinal = tuple(custom_ordinal_parts)
+ if need_ordering[model_name].get(ordinal) is not None:
+ findings.append(
+ ComparatorFinding(
+ kind=ComparatorFindingKind.DuplicateCustomOrdinal,
+ on=InstanceID(str(model_name), None),
+ left_pk=pk if side == Side.left else None,
+ right_pk=pk if side == Side.right else None,
+ reason=f"""custom ordinal value `{ordinal}` appears multiple times""",
+ )
+ )
+ need_ordering[model_name][ordinal] = model
+ for model_name, models in need_ordering.items():
+ # Sort the models by key, which is a tuple of ordered custom ordinal field values,
+ # specific to the model in question.
+ ordered_models = dict(sorted(models.items()))
+ for ordinal_value, model in ordered_models.items():
+ id, found = ordinal_counters[model_name].assign(model, ordinal_value, side)
+ findings.extend(found)
+ model_map[model_name][id] = model
return (model_map, ordinal_counters)
def json_lines(obj: JSONData) -> list[str]:
@@ -134,59 +193,63 @@ def validate(
# Save the pk -> ordinal mapping on both sides, so that we can decode foreign keys into this
# model that we encounter later.
- for id, right in right_models.items():
- if id.ordinal is None:
- raise RuntimeError("all InstanceIDs used for comparisons must have their ordinal set")
- left = left_models[id]
- left_pk_map.insert(
- NormalizedModelName(id.model), left_models[id]["pk"], id.ordinal, ImportKind.Inserted
- )
- right_pk_map.insert(
- NormalizedModelName(id.model), right["pk"], id.ordinal, ImportKind.Inserted
- )
+ for model_name, models in right_models.items():
+ for id, right in models.items():
+ assert id.ordinal is not None
+ left = left_models[model_name][id]
+ left_pk_map.insert(
+ NormalizedModelName(id.model),
+ left_models[model_name][id]["pk"],
+ id.ordinal,
+ ImportKind.Inserted,
+ )
+ right_pk_map.insert(
+ NormalizedModelName(id.model), right["pk"], id.ordinal, ImportKind.Inserted
+ )
# We only perform custom comparisons and JSON diffs on non-duplicate entries that exist in both
# outputs.
- for id, right in right_models.items():
- if id.ordinal is None:
- raise RuntimeError("all InstanceIDs used for comparisons must have their ordinal set")
- # Try comparators applicable for this specific model.
- left = left_models[id]
- if id.model in comparators:
- # We take care to run ALL of the `compare()` methods on each comparator before calling
- # any `scrub()` methods. This ensures that, in cases where a single model uses multiple
- # comparators that touch the same fields, one comparator does not accidentally scrub the
- # inputs for its follower. If `compare()` functions are well-behaved (that is, they
- # don't mutate their inputs), this should be sufficient to ensure that the order in
- # which comparators are applied does not change the final output.
- for cmp in comparators[id.model]:
- ex = cmp.existence(id, left, right)
- if ex:
- findings.extend(ex)
- continue
- if isinstance(cmp, ForeignKeyComparator):
- cmp.set_primary_key_maps(left_pk_map, right_pk_map)
- res = cmp.compare(id, left, right)
- if res:
- findings.extend(res)
- for cmp in comparators[id.model]:
- cmp.scrub(left, right)
- # Finally, perform a diff on the remaining JSON.
- diff = list(unified_diff(json_lines(left["fields"]), json_lines(right["fields"]), n=15))
- if diff:
- findings.append(
- ComparatorFinding(
- kind=ComparatorFindingKind.UnequalJSON,
- on=id,
- left_pk=left["pk"],
- right_pk=right["pk"],
- reason="\n " + "\n ".join(diff),
+ for model_name, models in right_models.items():
+ for id, right in models.items():
+ assert id.ordinal is not None
+ # Try comparators applicable for this specific model.
+ left = left_models[model_name][id]
+ if id.model in comparators:
+ # We take care to run ALL of the `compare()` methods on each comparator before
+ # calling any `scrub()` methods. This ensures that, in cases where a single model
+ # uses multiple comparators that touch the same fields, one comparator does not
+ # accidentally scrub the inputs for its follower. If `compare()` functions are
+ # well-behaved (that is, they don't mutate their inputs), this should be sufficient
+ # to ensure that the order in which comparators are applied does not change the
+ # final output.
+ for cmp in comparators[id.model]:
+ ex = cmp.existence(id, left, right)
+ if ex:
+ findings.extend(ex)
+ continue
+ if isinstance(cmp, ForeignKeyComparator):
+ cmp.set_primary_key_maps(left_pk_map, right_pk_map)
+ res = cmp.compare(id, left, right)
+ if res:
+ findings.extend(res)
+ for cmp in comparators[id.model]:
+ cmp.scrub(left, right)
+ # Finally, perform a diff on the remaining JSON.
+ diff = list(unified_diff(json_lines(left["fields"]), json_lines(right["fields"]), n=15))
+ if diff:
+ findings.append(
+ ComparatorFinding(
+ kind=ComparatorFindingKind.UnequalJSON,
+ on=id,
+ left_pk=left["pk"],
+ right_pk=right["pk"],
+ reason="\n " + "\n ".join(diff),
+ )
- )
return findings