|
@@ -1,12 +1,13 @@
|
|
|
from __future__ import annotations
|
|
|
|
|
|
+from collections import OrderedDict as ordereddict
|
|
|
from collections import defaultdict
|
|
|
from copy import deepcopy
|
|
|
from difflib import unified_diff
|
|
|
-from typing import Dict, Tuple
|
|
|
+from typing import Dict, OrderedDict, Tuple
|
|
|
|
|
|
from sentry.backup.comparators import ComparatorMap, ForeignKeyComparator, get_default_comparators
|
|
|
-from sentry.backup.dependencies import ImportKind, NormalizedModelName, PrimaryKeyMap
|
|
|
+from sentry.backup.dependencies import ImportKind, NormalizedModelName, PrimaryKeyMap, get_model
|
|
|
from sentry.backup.findings import (
|
|
|
ComparatorFinding,
|
|
|
ComparatorFindingKind,
|
|
@@ -33,53 +34,111 @@ def validate(
|
|
|
class OrdinalCounter:
|
|
|
"""Keeps track of the next ordinal to be assigned for a given model kind."""
|
|
|
|
|
|
- max_seen_pk: int
|
|
|
+ # The ordinal value being tracked is either the custom ordering tuple for this model (see: `BaseModel::get_relocation_ordinal_fields()` method), or otherwise just the pk. It stays `None` until the first instance has been assigned.
|
|
|
+ max_seen_ordinal_value: int | tuple | None
|
|
|
next_ordinal: int
|
|
|
|
|
|
def __init__(self):
|
|
|
- self.max_seen_pk = -1
|
|
|
+ self.max_seen_ordinal_value = None
|
|
|
self.next_ordinal = 1
|
|
|
|
|
|
- def assign(self, obj: JSONData, side: Side) -> Tuple[int, list[ComparatorFinding]]:
|
|
|
+ def assign(
|
|
|
+ self, obj: JSONData, ordinal_value: int | tuple, side: Side
|
|
|
+ ) -> Tuple[InstanceID, list[ComparatorFinding]]:
|
|
|
"""Assigns the next available ordinal to the supplied `obj` model."""
|
|
|
|
|
|
pk = obj["pk"]
|
|
|
model_name = NormalizedModelName(obj["model"])
|
|
|
findings = []
|
|
|
- if pk > self.max_seen_pk:
|
|
|
- self.max_seen_pk = pk
|
|
|
+ if (
|
|
|
+ self.max_seen_ordinal_value is None
|
|
|
+ or ordinal_value > self.max_seen_ordinal_value # type: ignore
|
|
|
+ ):
|
|
|
+ self.max_seen_ordinal_value = ordinal_value
|
|
|
else:
|
|
|
+ # Only `pk`-based ordering violations (a `pk` less than or equal to its predecessor) are
|
|
|
+ # reported here; custom ordinals are de-duplicated and sorted in `build_model_map` first.
|
|
|
+ assert not isinstance(self.max_seen_ordinal_value, tuple)
|
|
|
+
|
|
|
findings.append(
|
|
|
ComparatorFinding(
|
|
|
kind=ComparatorFindingKind.UnorderedInput,
|
|
|
on=InstanceID(str(model_name), self.next_ordinal),
|
|
|
left_pk=pk if side == Side.left else None,
|
|
|
right_pk=pk if side == Side.right else None,
|
|
|
- reason=f"""instances not listed in ascending `pk` order; `pk` {pk} is less than or equal to {self.max_seen_pk} which precedes it""",
|
|
|
+ reason=f"""instances not listed in ascending `pk` order; `pk` {pk} is less than or equal to {self.max_seen_ordinal_value} which precedes it""",
|
|
|
)
|
|
|
)
|
|
|
|
|
|
obj["ordinal"] = self.next_ordinal
|
|
|
self.next_ordinal += 1
|
|
|
- return (obj["ordinal"], findings if findings else [])
|
|
|
+
|
|
|
+ return (InstanceID(str(model_name), obj["ordinal"]), findings if findings else [])
|
|
|
|
|
|
OrdinalCounters = Dict[NormalizedModelName, OrdinalCounter]
|
|
|
- ModelMap = Dict[InstanceID, JSONData]
|
|
|
+ ModelMap = Dict[NormalizedModelName, OrderedDict[InstanceID, JSONData]]
|
|
|
|
|
|
def build_model_map(
|
|
|
models: JSONData, side: Side, findings: ComparatorFindings
|
|
|
) -> Tuple[ModelMap, OrdinalCounters]:
|
|
|
"""Does two things in tandem: builds a map of InstanceID -> JSON model, and simultaneously builds a map of model name -> number of ordinals assigned."""
|
|
|
|
|
|
- model_map: ModelMap = {}
|
|
|
+ from sentry.db.models import BaseModel
|
|
|
+ from sentry.models.user import User
|
|
|
+
|
|
|
+ model_map: ModelMap = defaultdict(ordereddict)
|
|
|
ordinal_counters: OrdinalCounters = defaultdict(OrdinalCounter)
|
|
|
+ need_ordering: dict[NormalizedModelName, Dict[tuple, JSONData]] = defaultdict(dict)
|
|
|
+ pks_to_usernames: dict[int, str] = dict()
|
|
|
+
|
|
|
for model in models:
|
|
|
+ pk = model["pk"]
|
|
|
model_name = NormalizedModelName(model["model"])
|
|
|
- counter = ordinal_counters[model_name]
|
|
|
- ordinal, found = counter.assign(model, side)
|
|
|
- findings.extend(found)
|
|
|
- id = InstanceID(str(model_name), ordinal)
|
|
|
- model_map[id] = model
|
|
|
+ model_type = get_model(model_name)
|
|
|
+ if model_type is None or not issubclass(model_type, BaseModel):
|
|
|
+ raise RuntimeError("Unknown model class")
|
|
|
+
|
|
|
+ if model_type == User:
|
|
|
+ pks_to_usernames[pk] = model["fields"]["username"]
|
|
|
+
|
|
|
+ custom_ordinal_fields = model_type.get_relocation_ordinal_fields()
|
|
|
+ if custom_ordinal_fields is None:
|
|
|
+ id, found = ordinal_counters[model_name].assign(model, pk, side)
|
|
|
+ findings.extend(found)
|
|
|
+ model_map[model_name][id] = model
|
|
|
+ continue
|
|
|
+
|
|
|
+ custom_ordinal_parts = []
|
|
|
+ for field in custom_ordinal_fields:
|
|
|
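+ # Special case: for `user` pks, look through the user to the `username` instead. This lookup assumes the referenced user appeared earlier in `models`; otherwise it raises `KeyError`.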
|
|
|
+ if field == "user" or field == "user_id":
|
|
|
+ custom_ordinal_parts.append(pks_to_usernames[model["fields"][field]])
|
|
|
+ else:
|
|
|
+ custom_ordinal_parts.append(model["fields"][field])
|
|
|
+
|
|
|
+ ordinal = tuple(custom_ordinal_parts)
|
|
|
+ if need_ordering[model_name].get(ordinal) is not None:
|
|
|
+ findings.append(
|
|
|
+ ComparatorFinding(
|
|
|
+ kind=ComparatorFindingKind.DuplicateCustomOrdinal,
|
|
|
+ on=InstanceID(str(model_name), None),
|
|
|
+ left_pk=pk if side == Side.left else None,
|
|
|
+ right_pk=pk if side == Side.right else None,
|
|
|
+ reason=f"""custom ordinal value `{ordinal}` appears multiple times""",
|
|
|
+ )
|
|
|
+ )
|
|
|
+
|
|
|
+ need_ordering[model_name][ordinal] = model
|
|
|
+
|
|
|
+ for model_name, models in need_ordering.items():
|
|
|
+ # Sort the models by key, which is a tuple of ordered custom ordinal field values,
|
|
|
+ # specific to the model in question.
|
|
|
+ ordered_models = dict(sorted(models.items()))
|
|
|
+ for ordinal_value, model in ordered_models.items():
|
|
|
+ id, found = ordinal_counters[model_name].assign(model, ordinal_value, side)
|
|
|
+ findings.extend(found)
|
|
|
+ model_map[model_name][id] = model
|
|
|
+
|
|
|
return (model_map, ordinal_counters)
|
|
|
|
|
|
def json_lines(obj: JSONData) -> list[str]:
|
|
@@ -134,59 +193,63 @@ def validate(
|
|
|
|
|
|
# Save the pk -> ordinal mapping on both sides, so that we can decode foreign keys into this
|
|
|
# model that we encounter later.
|
|
|
- for id, right in right_models.items():
|
|
|
- if id.ordinal is None:
|
|
|
- raise RuntimeError("all InstanceIDs used for comparisons must have their ordinal set")
|
|
|
-
|
|
|
- left = left_models[id]
|
|
|
- left_pk_map.insert(
|
|
|
- NormalizedModelName(id.model), left_models[id]["pk"], id.ordinal, ImportKind.Inserted
|
|
|
- )
|
|
|
- right_pk_map.insert(
|
|
|
- NormalizedModelName(id.model), right["pk"], id.ordinal, ImportKind.Inserted
|
|
|
- )
|
|
|
+ for model_name, models in right_models.items():
|
|
|
+ for id, right in models.items():
|
|
|
+ assert id.ordinal is not None
|
|
|
+
|
|
|
+ left = left_models[model_name][id]
|
|
|
+ left_pk_map.insert(
|
|
|
+ NormalizedModelName(id.model),
|
|
|
+ left_models[model_name][id]["pk"],
|
|
|
+ id.ordinal,
|
|
|
+ ImportKind.Inserted,
|
|
|
+ )
|
|
|
+ right_pk_map.insert(
|
|
|
+ NormalizedModelName(id.model), right["pk"], id.ordinal, ImportKind.Inserted
|
|
|
+ )
|
|
|
|
|
|
# We only perform custom comparisons and JSON diffs on non-duplicate entries that exist in both
|
|
|
# outputs.
|
|
|
- for id, right in right_models.items():
|
|
|
- if id.ordinal is None:
|
|
|
- raise RuntimeError("all InstanceIDs used for comparisons must have their ordinal set")
|
|
|
-
|
|
|
- # Try comparators applicable for this specific model.
|
|
|
- left = left_models[id]
|
|
|
- if id.model in comparators:
|
|
|
- # We take care to run ALL of the `compare()` methods on each comparator before calling
|
|
|
- # any `scrub()` methods. This ensures that, in cases where a single model uses multiple
|
|
|
- # comparators that touch the same fields, one comparator does not accidentally scrub the
|
|
|
- # inputs for its follower. If `compare()` functions are well-behaved (that is, they
|
|
|
- # don't mutate their inputs), this should be sufficient to ensure that the order in
|
|
|
- # which comparators are applied does not change the final output.
|
|
|
- for cmp in comparators[id.model]:
|
|
|
- ex = cmp.existence(id, left, right)
|
|
|
- if ex:
|
|
|
- findings.extend(ex)
|
|
|
- continue
|
|
|
-
|
|
|
- if isinstance(cmp, ForeignKeyComparator):
|
|
|
- cmp.set_primary_key_maps(left_pk_map, right_pk_map)
|
|
|
-
|
|
|
- res = cmp.compare(id, left, right)
|
|
|
- if res:
|
|
|
- findings.extend(res)
|
|
|
- for cmp in comparators[id.model]:
|
|
|
- cmp.scrub(left, right)
|
|
|
-
|
|
|
- # Finally, perform a diff on the remaining JSON.
|
|
|
- diff = list(unified_diff(json_lines(left["fields"]), json_lines(right["fields"]), n=15))
|
|
|
- if diff:
|
|
|
- findings.append(
|
|
|
- ComparatorFinding(
|
|
|
- kind=ComparatorFindingKind.UnequalJSON,
|
|
|
- on=id,
|
|
|
- left_pk=left["pk"],
|
|
|
- right_pk=right["pk"],
|
|
|
- reason="\n " + "\n ".join(diff),
|
|
|
+ for model_name, models in right_models.items():
|
|
|
+ for id, right in models.items():
|
|
|
+ assert id.ordinal is not None
|
|
|
+
|
|
|
+ # Try comparators applicable for this specific model.
|
|
|
+ left = left_models[model_name][id]
|
|
|
+ if id.model in comparators:
|
|
|
+ # We take care to run ALL of the `compare()` methods on each comparator before
|
|
|
+ # calling any `scrub()` methods. This ensures that, in cases where a single model
|
|
|
+ # uses multiple comparators that touch the same fields, one comparator does not
|
|
|
+ # accidentally scrub the inputs for its follower. If `compare()` functions are
|
|
|
+ # well-behaved (that is, they don't mutate their inputs), this should be sufficient
|
|
|
+ # to ensure that the order in which comparators are applied does not change the
|
|
|
+ # final output.
|
|
|
+ for cmp in comparators[id.model]:
|
|
|
+ ex = cmp.existence(id, left, right)
|
|
|
+ if ex:
|
|
|
+ findings.extend(ex)
|
|
|
+ continue
|
|
|
+
|
|
|
+ if isinstance(cmp, ForeignKeyComparator):
|
|
|
+ cmp.set_primary_key_maps(left_pk_map, right_pk_map)
|
|
|
+
|
|
|
+ res = cmp.compare(id, left, right)
|
|
|
+ if res:
|
|
|
+ findings.extend(res)
|
|
|
+ for cmp in comparators[id.model]:
|
|
|
+ cmp.scrub(left, right)
|
|
|
+
|
|
|
+ # Finally, perform a diff on the remaining JSON.
|
|
|
+ diff = list(unified_diff(json_lines(left["fields"]), json_lines(right["fields"]), n=15))
|
|
|
+ if diff:
|
|
|
+ findings.append(
|
|
|
+ ComparatorFinding(
|
|
|
+ kind=ComparatorFindingKind.UnequalJSON,
|
|
|
+ on=id,
|
|
|
+ left_pk=left["pk"],
|
|
|
+ right_pk=right["pk"],
|
|
|
+ reason="\n " + "\n ".join(diff),
|
|
|
+ )
|
|
|
)
|
|
|
- )
|
|
|
|
|
|
return findings
|