1 year ago · 37e595baf5
--- a/bin/model-dependency-graphviz
+++ b/bin/model-dependency-graphviz
@@ -30,23 +30,36 @@ digraph Models {
 
				         node [shape="plaintext",style="none"]
			
 
				         key1 [label=<<table border="0" cellpadding="2" cellspacing="0" cellborder="0">
			
 
				             <tr><td align="right" port="i1">HybridCloudForeignKey</td></tr>
			
 
				-            <tr><td align="right" port="i2">Explicit ForeignKey</td></tr>
			
 
				-            <tr><td align="right" port="i3">Implicit ForeignKey</td></tr>
			
 
				-            <tr><td align="right" port="i4">Control Silo Model</td></tr>
			
 
				-            <tr><td align="right" port="i5">Region Silo Model</td></tr>
			
 
				-            <tr><td align="right" port="i6">Unexported Model</td></tr>
			
 
				+            <tr><td align="right" port="i2">HybridCloudForeignKey (nullable)</td></tr>
			
 
				+            <tr><td align="right" port="i3">Explicit ForeignKey</td></tr>
			
 
				+            <tr><td align="right" port="i4">Explicit ForeignKey (nullable)</td></tr>
			
 
				+            <tr><td align="right" port="i5">Implicit ForeignKey</td></tr>
			
 
				+            <tr><td align="right" port="i6">Implicit ForeignKey (nullable)</td></tr>
			
 
				+            <tr><td align="right" port="i7">Control Silo Model</td></tr>
			
 
				+            <tr><td align="right" port="i8">Control Silo Model (dangling)</td></tr>
			
 
				+            <tr><td align="right" port="i9">Region Silo Model</td></tr>
			
 
				+            <tr><td align="right" port="i10">Region Silo Model (dangling)</td></tr>
			
 
				+            <tr><td align="right" port="i11">Unexported Model</td></tr>
			
 
				         </table>>]
			
 
				         key2 [label=<<table border="0" cellpadding="2" cellspacing="0" cellborder="0">
			
 
				             <tr><td port="i1">&nbsp;</td></tr>
			
 
				             <tr><td port="i2">&nbsp;</td></tr>
			
 
				             <tr><td port="i3">&nbsp;</td></tr>
			
 
				-            <tr><td port="i4" bgcolor="lightcoral">&nbsp;</td></tr>
			
 
				-            <tr><td port="i5" bgcolor="lightblue">&nbsp;</td></tr>
			
 
				-            <tr><td port="i6" bgcolor="grey">&nbsp;</td></tr>
			
 
				+            <tr><td port="i4">&nbsp;</td></tr>
			
 
				+            <tr><td port="i5">&nbsp;</td></tr>
			
 
				+            <tr><td port="i6">&nbsp;</td></tr>
			
 
				+            <tr><td port="i7" bgcolor="#ffb6c1ff">&nbsp;</td></tr>
			
 
				+            <tr><td port="i8" bgcolor="#ffb6c166">&nbsp;</td></tr>
			
 
				+            <tr><td port="i9" bgcolor="#add8e6ff">&nbsp;</td></tr>
			
 
				+            <tr><td port="i10" bgcolor="#add8e666">&nbsp;</td></tr>
			
 
				+            <tr><td port="i11" bgcolor="#c0c0c0ff">&nbsp;</td></tr>
			
 
				         </table>>]
			
 
				-        key1:i1:e -> key2:i1:w [color=green]
			
 
				-        key1:i2:e -> key2:i2:w [color=blue]
			
 
				-        key1:i3:e -> key2:i3:w [color=red]
			
 
				+        key1:i1:e -> key2:i1:w [color="#008b00ff",style=solid]
			
 
				+        key1:i2:e -> key2:i2:w [color="#008b0066",style=dashed]
			
 
				+        key1:i3:e -> key2:i3:w [color="#0000eeff",style=solid]
			
 
				+        key1:i4:e -> key2:i4:w [color="#0000ee66",style=dashed]
			
 
				+        key1:i5:e -> key2:i5:w [color="#cd0000ff",style=solid]
			
 
				+        key1:i6:e -> key2:i6:w [color="#cd000066",style=dashed]
			
 
				     }
			
 
				 
			
 
				     $clusters
			
@@ -63,7 +76,7 @@ cluster = Template(
 
				         shape="rectangle"
			
 
				         fillcolor="$fill"
			
 
				         fontsize="40"
			
 
				-        color="grey"
			
 
				+        color="#c0c0c0"
			
 
				 
			
 
				         $nodes
			
 
				     }
			
@@ -73,28 +86,30 @@ cluster = Template(
 
				 
			
 
				 @unique
			
 
				 class ClusterColor(Enum):
			
 
				-    Purple = "lavenderblush"
			
 
				-    Yellow = "khaki"
			
 
				-    Green = "honeydew"
			
 
				-    Blue = "lightsteelblue1"
			
 
				+    Purple = "#fff0f5"  # lavenderblush
			
 
				+    Yellow = "#f0e68c"  # khaki
			
 
				+    Green = "#f0fff0"  # honeydew
			
 
				+    Blue = "#cae1ff"  # lightsteelblue1
			
 
				 
			
 
				 
			
 
				 @unique
			
 
				 class NodeColor(Enum):
			
 
				-    Red = "lightpink"
			
 
				-    Blue = "lightblue"
			
 
				+    Red = "#ffb6c1"  # lightpink
			
 
				+    Blue = "#add8e6"  # lightblue
			
 
				 
			
 
				 
			
 
				 @unique
			
 
				 class EdgeColor(Enum):
			
 
				-    Hybrid = "color=green"
			
 
				-    Explicit = "color=blue"
			
 
				-    Implicit = "color=red"
			
 
				+    Hybrid = "#008b00"  # green4
			
 
				+    Explicit = "#0000ee"  # blue2
			
 
				+    Implicit = "#cd0000"  # red3
			
 
				 
			
 
				 
			
 
				-def print_model_node(model: models.base.ModelBase, silo: SiloMode) -> str:
			
 
				+def print_model_node(mr: ModelRelations, silo: SiloMode) -> str:
			
 
				+    id = mr.model.__name__
			
 
				     color = NodeColor.Red if silo == SiloMode.CONTROL else NodeColor.Blue
			
 
				-    return f""""{model.__name__}" [fillcolor="{color.value}"];"""
			
 
				+    opacity = "66" if mr.dangling else "ff"
			
 
				+    return f""""{id}" [fillcolor="{color.value}{opacity}",color="#000000{opacity}"];"""
			
 
				 
			
 
				 
			
 
				 def print_rel_scope_subgraph(
			
@@ -104,7 +119,7 @@ def print_rel_scope_subgraph(
 
				         num=num,
			
 
				         name=name,
			
 
				         fill=color.value,
			
 
				-        nodes="\n        ".join([print_model_node(mr.model, mr.silos[0]) for mr in rels]),
			
 
				+        nodes="\n        ".join([print_model_node(mr, mr.silos[0]) for mr in rels]),
			
 
				     )
			
 
				 
			
 
				 
			
@@ -123,7 +138,7 @@ def print_edge(src: models.base.ModelBase, dest: models.base.ModelBase, field: F
 
				     elif field.kind == ForeignFieldKind.ImplicitForeignKey:
			
 
				         color = EdgeColor.Implicit
			
 
				     style = "dashed" if field.nullable else "solid"
			
 
				-    return f""""{src.__name__}":e -> "{dest.__name__}":w [{color.value},style={style}];"""
			
 
				+    return f""""{src.__name__}":e -> "{dest.__name__}":w [color="{color.value}",style={style}];"""
			
 
				 
			
 
				 
			
 
				 def get_most_permissive_relocation_scope(mr: ModelRelations) -> RelocationScope:
			
--- a/fixtures/backup/model_dependencies/detailed.json
+++ b/fixtures/backup/model_dependencies/detailed.json
--- a/src/sentry/backup/dependencies.py
+++ b/src/sentry/backup/dependencies.py
@@ -1,6 +1,7 @@
 
				 from __future__ import annotations
			
 
				 
			
 
				 from collections import defaultdict
			
 
				+from dataclasses import dataclass
			
 
				 from enum import Enum, auto, unique
			
 
				 from functools import lru_cache
			
 
				 from typing import Dict, FrozenSet, NamedTuple, Optional, Set, Tuple, Type
			
@@ -14,6 +15,71 @@ from sentry.silo import SiloMode
 
				 from sentry.utils import json
			
 
				 
			
 
				 
			
 
				+class NormalizedModelName:
			
 
				+    """
			
 
				+    A wrapper type that ensures that the contained model name has been properly normalized. A "normalized" model name is one that is identical to the name as it appears in an exported JSON backup, so a string of the form `{app_label.lower()}.{model_name.lower()}`.
			
 
				+    """
			
 
				+
			
 
				+    __model_name: str
			
 
				+
			
 
				+    def __init__(self, model_name: str):
			
 
				+        if "." not in model_name:
			
 
				+            raise TypeError("cannot create NormalizedModelName from invalid input string")
			
 
				+        self.__model_name = model_name.lower()
			
 
				+
			
 
				+    def __hash__(self):
			
 
				+        return hash(self.__model_name)
			
 
				+
			
 
				+    def __eq__(self, other) -> bool:
			
 
				+        if other is None:
			
 
				+            return False
			
 
				+        if not isinstance(other, self.__class__):
			
 
				+            raise TypeError(
			
 
				+                "NormalizedModelName can only be compared with other NormalizedModelName"
			
 
				+            )
			
 
				+        return self.__model_name == other.__model_name
			
 
				+
			
 
				+    def __lt__(self, other) -> bool:
			
 
				+        if not isinstance(other, self.__class__):
			
 
				+            raise TypeError(
			
 
				+                "NormalizedModelName can only be compared with other NormalizedModelName"
			
 
				+            )
			
 
				+        return self.__model_name < other.__model_name
			
 
				+
			
 
				+    def __str__(self) -> str:
			
 
				+        return self.__model_name
			
 
				+
			
 
				+
			
 
				+# A "root" model is one that is the source of a particular relocation scope - ex, `User` is the root
			
 
				+# of the `User` relocation scope, the model from which all other (non-dangling; see below) models in
			
 
				+# that scope are referenced.
			
 
				+#
			
 
				+# TODO(getsentry/team-ospo#190): We should find a better way to store this information than a magic
			
 
				+# list in this file. We should probably make a field (or method?) on `BaseModel` instead.
			
 
				+@unique
			
 
				+class RelocationRootModels(Enum):
			
 
				+    """
			
 
				+    Record the "root" models for a given `RelocationScope`.
			
 
				+    """
			
 
				+
			
 
				+    Excluded: list[NormalizedModelName] = []
			
 
				+    User = [NormalizedModelName("sentry.user")]
			
 
				+    Organization = [NormalizedModelName("sentry.organization")]
			
 
				+    Config = [
			
 
				+        NormalizedModelName("sentry.controloption"),
			
 
				+        NormalizedModelName("sentry.option"),
			
 
				+        NormalizedModelName("sentry.relay"),
			
 
				+        NormalizedModelName("sentry.relayusage"),
			
 
				+        NormalizedModelName("sentry.userrole"),
			
 
				+    ]
			
 
				+    # TODO(getsentry/team-ospo#188): Split out extension scope root models from this list.
			
 
				+    Global = [
			
 
				+        NormalizedModelName("sentry.apiapplication"),
			
 
				+        NormalizedModelName("sentry.integration"),
			
 
				+        NormalizedModelName("sentry.sentryapp"),
			
 
				+    ]
			
 
				+
			
 
				+
			
 
				 @unique
			
 
				 class ForeignFieldKind(Enum):
			
 
				     """Kinds of foreign fields that we care about."""
			
@@ -47,9 +113,20 @@ class ForeignField(NamedTuple):
 
				     nullable: bool
			
 
				 
			
 
				 
			
 
				-class ModelRelations(NamedTuple):
			
 
				+@dataclass
			
 
				+class ModelRelations:
			
 
				     """What other models does this model depend on, and how?"""
			
 
				 
			
 
				+    # A "dangling" model is one that does not transitively contain a non-nullable `ForeignField`
			
 
				+    # reference to at least one of the `RelocationRootModels` listed above.
			
 
				+    #
			
 
				+    # TODO(getsentry/team-ospo#190): A model may or may not be "dangling" in different
			
 
				+    # `ExportScope`s - for example, a model in `RelocationScope.Organization` may have a single,
			
 
				+    # non-nullable `ForeignField` reference to a root model in `RelocationScope.Config`. This would
			
 
				+    # cause it to be dangling when we do an `ExportScope.Organization` export, but non-dangling if
			
 
				+    # we do an `ExportScope.Global` export. HOWEVER, as best as I can tell, this situation does not
			
 
				+    # actually exist today, so we can ignore this subtlety for now and just us a boolean here.
			
 
				+    dangling: Optional[bool]
			
 
				     foreign_keys: dict[str, ForeignField]
			
 
				     model: Type[models.base.Model]
			
 
				     relocation_scope: RelocationScope | set[RelocationScope]
			
@@ -63,41 +140,6 @@ class ModelRelations(NamedTuple):
 
				         return {ff.model for ff in self.foreign_keys.values()}
			
 
				 
			
 
				 
			
 
				-class NormalizedModelName:
			
 
				-    """
			
 
				-    A wrapper type that ensures that the contained model name has been properly normalized. A "normalized" model name is one that is identical to the name as it appears in an exported JSON backup, so a string of the form `{app_label.lower()}.{model_name.lower()}`.
			
 
				-    """
			
 
				-
			
 
				-    __model_name: str
			
 
				-
			
 
				-    def __init__(self, model_name: str):
			
 
				-        if "." not in model_name:
			
 
				-            raise TypeError("cannot create NormalizedModelName from invalid input string")
			
 
				-        self.__model_name = model_name.lower()
			
 
				-
			
 
				-    def __hash__(self):
			
 
				-        return hash(self.__model_name)
			
 
				-
			
 
				-    def __eq__(self, other) -> bool:
			
 
				-        if other is None:
			
 
				-            return False
			
 
				-        if not isinstance(other, self.__class__):
			
 
				-            raise TypeError(
			
 
				-                "NormalizedModelName can only be compared with other NormalizedModelName"
			
 
				-            )
			
 
				-        return self.__model_name == other.__model_name
			
 
				-
			
 
				-    def __lt__(self, other) -> bool:
			
 
				-        if not isinstance(other, self.__class__):
			
 
				-            raise TypeError(
			
 
				-                "NormalizedModelName can only be compared with other NormalizedModelName"
			
 
				-            )
			
 
				-        return self.__model_name < other.__model_name
			
 
				-
			
 
				-    def __str__(self) -> str:
			
 
				-        return self.__model_name
			
 
				-
			
 
				-
			
 
				 def get_model_name(model: type[models.Model] | models.Model) -> NormalizedModelName:
			
 
				     return NormalizedModelName(f"{model._meta.app_label}.{model._meta.object_name}")
			
 
				 
			
@@ -119,6 +161,8 @@ class DependenciesJSONEncoder(json.JSONEncoder):
 
				     def default(self, obj):
			
 
				         if meta := getattr(obj, "_meta", None):
			
 
				             return f"{meta.app_label}.{meta.object_name}".lower()
			
 
				+        if isinstance(obj, ModelRelations):
			
 
				+            return obj.__dict__
			
 
				         if isinstance(obj, ForeignFieldKind):
			
 
				             return obj.name
			
 
				         if isinstance(obj, RelocationScope):
			
@@ -226,8 +270,8 @@ def dependencies() -> dict[NormalizedModelName, ModelRelations]:
 
				     from sentry.models.actor import Actor
			
 
				     from sentry.models.team import Team
			
 
				 
			
 
				-    # Process the list of models, and get the list of dependencies
			
 
				-    model_dependencies_list: Dict[NormalizedModelName, ModelRelations] = {}
			
 
				+    # Process the list of models, and get the list of dependencies.
			
 
				+    model_dependencies_dict: Dict[NormalizedModelName, ModelRelations] = {}
			
 
				     app_configs = apps.get_app_configs()
			
 
				     models_from_names = {
			
 
				         get_model_name(model): model
			
@@ -285,6 +329,10 @@ def dependencies() -> dict[NormalizedModelName, ModelRelations]:
 
				                 field for field in model._meta.get_fields() if isinstance(field, OneToOneField)
			
 
				             ]
			
 
				             for field in one_to_one_fields:
			
 
				+                is_nullable = getattr(field, "null", False)
			
 
				+                if getattr(field, "unique", False):
			
 
				+                    uniques.add(frozenset({field.name}))
			
 
				+
			
 
				                 rel_model = getattr(field.remote_field, "model", None)
			
 
				                 if rel_model is not None and rel_model != model:
			
 
				                     if isinstance(field, OneToOneCascadeDeletes):
			
@@ -311,6 +359,10 @@ def dependencies() -> dict[NormalizedModelName, ModelRelations]:
 
				                 or isinstance(field, BoundedPositiveIntegerField)
			
 
				             ]
			
 
				             for field in simple_integer_fields:
			
 
				+                is_nullable = getattr(field, "null", False)
			
 
				+                if getattr(field, "unique", False):
			
 
				+                    uniques.add(frozenset({field.name}))
			
 
				+
			
 
				                 # "actor_id", when used as a simple integer field rather than a `ForeignKey` into an
			
 
				                 # `Actor`, refers to a unified but loosely specified means by which to index into a
			
 
				                 # either a `Team` or `User`, before this pattern was formalized by the official
			
@@ -322,12 +374,13 @@ def dependencies() -> dict[NormalizedModelName, ModelRelations]:
 
				                         foreign_keys[field.name] = ForeignField(
			
 
				                             model=models_from_names[candidate],
			
 
				                             kind=ForeignFieldKind.ImplicitForeignKey,
			
 
				-                            nullable=False,
			
 
				+                            nullable=is_nullable,
			
 
				                         )
			
 
				 
			
 
				-            model_dependencies_list[get_model_name(model)] = ModelRelations(
			
 
				-                model=model,
			
 
				+            model_dependencies_dict[get_model_name(model)] = ModelRelations(
			
 
				+                dangling=None,
			
 
				                 foreign_keys=foreign_keys,
			
 
				+                model=model,
			
 
				                 relocation_scope=getattr(model, "__relocation_scope__", RelocationScope.Excluded),
			
 
				                 silos=list(
			
 
				                     getattr(model._meta, "silo_limit", ModelSiloLimit(SiloMode.MONOLITH)).modes
			
@@ -336,7 +389,54 @@ def dependencies() -> dict[NormalizedModelName, ModelRelations]:
 
				                 # Sort the constituent sets alphabetically, so that we get consistent JSON output.
			
 
				                 uniques=sorted(list(uniques), key=lambda u: ":".join(sorted(list(u)))),
			
 
				             )
			
 
				-    return model_dependencies_list
			
 
				+
			
 
				+    # Get a flat list of "root" models, then mark all of them as non-dangling.
			
 
				+    relocation_root_models: list[NormalizedModelName] = []
			
 
				+    for root_models in RelocationRootModels:
			
 
				+        relocation_root_models.extend(root_models.value)
			
 
				+    for model_name in relocation_root_models:
			
 
				+        model_dependencies_dict[model_name].dangling = False
			
 
				+
			
 
				+    # Now that all `ModelRelations` have been added to the `model_dependencies_dict`, we can circle
			
 
				+    # back and figure out which ones are actually dangling. We do this by marking all of the root
			
 
				+    # models non-dangling, then traversing from every other model to a (possible) root model
			
 
				+    # recursively. At this point there should be no circular reference chains, so if we encounter
			
 
				+    # them, fail immediately.
			
 
				+    def resolve_dangling(seen: Set[NormalizedModelName], model_name: NormalizedModelName) -> bool:
			
 
				+        model_relations = model_dependencies_dict[model_name]
			
 
				+        model_name = get_model_name(model_relations.model)
			
 
				+        if model_name in seen:
			
 
				+            raise RuntimeError(
			
 
				+                f"Circular dependency: {model_name} cannot transitively reference itself"
			
 
				+            )
			
 
				+        if model_relations.dangling is not None:
			
 
				+            return model_relations.dangling
			
 
				+
			
 
				+        # TODO(getsentry/team-ospo#190): Maybe make it so that `Global` models are never "dangling",
			
 
				+        # since we want to export 100% of models in `ExportScope.Global` anyway?
			
 
				+
			
 
				+        seen.add(model_name)
			
 
				+
			
 
				+        # If we are able to successfully over all of the foreign keys without encountering a
			
 
				+        # dangling reference, we know that this model is dangling as well.
			
 
				+        model_relations.dangling = True
			
 
				+        for ff in model_relations.foreign_keys.values():
			
 
				+            if not ff.nullable:
			
 
				+                foreign_model_name = get_model_name(ff.model)
			
 
				+                if not resolve_dangling(seen, foreign_model_name):
			
 
				+                    # We only need one non-dangling reference to mark this model as non-dangling as
			
 
				+                    # well.
			
 
				+                    model_relations.dangling = False
			
 
				+                    break
			
 
				+
			
 
				+        seen.remove(model_name)
			
 
				+        return model_relations.dangling
			
 
				+
			
 
				+    for model_name in model_dependencies_dict.keys():
			
 
				+        if model_name not in relocation_root_models:
			
 
				+            resolve_dangling(set(), model_name)
			
 
				+
			
 
				+    return model_dependencies_dict
			
 
				 
			
 
				 
			
 
				 # No arguments, so we lazily cache the result after the first calculation.
			
@@ -347,9 +447,9 @@ def sorted_dependencies() -> list[Type[models.base.Model]]:
 
				     Similar to Django's algorithm except that we discard the importance of natural keys
			
 
				     when sorting dependencies (ie, it works without them)."""
			
 
				 
			
 
				-    model_dependencies_list = list(dependencies().values())
			
 
				-    model_dependencies_list.reverse()
			
 
				-    model_set = {md.model for md in model_dependencies_list}
			
 
				+    model_dependencies_dict = list(dependencies().values())
			
 
				+    model_dependencies_dict.reverse()
			
 
				+    model_set = {md.model for md in model_dependencies_dict}
			
 
				 
			
 
				     # Now sort the models to ensure that dependencies are met. This
			
 
				     # is done by repeatedly iterating over the input list of models.
			
@@ -360,11 +460,11 @@ def sorted_dependencies() -> list[Type[models.base.Model]]:
 
				     # If we do a full iteration without a promotion, that means there are
			
 
				     # circular dependencies in the list.
			
 
				     model_list = []
			
 
				-    while model_dependencies_list:
			
 
				+    while model_dependencies_dict:
			
 
				         skipped = []
			
 
				         changed = False
			
 
				-        while model_dependencies_list:
			
 
				-            model_deps = model_dependencies_list.pop()
			
 
				+        while model_dependencies_dict:
			
 
				+            model_deps = model_dependencies_dict.pop()
			
 
				             deps = model_deps.flatten()
			
 
				             model = model_deps.model
			
 
				 
			
@@ -388,6 +488,6 @@ def sorted_dependencies() -> list[Type[models.base.Model]]:
 
				                     for m in sorted(skipped, key=lambda mr: get_model_name(mr.model))
			
 
				                 )
			
 
				             )
			
 
				-        model_dependencies_list = skipped
			
 
				+        model_dependencies_dict = skipped
			
 
				 
			
 
				     return model_list