Browse Source

feat(backup): Generate more dependency metadata (#56977)

We add 3 new kinds of metadata: a list of backing database tables sorted
in truncation order (that is, the order one would truncate them in to
avoid foreign key problems), a list of unique field sets for each model,
and a boolean tracking nullability for each foreign key. These are
useful for the following reasons:

1. The truncation-ordered table list will let us clear the (mock!) validation
database when running the validation script prior to import.
2. Tracking uniques provides a list of models that must handle
collisions in some special way at import time.
3. Tracking nullability is necessary to deduce which models we can
safely remove from an export JSON.

Issue: getsentry/team-ospo#196
Issue: getsentry/team-ospo#199
Alex Zaslavsky 1 year ago
parent
commit
d68bcebe6f

+ 12 - 1
bin/generate-model-dependency-fixtures

@@ -7,7 +7,12 @@ configure()
 
 import click
 
-from sentry.backup.dependencies import DependenciesJSONEncoder, dependencies, sorted_dependencies
+from sentry.backup.dependencies import (
+    DependenciesJSONEncoder,
+    dependencies,
+    get_model_name,
+    sorted_dependencies,
+)
 from sentry.testutils.factories import get_fixture_path  # noqa
 
 encoder = DependenciesJSONEncoder(
@@ -31,6 +36,7 @@ def main():
     detailed = {str(k): v for k, v in dependencies().items()}
     flat = {k: v.flatten() for k, v in detailed.items()}
     sorted = sorted_dependencies()
+    truncate = [dependencies()[get_model_name(m)].table_name for m in sorted_dependencies()]
 
     det_path = get_fixture_path("backup", "model_dependencies", "detailed.json")
     with open(det_path, "w+") as fixture:
@@ -44,6 +50,11 @@ def main():
     with open(det_path, "w+") as fixture:
         fixture.write(encoder.encode(sorted))
 
+    # Write the table names to a fixture, in the order one would list them for a TRUNCATE statement.
+    det_path = get_fixture_path("backup", "model_dependencies", "truncate.json")
+    with open(det_path, "w+") as fixture:
+        fixture.write(encoder.encode(truncate))
+
     click.echo(
         f"\nSuccess! The dependency mapping fixtures at {[det_path, flat_path]} were updated.\n"
     )

+ 14 - 15
bin/model-dependency-graphviz

@@ -15,7 +15,7 @@ from string import Template
 import click
 from django.db import models
 
-from sentry.backup.dependencies import ForeignFieldKind, ModelRelations, dependencies
+from sentry.backup.dependencies import ForeignField, ForeignFieldKind, ModelRelations, dependencies
 
 digraph = Template(
     """
@@ -86,10 +86,10 @@ class NodeColor(Enum):
 
 
 @unique
-class EdgeStyle(Enum):
-    Hybrid = "[color=green]"
-    Explicit = "[color=blue]"
-    Implicit = "[color=red]"
+class EdgeColor(Enum):
+    Hybrid = "color=green"
+    Explicit = "color=blue"
+    Implicit = "color=red"
 
 
 def print_model_node(model: models.base.ModelBase, silo: SiloMode) -> str:
@@ -113,18 +113,17 @@ def print_edges(mr: ModelRelations) -> str:
         return ""
 
     src = mr.model
-    return "\n    ".join([print_edge(src, ff.model, ff.kind) for ff in mr.foreign_keys.values()])
+    return "\n    ".join([print_edge(src, ff.model, ff) for ff in mr.foreign_keys.values()])
 
 
-def print_edge(
-    src: models.base.ModelBase, dest: models.base.ModelBase, kind: ForeignFieldKind
-) -> str:
-    style = EdgeStyle.Explicit
-    if kind == ForeignFieldKind.HybridCloudForeignKey:
-        style = EdgeStyle.Hybrid
-    elif kind == ForeignFieldKind.ImplicitForeignKey:
-        style = EdgeStyle.Implicit
-    return f""""{src.__name__}":e -> "{dest.__name__}":w {style.value};"""
+def print_edge(src: models.base.ModelBase, dest: models.base.ModelBase, field: ForeignField) -> str:
+    color = EdgeColor.Explicit
+    if field.kind == ForeignFieldKind.HybridCloudForeignKey:
+        color = EdgeColor.Hybrid
+    elif field.kind == ForeignFieldKind.ImplicitForeignKey:
+        color = EdgeColor.Implicit
+    style = "dashed" if field.nullable else "solid"
+    return f""""{src.__name__}":e -> "{dest.__name__}":w [{color.value},style={style}];"""
 
 
 def get_most_permissive_relocation_scope(mr: ModelRelations) -> RelocationScope:

File diff suppressed because it is too large
+ 463 - 66
fixtures/backup/model_dependencies/detailed.json


+ 1 - 1
fixtures/backup/model_dependencies/sorted.json

@@ -217,4 +217,4 @@
   "sentry.incidentsubscription",
   "sentry.incidenttrigger",
   "sentry.monitorincident"
-]
+]

+ 220 - 0
fixtures/backup/model_dependencies/truncate.json

@@ -0,0 +1,220 @@
+[
+  "django_session",
+  "django_site",
+  "sentry_option",
+  "sentry_controloption",
+  "sentry_regionoutbox",
+  "sentry_controloutbox",
+  "sentry_scheduleddeletion",
+  "sentry_regionscheduleddeletion",
+  "sentry_controlfileblob",
+  "sentry_controlfile",
+  "sentry_fileblob",
+  "sentry_file",
+  "sentry_broadcast",
+  "sentry_deletedorganization",
+  "sentry_email",
+  "sentry_organization",
+  "sentry_organizationmapping",
+  "sentry_identityprovider",
+  "sentry_docintegration",
+  "sentry_integration",
+  "sentry_integrationfeature",
+  "auth_user",
+  "sentry_organizationslugreservation",
+  "sentry_project",
+  "sentry_projectbookmark",
+  "sentry_projectkey",
+  "sentry_projectownership",
+  "sentry_projectplatform",
+  "sentry_projectredirect",
+  "sentry_promptsactivity",
+  "sentry_rawevent",
+  "sentry_recentsearch",
+  "sentry_relayusage",
+  "sentry_relay",
+  "sentry_repository",
+  "sentry_reprocessingreport",
+  "sentry_savedsearch",
+  "sentry_sentryfunction",
+  "sentry_regiontombstone",
+  "sentry_controltombstone",
+  "sentry_projecttransactionthresholdoverride",
+  "sentry_projecttransactionthreshold",
+  "sentry_useremail",
+  "sentry_userip",
+  "sentry_userpermission",
+  "sentry_userrole",
+  "sentry_userrole_users",
+  "sentry_timeseriessnapshot",
+  "sentry_discoversavedquery",
+  "sentry_monitor",
+  "sentry_monitorlocation",
+  "sentry_metricskeyindexer",
+  "sentry_stringindexer",
+  "sentry_perfstringindexer",
+  "sentry_exporteddata",
+  "sentry_exporteddatablob",
+  "nodestore_node",
+  "replays_replayrecordingsegment",
+  "social_auth_usersocialauth",
+  "hybridcloud_organizationslugreservationreplica",
+  "sentry_discoversavedqueryproject",
+  "sentry_processingissue",
+  "sentry_orgauthtoken",
+  "sentry_organizationonboardingtask",
+  "sentry_lostpasswordhash",
+  "sentry_latestappconnectbuildscheck",
+  "sentry_projectintegration",
+  "sentry_organizationintegration",
+  "sentry_externalissue",
+  "sentry_identity",
+  "sentry_grouptombstone",
+  "sentry_release",
+  "sentry_release_project",
+  "sentry_groupedmessage",
+  "sentry_organizationmember",
+  "sentry_featureadoption",
+  "sentry_eventuser",
+  "sentry_eventattachment",
+  "sentry_environment",
+  "sentry_environmentproject",
+  "sentry_customdynamicsamplingrule",
+  "sentry_customdynamicsamplingruleproject",
+  "sentry_distribution",
+  "sentry_deploy",
+  "sentry_deletedteam",
+  "sentry_deletedproject",
+  "sentry_projectdsymfile",
+  "sentry_dashboardtombstone",
+  "sentry_dashboard",
+  "sentry_dashboardproject",
+  "sentry_projectcounter",
+  "sentry_commitauthor",
+  "sentry_commit",
+  "sentry_broadcastseen",
+  "sentry_useravatar",
+  "sentry_projectavatar",
+  "sentry_organizationavatar",
+  "sentry_docintegrationavatar",
+  "sentry_fileblobindex",
+  "sentry_fileblobowner",
+  "sentry_controlfileblobindex",
+  "sentry_controlfileblobowner",
+  "sentry_authprovider",
+  "sentry_authidentity",
+  "auth_authenticator",
+  "sentry_assistant_activity",
+  "sentry_artifactbundleflatfileindex",
+  "sentry_artifactbundle",
+  "sentry_appconnectbuild",
+  "sentry_apikey",
+  "sentry_apiapplication",
+  "sentry_actor",
+  "sentry_useroption",
+  "sentry_projectoptions",
+  "sentry_organizationoptions",
+  "sentry_activity",
+  "sentry_apiauthorization",
+  "sentry_apigrant",
+  "sentry_apitoken",
+  "sentry_flatfileindexstate",
+  "sentry_artifactbundleindex",
+  "sentry_releaseartifactbundle",
+  "sentry_debugidartifactbundle",
+  "sentry_projectartifactbundle",
+  "sentry_auditlogentry",
+  "sentry_authidentityreplica",
+  "sentry_authproviderreplica",
+  "sentry_commitfilechange",
+  "sentry_dashboardwidget",
+  "sentry_proguardartifactrelease",
+  "sentry_grouphistory",
+  "sentry_team",
+  "sentry_groupowner",
+  "sentry_groupasignee",
+  "sentry_groupbookmark",
+  "sentry_groupcommitresolution",
+  "sentry_groupemailthread",
+  "sentry_groupenvironment",
+  "sentry_grouphash",
+  "sentry_groupinbox",
+  "sentry_grouplink",
+  "sentry_groupmeta",
+  "sentry_groupredirect",
+  "sentry_grouprelease",
+  "sentry_groupresolution",
+  "sentry_groupseen",
+  "sentry_groupshare",
+  "sentry_groupsnooze",
+  "sentry_organizationmembermapping",
+  "sentry_groupsubscription",
+  "sentry_externalactor",
+  "sentry_integrationexternalproject",
+  "sentry_sentryapp",
+  "sentry_repositoryprojectpathconfig",
+  "sentry_sentryappcomponent",
+  "sentry_sentryappinstallation",
+  "sentry_sentryappinstallationforprovider",
+  "sentry_sentryappinstallationtoken",
+  "sentry_latestrelease",
+  "sentry_notificationsettingoption",
+  "sentry_notificationsettingprovider",
+  "sentry_notificationsetting",
+  "sentry_organizationaccessrequest",
+  "sentry_platformexternalissue",
+  "sentry_eventprocessingissue",
+  "sentry_snubaquery",
+  "sentry_snubaqueryeventtype",
+  "sentry_querysubscription",
+  "sentry_projectteam",
+  "sentry_projectcodeowners",
+  "sentry_pull_request",
+  "sentry_pullrequest_commit",
+  "sentry_pullrequest_comment",
+  "sentry_releaseactivity",
+  "sentry_releasecommit",
+  "sentry_environmentrelease",
+  "sentry_releasefile",
+  "sentry_releaseheadcommit",
+  "sentry_releaseprojectenvironment",
+  "sentry_rule",
+  "sentry_ruleactivity",
+  "sentry_neglectedrule",
+  "sentry_rulefirehistory",
+  "sentry_servicehook",
+  "sentry_teamreplica",
+  "sentry_userreport",
+  "sentry_notificationaction",
+  "sentry_alertrule",
+  "sentry_alertruletrigger",
+  "sentry_alertruletriggerexclusion",
+  "sentry_alertruletriggeraction",
+  "sentry_alertruleactivity",
+  "sentry_performanceteamkeytransaction",
+  "sentry_monitorenvironment",
+  "sentry_releasethreshold",
+  "feedback_feedback",
+  "hybridcloud_apikeyreplica",
+  "sentry_monitorcheckin",
+  "sentry_alertruleexcludedprojects",
+  "sentry_incident",
+  "sentry_incidentseen",
+  "sentry_incidentproject",
+  "sentry_notificationactionproject",
+  "sentry_servicehookproject",
+  "sentry_rulesnooze",
+  "sentry_grouprulestatus",
+  "sentry_organizationmember_teams",
+  "sentry_dashboardwidgetquery",
+  "sentry_teamavatar",
+  "sentry_sentryappavatar",
+  "sentry_authprovider_default_teams",
+  "sentry_organizationmember_teamsreplica",
+  "sentry_pendingincidentsnapshot",
+  "sentry_incidentsnapshot",
+  "sentry_incidentactivity",
+  "sentry_incidentsubscription",
+  "sentry_incidenttrigger",
+  "sentry_monitorincident"
+]

+ 24 - 2
src/sentry/backup/dependencies.py

@@ -3,7 +3,7 @@ from __future__ import annotations
 from collections import defaultdict
 from enum import Enum, auto, unique
 from functools import lru_cache
-from typing import Dict, NamedTuple, Optional, Tuple, Type
+from typing import Dict, FrozenSet, NamedTuple, Optional, Set, Tuple, Type
 
 from django.db import models
 from django.db.models.fields.related import ForeignKey, OneToOneField
@@ -44,15 +44,18 @@ class ForeignField(NamedTuple):
 
     model: Type[models.base.Model]
     kind: ForeignFieldKind
+    nullable: bool
 
 
 class ModelRelations(NamedTuple):
     """What other models does this model depend on, and how?"""
 
-    model: Type[models.base.Model]
     foreign_keys: dict[str, ForeignField]
+    model: Type[models.base.Model]
     relocation_scope: RelocationScope | set[RelocationScope]
     silos: list[SiloMode]
+    table_name: str
+    uniques: list[frozenset[str]]
 
     def flatten(self) -> set[Type[models.base.Model]]:
         """Returns a flat list of all related models, omitting the kind of relation they have."""
@@ -127,6 +130,9 @@ class DependenciesJSONEncoder(json.JSONEncoder):
             return obj.name.lower().capitalize()
         if isinstance(obj, set):
             return sorted(list(obj), key=lambda obj: get_model_name(obj))
+        # JSON serialization of `uniques` values, which are stored in `frozenset`s.
+        if isinstance(obj, frozenset):
+            return sorted(list(obj))
         return super().default(obj)
 
 
@@ -237,9 +243,16 @@ def dependencies() -> dict[NormalizedModelName, ModelRelations]:
 
         for model in model_iterator:
             foreign_keys: Dict[str, ForeignField] = dict()
+            uniques: Set[FrozenSet[str]] = {
+                frozenset(combo) for combo in model._meta.unique_together
+            }
 
             # Now add a dependency for any FK relation visible to Django.
             for field in model._meta.get_fields():
+                is_nullable = getattr(field, "null", False)
+                if getattr(field, "unique", False):
+                    uniques.add(frozenset({field.name}))
+
                 rel_model = getattr(field.remote_field, "model", None)
                 if rel_model is not None and rel_model != model:
                     # TODO(hybrid-cloud): actor refactor. Add kludgy conditional preventing walking
@@ -251,17 +264,20 @@ def dependencies() -> dict[NormalizedModelName, ModelRelations]:
                         foreign_keys[field.name] = ForeignField(
                             model=rel_model,
                             kind=ForeignFieldKind.FlexibleForeignKey,
+                            nullable=is_nullable,
                         )
                     elif isinstance(field, ForeignKey):
                         foreign_keys[field.name] = ForeignField(
                             model=rel_model,
                             kind=ForeignFieldKind.DefaultForeignKey,
+                            nullable=is_nullable,
                         )
                 elif isinstance(field, HybridCloudForeignKey):
                     rel_model = models_from_names[NormalizedModelName(field.foreign_model_name)]
                     foreign_keys[field.name] = ForeignField(
                         model=rel_model,
                         kind=ForeignFieldKind.HybridCloudForeignKey,
+                        nullable=is_nullable,
                     )
 
             # Get all simple O2O relations as well.
@@ -275,11 +291,13 @@ def dependencies() -> dict[NormalizedModelName, ModelRelations]:
                         foreign_keys[field.name] = ForeignField(
                             model=rel_model,
                             kind=ForeignFieldKind.OneToOneCascadeDeletes,
+                            nullable=is_nullable,
                         )
                     elif isinstance(field, OneToOneField):
                         foreign_keys[field.name] = ForeignField(
                             model=rel_model,
                             kind=ForeignFieldKind.DefaultOneToOneField,
+                            nullable=is_nullable,
                         )
                     else:
                         raise RuntimeError("Unknown one to kind")
@@ -304,6 +322,7 @@ def dependencies() -> dict[NormalizedModelName, ModelRelations]:
                         foreign_keys[field.name] = ForeignField(
                             model=models_from_names[candidate],
                             kind=ForeignFieldKind.ImplicitForeignKey,
+                            nullable=False,
                         )
 
             model_dependencies_list[get_model_name(model)] = ModelRelations(
@@ -313,6 +332,9 @@ def dependencies() -> dict[NormalizedModelName, ModelRelations]:
                 silos=list(
                     getattr(model._meta, "silo_limit", ModelSiloLimit(SiloMode.MONOLITH)).modes
                 ),
+                table_name=model._meta.db_table,
+                # Sort the constituent sets alphabetically, so that we get consistent JSON output.
+                uniques=sorted(list(uniques), key=lambda u: ":".join(sorted(list(u)))),
             )
     return model_dependencies_list
 

+ 22 - 1
tests/sentry/backup/test_dependencies.py

@@ -1,6 +1,11 @@
 from difflib import unified_diff
 
-from sentry.backup.dependencies import DependenciesJSONEncoder, dependencies, sorted_dependencies
+from sentry.backup.dependencies import (
+    DependenciesJSONEncoder,
+    dependencies,
+    get_model_name,
+    sorted_dependencies,
+)
 from sentry.testutils.factories import get_fixture_path
 
 encoder = DependenciesJSONEncoder(
@@ -53,3 +58,19 @@ def test_sorted():
             "Model dependency list does not match fixture. If you are seeing this in CI, please run `bin/generate-model-dependency-fixtures` and re-upload:\n\n"
             + "\n".join(diff)
         )
+
+
+def test_truncate():
+    fixture_path = get_fixture_path("backup", "model_dependencies", "truncate.json")
+    with open(fixture_path) as fixture:
+        expect = fixture.read().splitlines()
+
+    actual = encoder.encode(
+        [dependencies()[get_model_name(m)].table_name for m in sorted_dependencies()]
+    ).splitlines()
+    diff = list(unified_diff(expect, actual, n=3))
+    if diff:
+        raise AssertionError(
+            "Model dependency list does not match fixture. If you are seeing this in CI, please run `bin/generate-model-dependency-fixtures` and re-upload:\n\n"
+            + "\n".join(diff)
+        )

Some files were not shown because too many files changed in this diff