1 year ago · ffce497273
--- a/fixtures/backup/fresh-install.json
+++ b/fixtures/backup/fresh-install.json
@@ -0,0 +1,315 @@
 
				+[
			
 
				+{
			
 
				+  "model": "sites.site",
			
 
				+  "pk": 1,
			
 
				+  "fields": {
			
 
				+    "domain": "example.com",
			
 
				+    "name": "example.com"
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.option",
			
 
				+  "pk": 1,
			
 
				+  "fields": {
			
 
				+    "key": "sentry:last_worker_ping",
			
 
				+    "last_updated": "2023-06-27T21:40:01.305Z",
			
 
				+    "last_updated_by": "unknown",
			
 
				+    "value": 1687902001.2623305
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.option",
			
 
				+  "pk": 2,
			
 
				+  "fields": {
			
 
				+    "key": "sentry:last_worker_version",
			
 
				+    "last_updated": "2023-06-27T21:40:01.312Z",
			
 
				+    "last_updated_by": "unknown",
			
 
				+    "value": "\"23.7.0.dev0\""
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.option",
			
 
				+  "pk": 3,
			
 
				+  "fields": {
			
 
				+    "key": "sentry:install-id",
			
 
				+    "last_updated": "2023-06-23T00:03:42.785Z",
			
 
				+    "last_updated_by": "unknown",
			
 
				+    "value": "\"1bc81f9440424e908a73ca377581cb52682db68a\""
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.option",
			
 
				+  "pk": 4,
			
 
				+  "fields": {
			
 
				+    "key": "sentry:latest_version",
			
 
				+    "last_updated": "2023-06-27T21:25:02.602Z",
			
 
				+    "last_updated_by": "unknown",
			
 
				+    "value": "\"23.6.1\""
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.actor",
			
 
				+  "pk": 1,
			
 
				+  "fields": {
			
 
				+    "type": 0,
			
 
				+    "user_id": null,
			
 
				+    "team": 1
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.email",
			
 
				+  "pk": 1,
			
 
				+  "fields": {
			
 
				+    "email": "testing@example.com",
			
 
				+    "date_added": "2023-06-22T22:59:55.531Z"
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.organization",
			
 
				+  "pk": 1,
			
 
				+  "fields": {
			
 
				+    "name": "Testing",
			
 
				+    "slug": "testing",
			
 
				+    "status": 0,
			
 
				+    "date_added": "2023-06-22T22:54:28.773Z",
			
 
				+    "default_role": "member",
			
 
				+    "is_test": false,
			
 
				+    "flags": "1"
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.user",
			
 
				+  "pk": 1,
			
 
				+  "fields": {
			
 
				+    "password": "pbkdf2_sha256$150000$iEvdIknqYjTr$+QsGn0tfIJ1FZLxQI37mVU1gL2KbL/wqjMtG/dFhsMA=",
			
 
				+    "last_login": null,
			
 
				+    "username": "testing@example.com",
			
 
				+    "name": "",
			
 
				+    "email": "testing@example.com",
			
 
				+    "is_staff": true,
			
 
				+    "is_active": true,
			
 
				+    "is_superuser": true,
			
 
				+    "is_managed": false,
			
 
				+    "is_sentry_app": null,
			
 
				+    "is_password_expired": false,
			
 
				+    "last_password_change": "2023-06-22T22:59:57.023Z",
			
 
				+    "flags": "0",
			
 
				+    "session_nonce": null,
			
 
				+    "date_joined": "2023-06-22T22:59:55.488Z",
			
 
				+    "last_active": "2023-06-22T22:59:55.489Z",
			
 
				+    "avatar_type": 0,
			
 
				+    "avatar_url": null
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.organizationmapping",
			
 
				+  "pk": 1,
			
 
				+  "fields": {
			
 
				+    "organization_id": 1,
			
 
				+    "slug": "testing",
			
 
				+    "name": "Testing",
			
 
				+    "date_created": "2023-06-22T22:54:29.123Z",
			
 
				+    "customer_id": null,
			
 
				+    "verified": true,
			
 
				+    "idempotency_key": "",
			
 
				+    "region_name": "--monolith--",
			
 
				+    "status": 0
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.relayusage",
			
 
				+  "pk": 1,
			
 
				+  "fields": {
			
 
				+    "relay_id": "541d8473-0a90-4a4e-b536-704f223521b0",
			
 
				+    "version": "23.6.1",
			
 
				+    "first_seen": "2023-06-22T23:07:12.275Z",
			
 
				+    "last_seen": "2023-06-27T21:27:32.702Z",
			
 
				+    "public_key": "mEOy5fxmUgSEgpDWdovUMgBlkbo-ENo3bP8lyB5Z1-Q"
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.relay",
			
 
				+  "pk": 1,
			
 
				+  "fields": {
			
 
				+    "relay_id": "541d8473-0a90-4a4e-b536-704f223521b0",
			
 
				+    "public_key": "mEOy5fxmUgSEgpDWdovUMgBlkbo-ENo3bP8lyB5Z1-Q",
			
 
				+    "first_seen": null,
			
 
				+    "last_seen": null,
			
 
				+    "is_internal": true
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.useremail",
			
 
				+  "pk": 1,
			
 
				+  "fields": {
			
 
				+    "user": [
			
 
				+      "testing@example.com"
			
 
				+    ],
			
 
				+    "email": "testing@example.com",
			
 
				+    "validation_hash": "mCnWesSVvYQcq7qXQ36AZHwosAd6cghE",
			
 
				+    "date_hash_added": "2023-06-22T22:59:55.521Z",
			
 
				+    "is_verified": false
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.userrole",
			
 
				+  "pk": 1,
			
 
				+  "fields": {
			
 
				+    "date_updated": "2023-06-22T23:00:00.123Z",
			
 
				+    "date_added": "2023-06-22T22:54:27.960Z",
			
 
				+    "name": "Super Admin",
			
 
				+    "permissions": "['broadcasts.admin', 'users.admin', 'options.admin']"
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.userroleuser",
			
 
				+  "pk": 1,
			
 
				+  "fields": {
			
 
				+    "date_updated": "2023-06-22T23:00:00.123Z",
			
 
				+    "date_added": "2023-06-22T22:59:57.073Z",
			
 
				+    "user": [
			
 
				+      "testing@example.com"
			
 
				+    ],
			
 
				+    "role": 1
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.team",
			
 
				+  "pk": 1,
			
 
				+  "fields": {
			
 
				+    "organization": 1,
			
 
				+    "slug": "testing",
			
 
				+    "name": "Testing",
			
 
				+    "status": 0,
			
 
				+    "actor": 1,
			
 
				+    "idp_provisioned": false,
			
 
				+    "date_added": "2023-06-22T22:54:28.821Z",
			
 
				+    "org_role": null
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.organizationmember",
			
 
				+  "pk": 1,
			
 
				+  "fields": {
			
 
				+    "organization": 1,
			
 
				+    "user_id": 1,
			
 
				+    "email": null,
			
 
				+    "role": "owner",
			
 
				+    "flags": "0",
			
 
				+    "token": null,
			
 
				+    "date_added": "2023-06-22T22:59:55.561Z",
			
 
				+    "token_expires_at": null,
			
 
				+    "has_global_access": true,
			
 
				+    "inviter_id": null,
			
 
				+    "invite_status": 0,
			
 
				+    "type": 50,
			
 
				+    "user_is_active": true,
			
 
				+    "user_email": "testing@example.com"
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.project",
			
 
				+  "pk": 1,
			
 
				+  "fields": {
			
 
				+    "slug": "internal",
			
 
				+    "name": "Internal",
			
 
				+    "forced_color": null,
			
 
				+    "organization": 1,
			
 
				+    "public": false,
			
 
				+    "date_added": "2023-06-22T22:54:28.851Z",
			
 
				+    "status": 0,
			
 
				+    "first_event": null,
			
 
				+    "flags": "10",
			
 
				+    "platform": null
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.projectkey",
			
 
				+  "pk": 1,
			
 
				+  "fields": {
			
 
				+    "project": 1,
			
 
				+    "label": "Default",
			
 
				+    "public_key": "c1373be35a7944808c8bcfede8097df1",
			
 
				+    "secret_key": "d4d14d244f8e48788673116ca7646c93",
			
 
				+    "roles": "1",
			
 
				+    "status": 0,
			
 
				+    "date_added": "2023-06-22T22:54:28.932Z",
			
 
				+    "rate_limit_count": null,
			
 
				+    "rate_limit_window": null,
			
 
				+    "data": {
			
 
				+      "dynamicSdkLoaderOptions": {
			
 
				+        "hasPerformance": true,
			
 
				+        "hasReplay": true
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.rule",
			
 
				+  "pk": 1,
			
 
				+  "fields": {
			
 
				+    "project": 1,
			
 
				+    "environment_id": null,
			
 
				+    "label": "Send a notification for new issues",
			
 
				+    "data": "{\"match\":\"all\",\"conditions\":[{\"id\":\"sentry.rules.conditions.first_seen_event.FirstSeenEventCondition\"}],\"actions\":[{\"id\":\"sentry.mail.actions.NotifyEmailAction\",\"targetType\":\"IssueOwners\",\"targetIdentifier\":null,\"fallthroughType\":\"ActiveMembers\"}]}",
			
 
				+    "status": 0,
			
 
				+    "source": 0,
			
 
				+    "owner": null,
			
 
				+    "date_added": "2023-06-22T22:54:28.982Z"
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.projectteam",
			
 
				+  "pk": 1,
			
 
				+  "fields": {
			
 
				+    "project": 1,
			
 
				+    "team": 1
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.organizationmemberteam",
			
 
				+  "pk": 1,
			
 
				+  "fields": {
			
 
				+    "team": 1,
			
 
				+    "organizationmember": 1,
			
 
				+    "is_active": true,
			
 
				+    "role": null
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.projectoption",
			
 
				+  "pk": 1,
			
 
				+  "fields": {
			
 
				+    "project": 1,
			
 
				+    "key": "sentry:relay-rev",
			
 
				+    "value": "\"883653e754e441f9aa54052b52482bdb\""
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.projectoption",
			
 
				+  "pk": 2,
			
 
				+  "fields": {
			
 
				+    "project": 1,
			
 
				+    "key": "sentry:relay-rev-lastchange",
			
 
				+    "value": "\"2023-06-22T22:54:29.060271Z\""
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.projectoption",
			
 
				+  "pk": 3,
			
 
				+  "fields": {
			
 
				+    "project": 1,
			
 
				+    "key": "sentry:option-epoch",
			
 
				+    "value": 11
			
 
				+  }
			
 
				+},
			
 
				+{
			
 
				+  "model": "sentry.projectoption",
			
 
				+  "pk": 4,
			
 
				+  "fields": {
			
 
				+    "project": 1,
			
 
				+    "key": "sentry:origins",
			
 
				+    "value": "[\"*\"]"
			
 
				+  }
			
 
				+}
			
 
				+]
			
--- a/src/sentry/runner/commands/backup.py
+++ b/src/sentry/runner/commands/backup.py
@@ -1,3 +1,5 @@
 
				+from __future__ import annotations
			
 
				+
			
 
				 from io import StringIO
			
 
				 
			
 
				 import click
			
@@ -14,7 +16,7 @@ EXCLUDED_APPS = frozenset(("auth", "contenttypes"))
 
				 @click.argument("src", type=click.File("rb"))
			
 
				 @configuration
			
 
				 def import_(src):
			
 
				-    "Imports data from a Sentry export."
			
 
				+    """CLI command wrapping the `exec_import` functionality."""
			
 
				 
			
 
				     try:
			
 
				         with transaction.atomic():
			
@@ -51,7 +53,9 @@ def sort_dependencies():
 
				     """
			
 
				     from django.apps import apps
			
 
				 
			
 
				-    from sentry.models import Actor, Team, User
			
 
				+    from sentry.models.actor import Actor
			
 
				+    from sentry.models.team import Team
			
 
				+    from sentry.models.user import User
			
 
				 
			
 
				     # Process the list of models, and get the list of dependencies
			
 
				     model_dependencies = []
			
@@ -145,7 +149,7 @@ def sort_dependencies():
 
				 @click.option("--exclude", default=None, help="Models to exclude from export.", metavar="MODELS")
			
 
				 @configuration
			
 
				 def export(dest, silent, indent, exclude):
			
 
				-    "Exports core metadata for the Sentry installation."
			
 
				+    """CLI command wrapping the `exec_export` functionality."""
			
 
				 
			
 
				     if exclude is None:
			
 
				         exclude = ()
			
--- a/tests/sentry/backup/README.md
+++ b/tests/sentry/backup/README.md
@@ -0,0 +1,42 @@
 
				+# Backup tests
			
 
				+
			
 
				+This directory tests a number of scenarios where we take an empty database, import a given backup
			
 
				+`.json` file, optionally perform some transform, re-export it and validate the diff. The expected
			
 
				+result is that only the changes we have made are reflected in the final diff between the original
			
 
				+input `.json` that we imported, and the final output `.json` that we exported.
			
 
				+
			
 
				+## Comparators
			
 
				+
			
 
				+A number of fields in the export JSON format are not so easily compared before and after running
			
 
				+through an import/export cycle. We introduce "comparators" to account for such cases. Comparators
			
 
				+are custom, model-specific ways of comparing portions of two json files that take into account more
			
 
				+nuanced validations beyond the simple character-for-character matching that the default diffing
			
 
				+algorithm provides for us.
			
 
				+
			
 
				+For example, `date_updated` fields that change due to the very act of importing the JSON, or hashes
			
 
				+that may change between import and export. In these cases, we may choose to simply validate that the
			
 
				+exported `date_updated` is greater than the imported value for the same model instance, or that
			
 
				+hashes match some general regex rule around size and valid characters without being identical.
			
 
				+
			
 
				+Comparator functions should implement the `JSONMutatingComparator` callback protocol, and be
			
 
				+included in the `COMPARATORS` dictionary, matching them to the models they are applicable to. Note
			
 
				+that the comparator is explicitly described as "mutating" - this is important, as part of its
			
 
				+functionality is to _modify the compared fields in both the expected and actual output, so that
			
 
				+text-based JSON diffing does not fail on them_. In the example above, the `date_updated` field in
			
 
				+_both_ the expected and actual JSON should be replaced with some obvious sentinel value indicating
			
 
				+that a comparator-initiated mutation took place, like `__COMPARATOR_DATE_UPDATED__`.
			
 
				+
			
 
				+While it may be tempting to remove the offending field, or to make the actual match the expected
			
 
				+after the comparison is completed, this is discouraged, as it will result in the subsequent JSON
			
 
				+diff looking identical, potentially confusing future readers. Better to just throw up a very obvious
			
 
				+signal that "a framework-generated replacement occurred here" to make future debugging less
			
 
				+stressful.
			
 
				+
			
 
				+## Snapshots
			
 
				+
			
 
				+A number of default starter snapshots are provided to bootstrap this test flow.
			
 
				+
			
 
				+### fresh_install.json
			
 
				+
			
 
				+This represents the state of the database immediately after running `./install.sh` to create a new
			
 
				+instance of `self-hosted`.
			
--- a/tests/sentry/backup/__init__.py
+++ b/tests/sentry/backup/__init__.py
--- a/tests/sentry/backup/test_correctness.py
+++ b/tests/sentry/backup/test_correctness.py
@@ -0,0 +1,135 @@
 
				+from __future__ import annotations
			
 
				+
			
 
				+from difflib import unified_diff
			
 
				+from pathlib import Path
			
 
				+from typing import NamedTuple
			
 
				+
			
 
				+import pytest
			
 
				+from click.testing import CliRunner
			
 
				+from freezegun import freeze_time
			
 
				+
			
 
				+from sentry.db.postgres.roles import in_test_psql_role_override
			
 
				+from sentry.runner.commands.backup import export, import_
			
 
				+from sentry.testutils.factories import get_fixture_path
			
 
				+from sentry.utils import json
			
 
				+from sentry.utils.json import JSONData, JSONEncoder, better_default_encoder
			
 
				+
			
 
				+
			
 
				+# TODO(team-ospo/#155): Figure out if we are going to use `pk` as part of the identifier, or some other kind of sequence number internal to the JSON export instead.
			
 
				+class InstanceID(NamedTuple):
			
 
				+    """Every entry in the generated backup JSON file should have a unique model+pk combination, which serves as its identifier."""
			
 
				+
			
 
				+    model: str
			
 
				+    pk: int
			
 
				+
			
 
				+    def pretty(self) -> str:
			
 
				+        return f"InstanceID(model: {self.model!r}, pk: {self.pk})"
			
 
				+
			
 
				+
			
 
				+class ComparatorFinding(NamedTuple):
			
 
				+    """Store all information about a single failed matching between expected and actual output."""
			
 
				+
			
 
				+    name: str
			
 
				+    on: InstanceID
			
 
				+    reason: str = ""
			
 
				+
			
 
				+    def pretty(self) -> str:
			
 
				+        return f"Finding(\n\tname: {self.name!r},\n\ton: {self.on.pretty()},\n\treason: {self.reason}\n)"
			
 
				+
			
 
				+
			
 
				+class ComparatorFindings:
			
 
				+    """A wrapper type for a list of 'ComparatorFinding' which enables pretty-printing in asserts."""
			
 
				+
			
 
				+    def __init__(self, findings: list[ComparatorFinding]):
			
 
				+        self.findings = findings
			
 
				+
			
 
				+    def append(self, finding: ComparatorFinding) -> None:
			
 
				+        self.findings.append(finding)
			
 
				+
			
 
				+    def pretty(self) -> str:
			
 
				+        return "\n".join(f.pretty() for f in self.findings)
			
 
				+
			
 
				+
			
 
				+JSON_PRETTY_PRINTER = JSONEncoder(
			
 
				+    default=better_default_encoder, indent=2, ignore_nan=True, sort_keys=True
			
 
				+)
			
 
				+
			
 
				+
			
 
				+def json_lines(obj: JSONData) -> list[str]:
			
 
				+    """Take a JSONData object and pretty-print it as JSON."""
			
 
				+
			
 
				+    return JSON_PRETTY_PRINTER.encode(obj).splitlines()
			
 
				+
			
 
				+
			
 
				+# TODO(team-ospo/#155): Move this out of the test suite, and into its own standalone module, since eventually it will be used to compare live JSON as well.
			
 
				+def validate(expect: JSONData, actual: JSONData) -> ComparatorFindings:
			
 
				+    """Ensures that originally imported data correctly matches actual outputted data, and produces a list of reasons why not when it doesn't"""
			
 
				+
			
 
				+    findings = ComparatorFindings([])
			
 
				+    exp_models = {}
			
 
				+    act_models = {}
			
 
				+    for model in expect:
			
 
				+        id = InstanceID(model["model"], model["pk"])
			
 
				+        exp_models[id] = model
			
 
				+
			
 
				+    # Ensure that the actual JSON contains no duplicates - we assume that the expected JSON did not.
			
 
				+    for model in actual:
			
 
				+        id = InstanceID(model["model"], model["pk"])
			
 
				+        if id in act_models:
			
 
				+            findings.append(ComparatorFinding("duplicate_entry", id))
			
 
				+        else:
			
 
				+            act_models[id] = model
			
 
				+
			
 
				+    # Report unexpected and missing entries in the actual JSON.
			
 
				+    extra = sorted(act_models.keys() - exp_models.keys())
			
 
				+    missing = sorted(exp_models.keys() - act_models.keys())
			
 
				+    for id in extra:
			
 
				+        del act_models[id]
			
 
				+        findings.append(ComparatorFinding("unexpected_entry", id))
			
 
				+    for id in missing:
			
 
				+        del exp_models[id]
			
 
				+        findings.append(ComparatorFinding("missing_entry", id))
			
 
				+
			
 
				+    # We only perform custom comparisons and JSON diffs on non-duplicate entries that exist in both
			
 
				+    # outputs.
			
 
				+    for id, act in act_models.items():
			
 
				+        exp = exp_models[id]
			
 
				+
			
 
				+        # Finally, perform a diff on the remaining JSON.
			
 
				+        diff = list(unified_diff(json_lines(exp["fields"]), json_lines(act["fields"]), n=3))
			
 
				+        if diff:
			
 
				+            findings.append(ComparatorFinding("json_diff", id, "\n    " + "\n    ".join(diff)))
			
 
				+
			
 
				+    return findings
			
 
				+
			
 
				+
			
 
				+def import_then_export(tmp_path: Path, fixture_file_name: str) -> None:
			
 
				+    """Test helper that validates that the originally imported data correctly matches actual
			
 
				+    outputted data, and produces a list of reasons why not when it doesn't"""
			
 
				+
			
 
				+    fixture_file_path = get_fixture_path("backup", fixture_file_name)
			
 
				+    with open(fixture_file_path) as backup_file:
			
 
				+        input = json.load(backup_file)
			
 
				+
			
 
				+    with in_test_psql_role_override("postgres"):
			
 
				+        rv = CliRunner().invoke(import_, [str(fixture_file_path)])
			
 
				+        assert rv.exit_code == 0, rv.output
			
 
				+
			
 
				+    tmp_json_file_path = str(tmp_path.joinpath("tmp_test_file.json"))
			
 
				+    rv = CliRunner().invoke(
			
 
				+        export, [tmp_json_file_path], obj={"silent": True, "indent": 2, "exclude": None}
			
 
				+    )
			
 
				+    assert rv.exit_code == 0, rv.output
			
 
				+
			
 
				+    with open(tmp_json_file_path) as tmp_file:
			
 
				+        output = json.load(tmp_file)
			
 
				+
			
 
				+    res = validate(input, output)
			
 
				+    if res.findings:
			
 
				+        raise AssertionError(res.pretty())
			
 
				+
			
 
				+
			
 
				+@pytest.mark.django_db(transaction=True, reset_sequences=True, databases="__all__")
			
 
				+@freeze_time("2023-06-22T23:00:00.123Z")
			
 
				+def test_fresh_install(tmp_path):
			
 
				+    import_then_export(tmp_path, "fresh-install.json")