Browse Source

feat(backup): Add correctness snapshot test (#51925)

This is the first test, of hopefully many, designed to better validate
the export/import flow. In this spirit, some scaffolding has been added
to help future testability:

- The import_/export functions have been split in two: one function for
the actual functionality (which is what gets called by the test suite),
and another that wraps this function to expose it as a CLI command using
`@click...` decorators. The latter (ie, import/export CLI command
coherence) is still tested in
tests/sentry/runners/commands/test_backup.py.
- A small JSON diffing framework has been added to test_correctness.py,
ensuring that JSON is diffable byte-by-byte, as well as more complex
scenarios like updated times, hash regexes, etc. A README file has been
added, describing the basic shape of these tests to future maintainers.
Alex Zaslavsky 1 year ago
parent
commit
ffce497273

+ 315 - 0
fixtures/backup/fresh-install.json

@@ -0,0 +1,315 @@
+[
+{
+  "model": "sites.site",
+  "pk": 1,
+  "fields": {
+    "domain": "example.com",
+    "name": "example.com"
+  }
+},
+{
+  "model": "sentry.option",
+  "pk": 1,
+  "fields": {
+    "key": "sentry:last_worker_ping",
+    "last_updated": "2023-06-27T21:40:01.305Z",
+    "last_updated_by": "unknown",
+    "value": 1687902001.2623305
+  }
+},
+{
+  "model": "sentry.option",
+  "pk": 2,
+  "fields": {
+    "key": "sentry:last_worker_version",
+    "last_updated": "2023-06-27T21:40:01.312Z",
+    "last_updated_by": "unknown",
+    "value": "\"23.7.0.dev0\""
+  }
+},
+{
+  "model": "sentry.option",
+  "pk": 3,
+  "fields": {
+    "key": "sentry:install-id",
+    "last_updated": "2023-06-23T00:03:42.785Z",
+    "last_updated_by": "unknown",
+    "value": "\"1bc81f9440424e908a73ca377581cb52682db68a\""
+  }
+},
+{
+  "model": "sentry.option",
+  "pk": 4,
+  "fields": {
+    "key": "sentry:latest_version",
+    "last_updated": "2023-06-27T21:25:02.602Z",
+    "last_updated_by": "unknown",
+    "value": "\"23.6.1\""
+  }
+},
+{
+  "model": "sentry.actor",
+  "pk": 1,
+  "fields": {
+    "type": 0,
+    "user_id": null,
+    "team": 1
+  }
+},
+{
+  "model": "sentry.email",
+  "pk": 1,
+  "fields": {
+    "email": "testing@example.com",
+    "date_added": "2023-06-22T22:59:55.531Z"
+  }
+},
+{
+  "model": "sentry.organization",
+  "pk": 1,
+  "fields": {
+    "name": "Testing",
+    "slug": "testing",
+    "status": 0,
+    "date_added": "2023-06-22T22:54:28.773Z",
+    "default_role": "member",
+    "is_test": false,
+    "flags": "1"
+  }
+},
+{
+  "model": "sentry.user",
+  "pk": 1,
+  "fields": {
+    "password": "pbkdf2_sha256$150000$iEvdIknqYjTr$+QsGn0tfIJ1FZLxQI37mVU1gL2KbL/wqjMtG/dFhsMA=",
+    "last_login": null,
+    "username": "testing@example.com",
+    "name": "",
+    "email": "testing@example.com",
+    "is_staff": true,
+    "is_active": true,
+    "is_superuser": true,
+    "is_managed": false,
+    "is_sentry_app": null,
+    "is_password_expired": false,
+    "last_password_change": "2023-06-22T22:59:57.023Z",
+    "flags": "0",
+    "session_nonce": null,
+    "date_joined": "2023-06-22T22:59:55.488Z",
+    "last_active": "2023-06-22T22:59:55.489Z",
+    "avatar_type": 0,
+    "avatar_url": null
+  }
+},
+{
+  "model": "sentry.organizationmapping",
+  "pk": 1,
+  "fields": {
+    "organization_id": 1,
+    "slug": "testing",
+    "name": "Testing",
+    "date_created": "2023-06-22T22:54:29.123Z",
+    "customer_id": null,
+    "verified": true,
+    "idempotency_key": "",
+    "region_name": "--monolith--",
+    "status": 0
+  }
+},
+{
+  "model": "sentry.relayusage",
+  "pk": 1,
+  "fields": {
+    "relay_id": "541d8473-0a90-4a4e-b536-704f223521b0",
+    "version": "23.6.1",
+    "first_seen": "2023-06-22T23:07:12.275Z",
+    "last_seen": "2023-06-27T21:27:32.702Z",
+    "public_key": "mEOy5fxmUgSEgpDWdovUMgBlkbo-ENo3bP8lyB5Z1-Q"
+  }
+},
+{
+  "model": "sentry.relay",
+  "pk": 1,
+  "fields": {
+    "relay_id": "541d8473-0a90-4a4e-b536-704f223521b0",
+    "public_key": "mEOy5fxmUgSEgpDWdovUMgBlkbo-ENo3bP8lyB5Z1-Q",
+    "first_seen": null,
+    "last_seen": null,
+    "is_internal": true
+  }
+},
+{
+  "model": "sentry.useremail",
+  "pk": 1,
+  "fields": {
+    "user": [
+      "testing@example.com"
+    ],
+    "email": "testing@example.com",
+    "validation_hash": "mCnWesSVvYQcq7qXQ36AZHwosAd6cghE",
+    "date_hash_added": "2023-06-22T22:59:55.521Z",
+    "is_verified": false
+  }
+},
+{
+  "model": "sentry.userrole",
+  "pk": 1,
+  "fields": {
+    "date_updated": "2023-06-22T23:00:00.123Z",
+    "date_added": "2023-06-22T22:54:27.960Z",
+    "name": "Super Admin",
+    "permissions": "['broadcasts.admin', 'users.admin', 'options.admin']"
+  }
+},
+{
+  "model": "sentry.userroleuser",
+  "pk": 1,
+  "fields": {
+    "date_updated": "2023-06-22T23:00:00.123Z",
+    "date_added": "2023-06-22T22:59:57.073Z",
+    "user": [
+      "testing@example.com"
+    ],
+    "role": 1
+  }
+},
+{
+  "model": "sentry.team",
+  "pk": 1,
+  "fields": {
+    "organization": 1,
+    "slug": "testing",
+    "name": "Testing",
+    "status": 0,
+    "actor": 1,
+    "idp_provisioned": false,
+    "date_added": "2023-06-22T22:54:28.821Z",
+    "org_role": null
+  }
+},
+{
+  "model": "sentry.organizationmember",
+  "pk": 1,
+  "fields": {
+    "organization": 1,
+    "user_id": 1,
+    "email": null,
+    "role": "owner",
+    "flags": "0",
+    "token": null,
+    "date_added": "2023-06-22T22:59:55.561Z",
+    "token_expires_at": null,
+    "has_global_access": true,
+    "inviter_id": null,
+    "invite_status": 0,
+    "type": 50,
+    "user_is_active": true,
+    "user_email": "testing@example.com"
+  }
+},
+{
+  "model": "sentry.project",
+  "pk": 1,
+  "fields": {
+    "slug": "internal",
+    "name": "Internal",
+    "forced_color": null,
+    "organization": 1,
+    "public": false,
+    "date_added": "2023-06-22T22:54:28.851Z",
+    "status": 0,
+    "first_event": null,
+    "flags": "10",
+    "platform": null
+  }
+},
+{
+  "model": "sentry.projectkey",
+  "pk": 1,
+  "fields": {
+    "project": 1,
+    "label": "Default",
+    "public_key": "c1373be35a7944808c8bcfede8097df1",
+    "secret_key": "d4d14d244f8e48788673116ca7646c93",
+    "roles": "1",
+    "status": 0,
+    "date_added": "2023-06-22T22:54:28.932Z",
+    "rate_limit_count": null,
+    "rate_limit_window": null,
+    "data": {
+      "dynamicSdkLoaderOptions": {
+        "hasPerformance": true,
+        "hasReplay": true
+      }
+    }
+  }
+},
+{
+  "model": "sentry.rule",
+  "pk": 1,
+  "fields": {
+    "project": 1,
+    "environment_id": null,
+    "label": "Send a notification for new issues",
+    "data": "{\"match\":\"all\",\"conditions\":[{\"id\":\"sentry.rules.conditions.first_seen_event.FirstSeenEventCondition\"}],\"actions\":[{\"id\":\"sentry.mail.actions.NotifyEmailAction\",\"targetType\":\"IssueOwners\",\"targetIdentifier\":null,\"fallthroughType\":\"ActiveMembers\"}]}",
+    "status": 0,
+    "source": 0,
+    "owner": null,
+    "date_added": "2023-06-22T22:54:28.982Z"
+  }
+},
+{
+  "model": "sentry.projectteam",
+  "pk": 1,
+  "fields": {
+    "project": 1,
+    "team": 1
+  }
+},
+{
+  "model": "sentry.organizationmemberteam",
+  "pk": 1,
+  "fields": {
+    "team": 1,
+    "organizationmember": 1,
+    "is_active": true,
+    "role": null
+  }
+},
+{
+  "model": "sentry.projectoption",
+  "pk": 1,
+  "fields": {
+    "project": 1,
+    "key": "sentry:relay-rev",
+    "value": "\"883653e754e441f9aa54052b52482bdb\""
+  }
+},
+{
+  "model": "sentry.projectoption",
+  "pk": 2,
+  "fields": {
+    "project": 1,
+    "key": "sentry:relay-rev-lastchange",
+    "value": "\"2023-06-22T22:54:29.060271Z\""
+  }
+},
+{
+  "model": "sentry.projectoption",
+  "pk": 3,
+  "fields": {
+    "project": 1,
+    "key": "sentry:option-epoch",
+    "value": 11
+  }
+},
+{
+  "model": "sentry.projectoption",
+  "pk": 4,
+  "fields": {
+    "project": 1,
+    "key": "sentry:origins",
+    "value": "[\"*\"]"
+  }
+}
+]

+ 7 - 3
src/sentry/runner/commands/backup.py

@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from io import StringIO
 
 import click
@@ -14,7 +16,7 @@ EXCLUDED_APPS = frozenset(("auth", "contenttypes"))
 @click.argument("src", type=click.File("rb"))
 @configuration
 def import_(src):
-    "Imports data from a Sentry export."
+    """CLI command wrapping the `exec_import` functionality."""
 
     try:
         with transaction.atomic():
@@ -51,7 +53,9 @@ def sort_dependencies():
     """
     from django.apps import apps
 
-    from sentry.models import Actor, Team, User
+    from sentry.models.actor import Actor
+    from sentry.models.team import Team
+    from sentry.models.user import User
 
     # Process the list of models, and get the list of dependencies
     model_dependencies = []
@@ -145,7 +149,7 @@ def sort_dependencies():
 @click.option("--exclude", default=None, help="Models to exclude from export.", metavar="MODELS")
 @configuration
 def export(dest, silent, indent, exclude):
-    "Exports core metadata for the Sentry installation."
+    """CLI command wrapping the `exec_export` functionality."""
 
     if exclude is None:
         exclude = ()

+ 42 - 0
tests/sentry/backup/README.md

@@ -0,0 +1,42 @@
+# Backup tests
+
+This directory tests a number of scenarios where we take an empty database, import a given backup
+`.json` file, optionally perform some transform, re-export it and validate the diff. The expected
+result is that only the changes we have made are reflected in the final diff between the original
+input `.json` that we imported, and the final output `.json` that we exported.
+
+## Comparators
+
+A number of fields in the export JSON format are not so easily compared before and after running
+through an import/export cycle. We introduce "comparators" to account for such cases. Comparators
+are custom, model-specific ways of comparing portions of two json files that take into account more
+nuanced validations beyond the simple character-for-character matching that the default diffing
+algorithm provides for us.
+
+For example, `date_updated` fields that change due to the very act of importing the JSON, or hashes
+that may change between import and export. In these cases, we may choose to simply validate that the
+exported `date_updated` is greater than the imported value for the same model instance, or that
+hashes match some general regex rule around size and valid characters without being identical.
+
+Comparator functions should implement the `JSONMutatingComparator` callback protocol, and be
+included in the `COMPARATORS` dictionary, matching them to the models they are applicable to. Note
+that the comparator is explicitly described as "mutating" - this is important, as part of its
+functionality is to _modify the compared fields in both the expected and actual output, so that
+text-based JSON diffing does not fail on them_. In the example above, the `date_updated` field in
+_both_ the expected and actual JSON should be replaced with some obvious sentinel value indicating
+that a comparator-initiated mutation took place, like `__COMPARATOR_DATE_UPDATED__`.
+
+While it may be tempting to remove the offending field, or to make the actual match the expected
+after the comparison is completed, this is discouraged, as it will result in the subsequent JSON
+diff looking identical, potentially confusing future readers. Better to just throw up a very obvious
+signal that "a framework-generated replacement occurred here" to make future debugging less
+stressful.
+
+## Snapshots
+
+A number of default starter snapshots are provided to bootstrap this test flow.
+
+### fresh_install.json
+
+This represents the state of the database immediately after running `./install.sh` to create a new
+instance of `self-hosted`.

+ 0 - 0
tests/sentry/backup/__init__.py


+ 135 - 0
tests/sentry/backup/test_correctness.py

@@ -0,0 +1,135 @@
+from __future__ import annotations
+
+from difflib import unified_diff
+from pathlib import Path
+from typing import NamedTuple
+
+import pytest
+from click.testing import CliRunner
+from freezegun import freeze_time
+
+from sentry.db.postgres.roles import in_test_psql_role_override
+from sentry.runner.commands.backup import export, import_
+from sentry.testutils.factories import get_fixture_path
+from sentry.utils import json
+from sentry.utils.json import JSONData, JSONEncoder, better_default_encoder
+
+
+# TODO(team-ospo/#155): Figure out if we are going to use `pk` as part of the identifier, or some other kind of sequence number internal to the JSON export instead.
+class InstanceID(NamedTuple):
+    """Every entry in the generated backup JSON file should have a unique model+pk combination, which serves as its identifier."""
+
+    model: str
+    pk: int
+
+    def pretty(self) -> str:
+        return f"InstanceID(model: {self.model!r}, pk: {self.pk})"
+
+
+class ComparatorFinding(NamedTuple):
+    """Store all information about a single failed matching between expected and actual output."""
+
+    name: str
+    on: InstanceID
+    reason: str = ""
+
+    def pretty(self) -> str:
+        return f"Finding(\n\tname: {self.name!r},\n\ton: {self.on.pretty()},\n\treason: {self.reason}\n)"
+
+
+class ComparatorFindings:
+    """A wrapper type for a list of 'ComparatorFinding' which enables pretty-printing in asserts."""
+
+    def __init__(self, findings: list[ComparatorFinding]):
+        self.findings = findings
+
+    def append(self, finding: ComparatorFinding) -> None:
+        self.findings.append(finding)
+
+    def pretty(self) -> str:
+        return "\n".join(f.pretty() for f in self.findings)
+
+
+JSON_PRETTY_PRINTER = JSONEncoder(
+    default=better_default_encoder, indent=2, ignore_nan=True, sort_keys=True
+)
+
+
+def json_lines(obj: JSONData) -> list[str]:
+    """Take a JSONData object and pretty-print it as JSON."""
+
+    return JSON_PRETTY_PRINTER.encode(obj).splitlines()
+
+
+# TODO(team-ospo/#155): Move this out of the test suite, and into its own standalone module, since eventually it will be used to compare live JSON as well.
+def validate(expect: JSONData, actual: JSONData) -> ComparatorFindings:
+    """Ensures that originally imported data correctly matches actual outputted data, and produces a list of reasons why not when it doesn't"""
+
+    findings = ComparatorFindings([])
+    exp_models = {}
+    act_models = {}
+    for model in expect:
+        id = InstanceID(model["model"], model["pk"])
+        exp_models[id] = model
+
+    # Ensure that the actual JSON contains no duplicates - we assume that the expected JSON did not.
+    for model in actual:
+        id = InstanceID(model["model"], model["pk"])
+        if id in act_models:
+            findings.append(ComparatorFinding("duplicate_entry", id))
+        else:
+            act_models[id] = model
+
+    # Report unexpected and missing entries in the actual JSON.
+    extra = sorted(act_models.keys() - exp_models.keys())
+    missing = sorted(exp_models.keys() - act_models.keys())
+    for id in extra:
+        del act_models[id]
+        findings.append(ComparatorFinding("unexpected_entry", id))
+    for id in missing:
+        del exp_models[id]
+        findings.append(ComparatorFinding("missing_entry", id))
+
+    # We only perform custom comparisons and JSON diffs on non-duplicate entries that exist in both
+    # outputs.
+    for id, act in act_models.items():
+        exp = exp_models[id]
+
+        # Finally, perform a diff on the remaining JSON.
+        diff = list(unified_diff(json_lines(exp["fields"]), json_lines(act["fields"]), n=3))
+        if diff:
+            findings.append(ComparatorFinding("json_diff", id, "\n    " + "\n    ".join(diff)))
+
+    return findings
+
+
+def import_then_export(tmp_path: Path, fixture_file_name: str) -> None:
+    """Test helper that validates that the originally imported data correctly matches actual
+    outputted data, and produces a list of reasons why not when it doesn't"""
+
+    fixture_file_path = get_fixture_path("backup", fixture_file_name)
+    with open(fixture_file_path) as backup_file:
+        input = json.load(backup_file)
+
+    with in_test_psql_role_override("postgres"):
+        rv = CliRunner().invoke(import_, [str(fixture_file_path)])
+        assert rv.exit_code == 0, rv.output
+
+    tmp_json_file_path = str(tmp_path.joinpath("tmp_test_file.json"))
+    rv = CliRunner().invoke(
+        export, [tmp_json_file_path], obj={"silent": True, "indent": 2, "exclude": None}
+    )
+    assert rv.exit_code == 0, rv.output
+
+    with open(tmp_json_file_path) as tmp_file:
+        output = json.load(tmp_file)
+
+    res = validate(input, output)
+    if res.findings:
+        raise AssertionError(res.pretty())
+
+
+@pytest.mark.django_db(transaction=True, reset_sequences=True, databases="__all__")
+@freeze_time("2023-06-22T23:00:00.123Z")
+def test_fresh_install(tmp_path):
+    import_then_export(tmp_path, "fresh-install.json")