Browse Source

feat(hybridcloud) Setup plumbing to run CI with split databases (#51307)

This won't be merged, but we want to get an understanding of how many
tests need to be fixed when run against split databases.

---------

Co-authored-by: Zachary Collins <zachary.collins@sentry.io>
Co-authored-by: Mike Ihbe <mike.ihbe@sentry.io>
Mark Story 1 year ago
parent
commit
add37f2eff
6 changed files with 149 additions and 83 deletions
  1. 4 4
      Makefile
  2. 6 2
      src/sentry/conf/server.py
  3. 1 0
      src/sentry/db/router.py
  4. 13 1
      src/sentry/testutils/cases.py
  5. 103 19
      src/sentry/testutils/silo.py
  6. 22 57
      tests/conftest.py

+ 4 - 4
Makefile

@@ -98,7 +98,7 @@ run-acceptance:
 	pytest tests/acceptance --cov . --cov-report="xml:.artifacts/acceptance.coverage.xml"
 	@echo ""
 
-test-cli:
+test-cli: create-db
 	@echo "--> Testing CLI"
 	rm -rf test_cli
 	mkdir test_cli
@@ -125,7 +125,7 @@ test-js-ci: node-version-check
 	@yarn run test-ci
 	@echo ""
 
-test-python-ci:
+test-python-ci: create-db
 	@echo "--> Running CI Python tests"
 	pytest tests/integration tests/sentry \
 		--ignore tests/sentry/eventstream/kafka \
@@ -137,7 +137,7 @@ test-python-ci:
 		--cov . --cov-report="xml:.artifacts/python.coverage.xml"
 	@echo ""
 
-test-snuba:
+test-snuba: create-db
 	@echo "--> Running snuba tests"
 	pytest tests/snuba \
 		tests/sentry/eventstream/kafka \
@@ -160,7 +160,7 @@ backend-typing:
 
 # JavaScript relay tests are meant to be run within Symbolicator test suite, as they are parametrized to verify both processing pipelines during migration process.
 # Running Locally: Run `sentry devservices up kafka zookeeper` before starting these tests
-test-symbolicator:
+test-symbolicator: create-db
 	@echo "--> Running symbolicator tests"
 	pytest tests/symbolicator -vv --cov . --cov-report="xml:.artifacts/symbolicator.coverage.xml"
 	pytest tests/relay_integration/lang/javascript/ -vv -m symbolicator

+ 6 - 2
src/sentry/conf/server.py

@@ -3398,7 +3398,9 @@ DISALLOWED_CUSTOMER_DOMAINS: list[str] = []
 SENTRY_ISSUE_PLATFORM_RATE_LIMITER_OPTIONS: dict[str, str] = {}
 SENTRY_ISSUE_PLATFORM_FUTURES_MAX_LIMIT = 10000
 
-if USE_SILOS:
+# USE_SPLIT_DBS is leveraged in tests as we validate db splits further.
+# Split databases are also required for the USE_SILOS devserver flow.
+if USE_SILOS or env("SENTRY_USE_SPLIT_DBS", default=False):
     # Add connections for the region & control silo databases.
     DATABASES["control"] = DATABASES["default"].copy()
     DATABASES["control"]["NAME"] = "control"
@@ -3407,6 +3409,9 @@ if USE_SILOS:
     # silo database is the 'default' elsewhere in application logic.
     DATABASES["default"]["NAME"] = "region"
 
+    DATABASE_ROUTERS = ("sentry.db.router.SiloRouter",)
+
+if USE_SILOS:
     # Addresses are hardcoded based on the defaults
     # we use in commands/devserver.
     SENTRY_REGION_CONFIG = [
@@ -3426,7 +3431,6 @@ if USE_SILOS:
             "control_silo_address": f"http://127.0.0.1:{control_port}",
         }
     )
-    DATABASE_ROUTERS = ("sentry.db.router.SiloRouter",)
 
 
 # How long we should wait for a gateway proxy request to return before giving up

+ 1 - 0
src/sentry/db/router.py

@@ -46,6 +46,7 @@ class SiloRouter:
         "django_content_type",
         "django_site",
         "django_session",
+        "auth_user",
         "auth_group",
         "auth_permission",
         "auth_group_permissions",

+ 13 - 1
src/sentry/testutils/cases.py

@@ -424,7 +424,7 @@ class _AssertQueriesContext(CaptureQueriesContext):
 @override_settings(ROOT_URLCONF="sentry.web.urls")
 class TestCase(BaseTestCase, DjangoTestCase):
     # We need Django to flush all databases.
-    databases = "__all__"
+    databases: set[str] | str = "__all__"
 
     # Ensure that testcases that ask for DB setup actually make use of the
     # DB. If they don't, they're wasting CI time.
@@ -495,10 +495,16 @@ class TestCase(BaseTestCase, DjangoTestCase):
 
 
 class TransactionTestCase(BaseTestCase, DjangoTransactionTestCase):
+    # We need Django to flush all databases.
+    databases: set[str] | str = "__all__"
+
     pass
 
 
 class PerformanceIssueTestCase(BaseTestCase):
+    # We need Django to flush all databases.
+    databases: set[str] | str = "__all__"
+
     def create_performance_issue(
         self,
         tags=None,
@@ -571,6 +577,9 @@ class APITestCase(BaseTestCase, BaseAPITestCase):
     must set the string `endpoint`.
     """
 
+    # We need Django to flush all databases.
+    databases: set[str] | str = "__all__"
+
     method = "get"
 
     @property
@@ -1018,6 +1027,9 @@ class SnubaTestCase(BaseTestCase):
     tests that require snuba.
     """
 
+    # We need Django to flush all databases.
+    databases: set[str] | str = "__all__"
+
     def setUp(self):
         super().setUp()
         self.init_snuba()

+ 103 - 19
src/sentry/testutils/silo.py

@@ -2,8 +2,9 @@ from __future__ import annotations
 
 import functools
 import inspect
+import sys
 from contextlib import contextmanager
-from typing import Any, Callable, Generator, Iterable, Set, Tuple, Type
+from typing import Any, Callable, Generator, Iterable, MutableMapping, MutableSet, Set, Tuple, Type
 from unittest import TestCase
 
 import pytest
@@ -11,6 +12,8 @@ from django.conf import settings
 from django.db import connections, router
 from django.db.models import Model
 from django.db.models.fields.related import RelatedField
+from django.db.models.signals import post_migrate
+from django.db.transaction import get_connection
 from django.test import override_settings
 
 from sentry import deletions
@@ -186,29 +189,110 @@ def exempt_from_silo_limits() -> Generator[None, None, None]:
         yield
 
 
-def reset_test_role(role: str, using: str | None = None, create_role: bool | None = None) -> None:
-    with connections["default"].cursor() as connection:
-        connection.execute("SELECT 1 FROM pg_roles WHERE rolname = %s", [role])
-        if connection.fetchone():
-            connection.execute(f"REASSIGN OWNED BY {role} TO postgres")
-            connection.execute(f"DROP OWNED BY {role} CASCADE")
-            connection.execute(f"DROP ROLE {role}")
-        connection.execute(f"CREATE ROLE {role}")
-        connection.execute(f"GRANT USAGE ON SCHEMA public TO {role};")
-        connection.execute(f"GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO {role};")
-        connection.execute(f"GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO {role};")
+def reset_test_role(role: str, using: str, create_role: bool) -> None:
+    connection_names = [conn.alias for conn in connections.all()]
 
+    if create_role:
+        role_exists = False
+        with get_connection(using).cursor() as connection:
+            connection.execute("SELECT 1 FROM pg_roles WHERE rolname = %s", [role])
+            role_exists = connection.fetchone()
 
-def restrict_role_by_silo(mode: SiloMode, role: str) -> None:
-    for model in iter_models():
-        silo_limit = getattr(model._meta, "silo_limit", None)
-        if silo_limit is None or mode not in silo_limit.modes:
-            restrict_role(role, model, "ALL PRIVILEGES")
+        if role_exists:
+            # Drop role permissions on each connection, or we can't drop the role.
+            for alias in connection_names:
+                with get_connection(alias).cursor() as conn:
+                    conn.execute(f"REASSIGN OWNED BY {role} TO postgres")
+                    conn.execute(f"DROP OWNED BY {role} CASCADE")
 
+            # Drop and re-create the role as required.
+            with get_connection(using).cursor() as conn:
+                conn.execute(f"DROP ROLE {role}")
+
+        with get_connection(using).cursor() as conn:
+            conn.execute(f"CREATE ROLE {role}")
+
+    # Create permissions on the current connection as we'll build up permissions incrementally.
+    with get_connection(using).cursor() as conn:
+        conn.execute(f"GRANT USAGE ON SCHEMA public TO {role};")
+        conn.execute(f"GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO {role};")
+        conn.execute(f"GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO {role};")
+
+
+_role_created: bool = False
+_role_privileges_created: MutableMapping[str, bool] = {}
+
+
+def create_model_role_guards(app_config: Any, using: str, **kwargs: Any):
+    global _role_created
+    if "pytest" not in sys.modules:
+        return
+
+    from sentry.db.models.fields.hybrid_cloud_foreign_key import HybridCloudForeignKey
+    from sentry.models import (
+        Organization,
+        OrganizationMapping,
+        OrganizationMember,
+        OrganizationMemberMapping,
+    )
+    from sentry.testutils.silo import iter_models, reset_test_role, restrict_role
+
+    if not app_config or app_config.name != "sentry":
+        return
+
+    with get_connection(using).cursor() as conn:
+        conn.execute("SET ROLE 'postgres'")
+
+    if not _role_privileges_created.get(using, False):
+        reset_test_role(role="postgres_unprivileged", using=using, create_role=not _role_created)
+        _role_created = True
+        _role_privileges_created[using] = True
+
+    # Protect Foreign Keys using hybrid cloud models from being deleted without using the privileged user.
+    # Deletion should only occur when the developer is actively aware of the need to generate outboxes.
+    seen_models: MutableSet[type] = set()
+    for model in iter_models(app_config.name):
+        for field in model._meta.fields:
+            if not isinstance(field, HybridCloudForeignKey):
+                continue
+            fk_model = field.foreign_model
+            if fk_model is None or fk_model in seen_models:
+                continue
+            seen_models.add(fk_model)
+
+            restrict_role(
+                role="postgres_unprivileged", model=fk_model, revocation_type="DELETE", using=using
+            )
+
+    # Protect organization members from being updated without also invoking the correct outbox logic.
+    # If you hit test failures as a result of lacking these privileges, first ensure that you create the correct
+    # outboxes in a transaction, and cover that transaction with `in_test_psql_role_override`
+    restrict_role(role="postgres_unprivileged", model=OrganizationMember, revocation_type="INSERT")
+    restrict_role(role="postgres_unprivileged", model=OrganizationMember, revocation_type="UPDATE")
+    restrict_role(role="postgres_unprivileged", model=Organization, revocation_type="INSERT")
+    restrict_role(role="postgres_unprivileged", model=Organization, revocation_type="UPDATE")
+    restrict_role(role="postgres_unprivileged", model=OrganizationMapping, revocation_type="INSERT")
+    restrict_role(role="postgres_unprivileged", model=OrganizationMapping, revocation_type="UPDATE")
+    # OrganizationMember objects need to cascade, but they can't use the standard hybrid cloud foreign key because the
+    # identifiers are not snowflake ids.
+    restrict_role(role="postgres_unprivileged", model=OrganizationMember, revocation_type="DELETE")
+
+    restrict_role(
+        role="postgres_unprivileged", model=OrganizationMemberMapping, revocation_type="INSERT"
+    )
+
+
+# Listen to django's migration signal so that we're not trapped inside
+# test method transactions.
+post_migrate.connect(create_model_role_guards, dispatch_uid="create_model_role_guards", weak=False)
+
+
+def restrict_role(role: str, model: Any, revocation_type: str, using: str = "default") -> None:
+    if router.db_for_write(model) != using:
+        return
 
-def restrict_role(role: str, model: Any, revocation_type: str) -> None:
     using = router.db_for_write(model)
-    with connections[using].cursor() as connection:
+    with get_connection(using).cursor() as connection:
         connection.execute(f"REVOKE {revocation_type} ON public.{model._meta.db_table} FROM {role}")
 
 

+ 22 - 57
tests/conftest.py

@@ -1,8 +1,7 @@
 import os
-from typing import MutableSet
 
 import pytest
-from django.db.transaction import get_connection
+from django.db import connections
 
 from sentry.silo import SiloMode
 
@@ -170,63 +169,29 @@ def protect_hybrid_cloud_writes_and_deletes(request):
     If you are certain you need to delete the objects in a new codepath, check out User.delete
     logic to see how to escalate the connection's role in tests.  Make absolutely sure that you
     create Outbox objects in the same transaction that matches what you delete.
-    """
-    from sentry.db.models.fields.hybrid_cloud_foreign_key import HybridCloudForeignKey
-    from sentry.models import (
-        Organization,
-        OrganizationMapping,
-        OrganizationMember,
-        OrganizationMemberMapping,
-    )
-    from sentry.testutils.silo import iter_models, reset_test_role, restrict_role
 
-    try:
-        with get_connection().cursor() as conn:
-            conn.execute("SET ROLE 'postgres'")
-    except (RuntimeError, AssertionError) as e:
-        # Tests that do not have access to the database should pass through.
-        # Ideally we'd use request.fixture names to infer this, but there didn't seem to be a single stable
-        # fixture name that fully covered all cases of database access, so this approach is "try and then fail".
-        if "Database access not allowed" in str(e) or "Database queries to" in str(e):
-            yield
-            return
-
-    reset_test_role(role="postgres_unprivileged")
-
-    # "De-escalate" the default connection's permission level to prevent queryset level deletions of HCFK.
-    seen_models: MutableSet[type] = set()
-    for model in iter_models():
-        for field in model._meta.fields:
-            if not isinstance(field, HybridCloudForeignKey):
-                continue
-            fk_model = field.foreign_model
-            if fk_model is None or fk_model in seen_models:
-                continue
-            seen_models.add(fk_model)
-            restrict_role(role="postgres_unprivileged", model=fk_model, revocation_type="DELETE")
-
-    # Protect organization members from being updated without also invoking the correct outbox logic.
-    # If you hit test failures as a result of lacking these privileges, first ensure that you create the correct
-    # outboxes in a transaction, and cover that transaction with `in_test_psql_role_override`
-    restrict_role(role="postgres_unprivileged", model=OrganizationMember, revocation_type="INSERT")
-    restrict_role(role="postgres_unprivileged", model=OrganizationMember, revocation_type="UPDATE")
-    restrict_role(role="postgres_unprivileged", model=Organization, revocation_type="INSERT")
-    restrict_role(role="postgres_unprivileged", model=Organization, revocation_type="UPDATE")
-    restrict_role(role="postgres_unprivileged", model=OrganizationMapping, revocation_type="INSERT")
-    restrict_role(role="postgres_unprivileged", model=OrganizationMapping, revocation_type="UPDATE")
-    # OrganizationMember objects need to cascade, but they can't use the standard hybrid cloud foreign key because the
-    # identifiers are not snowflake ids.
-    restrict_role(role="postgres_unprivileged", model=OrganizationMember, revocation_type="DELETE")
-
-    restrict_role(
-        role="postgres_unprivileged", model=OrganizationMemberMapping, revocation_type="INSERT"
-    )
-
-    with get_connection().cursor() as conn:
-        conn.execute("SET ROLE 'postgres_unprivileged'")
+    See sentry.testutils.silo for where the postgres_unprivileged role comes from and
+    how its permissions are assigned.
+    """
+    for conn in connections.all():
+        try:
+            with conn.cursor() as cursor:
+                cursor.execute("SET ROLE 'postgres'")
+        except (RuntimeError, AssertionError) as e:
+            # Tests that do not have access to the database should pass through.
+            # Ideally we'd use request.fixture names to infer this, but there didn't seem to be a single stable
+            # fixture name that fully covered all cases of database access, so this approach is "try and then fail".
+            if "Database access not allowed" in str(e) or "Database queries to" in str(e):
+                yield
+                return
+            raise e
+
+        with conn.cursor() as cursor:
+            cursor.execute("SET ROLE 'postgres_unprivileged'")
 
     try:
         yield
     finally:
-        with get_connection().cursor() as conn:
-            conn.execute("SET ROLE 'postgres'")
+        for connection in connections.all():
+            with connection.cursor() as cursor:
+                cursor.execute("SET ROLE 'postgres'")