
chore(hybrid-cloud): Break the SavedSearch.owner foreign key (#44086)

First step for the HybridCloud deletions work described here:

https://www.notion.so/sentry/Deletions-in-Hybrid-Cloud-eb0c167804a0415ca0a418ab2700cdff

Breaks the SavedSearch.owner foreign key, and provides the
infrastructure to process it, even in MONOLITH mode.

Requires some getsentry changes to handle the `owner` => `owner_id`
change; PR forthcoming.

@evanpurkhiser As requested, I reorganized the history of this PR into
logical chunks that hopefully explain the story of the work better.
Zach Collins committed 2 years ago
commit 1abd29e8d0

+ 1 - 1
migrations_lockfile.txt

@@ -6,5 +6,5 @@ To resolve this, rebase against latest master and regenerate your migration. Thi
 will then be regenerated, and you should be able to merge without conflicts.
 
 nodestore: 0002_nodestore_no_dictfield
-sentry: 0353_add_missing_uuid_unique_indexes
+sentry: 0354_break_saved_search_foreign_key
 social_auth: 0001_initial

+ 3 - 3
src/sentry/api/endpoints/organization_searches.py

@@ -40,7 +40,7 @@ class OrganizationSearchesEndpoint(OrganizationEndpoint):
             # the same organization. DOES include the requesting users pinned
             # search
             .exclude(
-                ~Q(owner=request.user),
+                ~Q(owner_id=request.user.id),
                 visibility__in=(Visibility.OWNER, Visibility.OWNER_PINNED),
             )
             .filter(
@@ -83,7 +83,7 @@ class OrganizationSearchesEndpoint(OrganizationEndpoint):
                 organization=organization,
                 type=SearchType.ISSUE.value,
                 visibility=Visibility.OWNER,
-                owner=request.user,
+                owner_id=request.user.id,
                 query=result["query"],
             ).exists():
                 return Response(
@@ -93,7 +93,7 @@ class OrganizationSearchesEndpoint(OrganizationEndpoint):
 
         saved_search = SavedSearch.objects.create(
             organization=organization,
-            owner=request.user,
+            owner_id=request.user.id,
             type=result["type"],
             name=result["name"],
             query=result["query"],

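Because `owner` is now a plain integer column rather than a Django relation, endpoint code filters on `owner_id` with the requesting user's id instead of passing a `User` instance, and it can no longer join through the old foreign key. A minimal sketch of the post-migration query pattern, assuming the import paths below (the helper function itself is illustrative, not part of this change):

    from sentry.models import SavedSearch
    from sentry.models.search_common import SearchType

    def issue_searches_owned_by(request, organization):
        # Filter on the raw id column; `owner=request.user` would no longer
        # resolve, since SavedSearch.owner_id is not a Django ForeignKey anymore.
        return SavedSearch.objects.filter(
            organization=organization,
            type=SearchType.ISSUE.value,
            owner_id=request.user.id,
        )
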
+ 2 - 2
src/sentry/auth/helper.py

@@ -236,7 +236,7 @@ class AuthIdentityHandler:
             log_service.record_audit_log(
                 event=AuditLogEvent(
                     organization_id=self.organization.id,
-                    time_of_creation=timezone.now(),
+                    date_added=timezone.now(),
                     event_id=audit_log.get_event_id("MEMBER_ADD"),
                     actor_user_id=user.id,
                     actor_label=user.username,
@@ -319,7 +319,7 @@ class AuthIdentityHandler:
             log_service.record_audit_log(
                 event=AuditLogEvent(
                     organization_id=self.organization.id,
-                    time_of_creation=timezone.now(),
+                    date_added=timezone.now(),
                     event_id=audit_log.get_event_id("SSO_IDENTITY_LINK"),
                     actor_user_id=self.user.id,
                     actor_label=self.user.username,

+ 7 - 0
src/sentry/conf/server.py

@@ -580,6 +580,9 @@ CELERY_IMPORTS = (
     "sentry.tasks.commits",
     "sentry.tasks.commit_context",
     "sentry.tasks.deletion",
+    "sentry.tasks.deletion.scheduled",
+    "sentry.tasks.deletion.groups",
+    "sentry.tasks.deletion.hybrid_cloud",
     "sentry.tasks.deliver_from_outbox",
     "sentry.tasks.digests",
     "sentry.tasks.email",
@@ -805,6 +808,10 @@ CELERYBEAT_SCHEDULE = {
         "schedule": crontab_with_minute_jitter(hour="*/6"),
         "options": {"expires": 60 * 25},
     },
+    "schedule-hybrid-cloud-foreign-key-jobs": {
+        "task": "sentry.tasks.deletion.hybrid_cloud.schedule_hybrid_cloud_foreign_key_jobs",
+        "schedule": timedelta(minutes=15),
+    },
     "monitor-release-adoption": {
         "task": "sentry.release_health.tasks.monitor_release_adoption",
         "schedule": crontab(minute=0),

+ 94 - 0
src/sentry/db/models/fields/hybrid_cloud_foreign_key.py

@@ -0,0 +1,94 @@
+"""
+A 'foreign key' which is not enforced in the local database, but triggers eventually consistent delete work in the
+presence of RegionTombstone or ControlTombstone model objects through the tasks/deletion/hybrid_cloud.py logic.
+
+Its main purpose is to support columns in, say, region silos that refer to User or Integration objects (and,
+conversely, columns in the control silo that point to, say, Organization) that do not actually exist in the local
+database, or even the local network.
+
+Functionally, this field is just a dumb BigIntegerField. While it does support indexes, it does not support constraints.
+This means, for instance, you should absolutely expect identifiers in this column whose referenced objects do not necessarily exist.
+Eventually, if the related object is deleted and processed correctly, this column may be set null or cleaned up,
+but at any given time an integer in this column makes no guarantees about the existence of a related object.
+
+Cascade behavior is provided to the application via tasks/deletion/hybrid_cloud.py jobs.  Again, to emphasize, this
+process is eventually consistent, and the timing of the completion of those processes is a black box of systems
+behavior, networking, and tuning.
+
+To add this field to a model, you need to do a few preparatory steps:
+1.  Ensure that the 'model' pointed to by this field is in an opposing silo mode.  Tests *should* fail for any usage
+of a HybridCloudForeignKey that points from and to models in the same silo mode.
+2.  Ensure that the foreign model being referenced produces outboxes to sync tombstones in an atomic transaction.
+For most common cross silo models, there should be a custom delete method already that implements this.
+If not, consult with the hybrid cloud team first to strategize on the outbox and
+deletion strategies.
+3.  Validate that either the default, or the registered bulk deletions in sentry/deletions/__init__.py make sense
+for your model.  This is especially true if your model previously had no cascade logic (a new model, for instance).
+4.  For converting an existing field to a HybridCloudForeignKey, django will produce a non-working migration by
+default.  There is unfortunately no way to configure the auto-generated django migrations.  You'll need to carefully
+build a migration by following this pattern:
+    a. register a database operation that alters the field to a ForeignKey with db_constraint=False, in order to
+    produce the custom SQL that actually drops the existing constraint in the database.
+    b. register state operations that further adjust the internal django field state as follows:
+        i. alters the original field to the new HybridCloudForeignKey (use the generated migration for this)
+        ii. renames that field to the `_id` form (e.g. user => user_id)
+        iii. removes, then re-adds, any other constraints (unique constraints, say) that depended on the original
+            field.  They still exist, but due to ii they need to be reconstructed in terms of the renamed field,
+            even if the column name is the same.
+4a. In short: don't change an existing field to a HybridCloudForeignKey yourself.  The hybrid cloud team probably needs to manage that carefully.
+
+Ideally, when applying this field, you should write a model test that validates that deleting your parent model
+produces the expected cascade behavior for your field.
+"""
+from __future__ import annotations
+
+from enum import IntEnum
+
+from django.db import models
+
+__all__ = ("HybridCloudForeignKey",)
+
+from typing import Any, Tuple
+
+from django.apps import apps
+
+
+class HybridCloudForeignKeyCascadeBehavior(IntEnum):
+    CASCADE = 1
+    SET_NULL = 2
+
+
+class HybridCloudForeignKey(models.BigIntegerField):  # type: ignore
+    on_delete: str
+    foreign_model_name: str
+
+    @property
+    def foreign_model(self) -> Any:
+        parts = self.foreign_model_name.split(".")
+        return apps.get_model(app_label=parts[0], model_name=parts[1])
+
+    @property
+    def foreign_table_name(self) -> str:
+        return self.foreign_model._meta.db_table
+
+    def __init__(
+        self, foreign_model: str, *, on_delete: HybridCloudForeignKeyCascadeBehavior | str, **kwds
+    ):
+        self.on_delete = (
+            on_delete
+            if isinstance(on_delete, HybridCloudForeignKeyCascadeBehavior)
+            else HybridCloudForeignKeyCascadeBehavior[on_delete.upper()]
+        ).name.upper()
+
+        parts = foreign_model.split(".")
+        assert (
+            len(parts) == 2
+        ), f"{self.__class__.__name__} model reference must be <app>.<ModelName>, got {foreign_model}"
+        self.foreign_model_name = foreign_model
+
+        kwds.setdefault("db_index", True)
+        super().__init__(**kwds)
+
+    def deconstruct(self) -> Tuple[Any, Any, Any, Any]:
+        (name, path, args, kwds) = super().deconstruct()
+        return name, path, [self.foreign_model_name], dict(on_delete=self.on_delete, **kwds)

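The model-side change to `SavedSearch` itself is in one of the files not shown in this truncated diff, but based on the field's constructor above and the migration below, the declaration would look roughly like the following sketch (the surrounding model body is an illustrative excerpt, not the full definition):

    from sentry.db.models import Model, region_silo_only_model
    from sentry.db.models.fields.hybrid_cloud_foreign_key import HybridCloudForeignKey


    @region_silo_only_model
    class SavedSearch(Model):  # illustrative excerpt, not the full model
        # Plain BigIntegerField under the hood: no database constraint against
        # sentry_user, so stale ids are expected until the hybrid cloud deletion
        # tasks process the matching tombstone and cascade the delete.
        owner_id = HybridCloudForeignKey("sentry.User", null=True, on_delete="CASCADE")
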
+ 28 - 0
src/sentry/db/postgres/roles.py

@@ -0,0 +1,28 @@
+from __future__ import annotations
+
+import contextlib
+import os
+import sys
+
+from django.db.transaction import get_connection
+
+
+@contextlib.contextmanager
+def test_psql_role_override(role_name: str, using: str | None = None):
+    """
+    During test runs, the role of the current connection will be swapped to role_name, and then swapped
+    back to its original role afterwards.  Has no effect in production.
+    """
+
+    if "pytest" not in sys.modules or os.environ.get("DB", "postgres") != "postgres":
+        yield
+        return
+
+    with get_connection(using).cursor() as conn:
+        conn.execute("SELECT user")
+        (cur,) = conn.fetchone()
+        conn.execute("SET ROLE %s", [role_name])
+        try:
+            yield
+        finally:
+            conn.execute("SET ROLE %s", [cur])

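The intended call pattern is the one visible in the counter.py change just below: wrap privileged statements in the context manager so that, when running under pytest against postgres, they execute as a role that is allowed to perform them, and run unchanged otherwise. A minimal usage sketch (the function and its SQL are illustrative):

    from django.db import connections

    from sentry.db.postgres.roles import test_psql_role_override


    def run_privileged_sql(using: str) -> None:
        # Under pytest + postgres this switches to the "postgres" role for the
        # duration of the block and restores the previous role afterwards;
        # outside of tests the context manager simply yields.
        with test_psql_role_override("postgres", using), connections[using].cursor() as cursor:
            cursor.execute("SELECT 1")  # stand-in for privileged DDL
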
+ 67 - 0
src/sentry/migrations/0354_break_saved_search_foreign_key.py

@@ -0,0 +1,67 @@
+# Generated by Django 2.2.28 on 2023-01-31 22:25
+
+from django.db import migrations, models
+
+import sentry.db.models.fields.hybrid_cloud_foreign_key
+from sentry.new_migrations.migrations import CheckedMigration
+
+
+class Migration(CheckedMigration):
+    # This flag is used to mark that a migration shouldn't be automatically run in production. For
+    # the most part, this should only be used for operations where it's safe to run the migration
+    # after your code has deployed. So this should not be used for most operations that alter the
+    # schema of a table.
+    # Here are some things that make sense to mark as dangerous:
+    # - Large data migrations. Typically we want these to be run manually by ops so that they can
+    #   be monitored and not block the deploy for a long period of time while they run.
+    # - Adding indexes to large tables. Since this can take a long time, we'd generally prefer to
+    #   have ops run this and not block the deploy. Note that while adding an index is a schema
+    #   change, it's completely safe to run the operation after the code has deployed.
+    is_dangerous = False
+
+    dependencies = [
+        ("sentry", "0353_add_missing_uuid_unique_indexes"),
+    ]
+
+    database_operations = [
+        migrations.AlterField(
+            model_name="savedsearch",
+            name="owner",
+            field=sentry.db.models.fields.foreignkey.FlexibleForeignKey(
+                to="sentry.User", db_constraint=False, db_index=True, null=True
+            ),
+        ),
+    ]
+
+    state_operations = [
+        migrations.AlterField(
+            model_name="savedsearch",
+            name="owner",
+            # field=models.IntegerField(db_index=True, null=False),
+            field=sentry.db.models.fields.hybrid_cloud_foreign_key.HybridCloudForeignKey(
+                "sentry.User",
+                db_index=True,
+                null=True,
+                on_delete="CASCADE",
+            ),
+        ),
+        migrations.RenameField(model_name="savedsearch", old_name="owner", new_name="owner_id"),
+        migrations.RemoveConstraint(
+            model_name="savedsearch",
+            name="sentry_savedsearch_pinning_constraint",
+        ),
+        migrations.AddConstraint(
+            model_name="savedsearch",
+            constraint=models.UniqueConstraint(
+                condition=models.Q(visibility="owner_pinned"),
+                fields=("organization", "owner_id", "type"),
+                name="sentry_savedsearch_pinning_constraint",
+            ),
+        ),
+    ]
+
+    operations = [
+        migrations.SeparateDatabaseAndState(
+            database_operations=database_operations, state_operations=state_operations
+        )
+    ]

+ 2 - 2
src/sentry/models/auditlogentry.py

@@ -93,7 +93,7 @@ class AuditLogEntry(Model):
             organization_id=int(
                 self.organization.id
             ),  # prefer raising NoneType here over actually passing through
-            time_of_creation=self.datetime or timezone.now(),
+            date_added=self.datetime or timezone.now(),
             actor_user_id=self.actor and self.actor.id,
             target_object_id=self.target_object,
             ip_address=self.ip_address and str(self.ip_address),
@@ -111,7 +111,7 @@ class AuditLogEntry(Model):
         """
         return AuditLogEntry(
             organization_id=event.organization_id,
-            datetime=event.time_of_creation,
+            datetime=event.date_added,
             actor_id=event.actor_user_id,
             target_object=event.target_object_id,
             ip_address=event.ip_address,

+ 2 - 4
src/sentry/models/counter.py

@@ -13,6 +13,7 @@ from sentry.db.models import (
     region_silo_only_model,
     sane_repr,
 )
+from sentry.db.postgres.roles import test_psql_role_override
 
 
 @region_silo_only_model
@@ -98,8 +99,7 @@ def create_counter_function(app_config, using, **kwargs):
     if not get_model_if_available(app_config, "Counter"):
         return
 
-    cursor = connections[using].cursor()
-    try:
+    with test_psql_role_override("postgres", using), connections[using].cursor() as cursor:
         cursor.execute(
             """
             create or replace function sentry_increment_project_counter(
@@ -126,8 +126,6 @@ def create_counter_function(app_config, using, **kwargs):
             $$ language plpgsql;
         """
         )
-    finally:
-        cursor.close()
 
 
 post_migrate.connect(create_counter_function, dispatch_uid="create_counter_function", weak=False)

+ 41 - 11
src/sentry/models/outbox.py

@@ -22,6 +22,7 @@ from sentry.db.models import (
     sane_repr,
 )
 from sentry.silo import SiloMode
+from sentry.utils import metrics
 
 THE_PAST = datetime.datetime(2016, 8, 1, 0, 0, 0, 0, tzinfo=timezone.utc)
 
@@ -154,6 +155,11 @@ class OutboxBase(Model):
     def next_schedule(self, now: datetime.datetime) -> datetime.datetime:
         return now + (self.last_delay() * 2)
 
+    def save(self, **kwds: Any):
+        tags = {"category": OutboxCategory(self.category).name}
+        metrics.incr("outbox.saved", 1, tags=tags)
+        super().save(**kwds)
+
     @contextlib.contextmanager
     def process_coalesced(self) -> Generator[OutboxBase | None, None, None]:
         # Do not use a select for update here -- it is tempting, but a major performance issue.
@@ -163,12 +169,24 @@ class OutboxBase(Model):
         coalesced: OutboxBase | None = self.select_coalesced_messages().last()
         yield coalesced
         if coalesced is not None:
-            self.select_coalesced_messages().filter(id__lte=coalesced.id).delete()
+            first_coalesced: OutboxBase = self.select_coalesced_messages().first() or coalesced
+            _, deleted = self.select_coalesced_messages().filter(id__lte=coalesced.id).delete()
+            tags = {"category": OutboxCategory(self.category).name}
+            metrics.incr("outbox.processed", deleted, tags=tags)
+            metrics.timing(
+                "outbox.processing_lag",
+                datetime.datetime.now().timestamp() - first_coalesced.scheduled_from.timestamp(),
+                tags=tags,
+            )
 
     def process(self) -> bool:
         with self.process_coalesced() as coalesced:
             if coalesced is not None:
-                coalesced.send_signal()
+                with metrics.timer(
+                    "outbox.send_signal.duration",
+                    tags={"category": OutboxCategory(coalesced.category).name},
+                ):
+                    coalesced.send_signal()
                 return True
         return False
 
@@ -191,14 +209,6 @@ class OutboxBase(Model):
                 f"Could not flush items from shard {self.key_from(self.sharding_columns)!r}"
             )
 
-    @classmethod
-    def for_shard(cls: Type[_T], shard_scope: OutboxScope, shard_identifier: int) -> _T:
-        """
-        Logically, this is just an alias for the constructor of cls, but explicitly named to call out the intended
-        semantic of creating and instance to invoke `drain_shard` on.
-        """
-        return cls(shard_scope=shard_scope, shard_identifier=shard_identifier)
-
 
 MONOLITH_REGION_NAME = "--monolith--"
 
@@ -234,6 +244,14 @@ class RegionOutbox(OutboxBase):
             ("shard_scope", "shard_identifier", "id"),
         )
 
+    @classmethod
+    def for_shard(cls: Type[_T], shard_scope: OutboxScope, shard_identifier: int) -> _T:
+        """
+        Logically, this is just an alias for the constructor of cls, but explicitly named to call out the intended
+        semantic of creating an instance to invoke `drain_shard` on.
+        """
+        return cls(shard_scope=shard_scope, shard_identifier=shard_identifier)
+
     __repr__ = sane_repr("shard_scope", "shard_identifier", "category", "object_identifier")
 
 
@@ -253,7 +271,7 @@ class ControlOutbox(OutboxBase):
 
     def send_signal(self):
         process_control_outbox.send(
-            sender=self.category,
+            sender=OutboxCategory(self.category),
             payload=self.payload,
             region_name=self.region_name,
             object_identifier=self.object_identifier,
@@ -301,6 +319,18 @@ class ControlOutbox(OutboxBase):
             result.payload = payload
             yield result
 
+    @classmethod
+    def for_shard(
+        cls: Type[_T], shard_scope: OutboxScope, shard_identifier: int, region_name: str
+    ) -> _T:
+        """
+        Logically, this is just an alias for the constructor of cls, but explicitly named to call out the intended
+        semantic of creating an instance to invoke `drain_shard` on.
+        """
+        return cls(
+            shard_scope=shard_scope, shard_identifier=shard_identifier, region_name=region_name
+        )
+
 
 def _find_orgs_for_user(user_id: int) -> Set[int]:
     # TODO: This must be changed to the org member mapping in the control silo eventually.

Some files were not shown because too many files changed in this diff