Browse Source

migration: backfill apitoken hashed values (#71728)


We've been hashing tokens as they are used to authenticate, but the
number of tokens picked up that way has started to level out. This is a
backfill migration to fill in all of the hashed values for the
remaining tokens.

Huge thank you to @markstory @wedamija and @GabeVillalobos for helping
with the migration test! 🙏
Matthew T 9 months ago

+ 1 - 1

@@ -9,5 +9,5 @@ feedback: 0004_index_together
 hybridcloud: 0016_add_control_cacheversion
 hybridcloud: 0016_add_control_cacheversion
 nodestore: 0002_nodestore_no_dictfield
 nodestore: 0002_nodestore_no_dictfield
 replays: 0004_index_together
 replays: 0004_index_together
-sentry: 0725_create_sentry_groupsearchview_table
+sentry: 0726_apitoken_backfill_hashes
 social_auth: 0002_default_auto_field
 social_auth: 0002_default_auto_field

+ 154 - 0

@@ -0,0 +1,154 @@
+# Generated by Django 5.0.6 on 2024-05-29 21:28
+import hashlib
+import logging
+from enum import IntEnum
+from django.db import migrations, router
+from django.db.backends.base.schema import BaseDatabaseSchemaEditor
+from django.db.migrations.state import StateApps
+from sentry.new_migrations.migrations import CheckedMigration
+from sentry.utils.query import RangeQuerySetWrapperWithProgressBar
+logger = logging.getLogger(__name__)
+def backfill_hash_values(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
+    ApiToken = apps.get_model("sentry", "ApiToken")
+    ControlOutbox = apps.get_model("sentry", "ControlOutbox")
+    OrganizationMemberMapping = apps.get_model("sentry", "OrganizationMemberMapping")
+    OrganizationMapping = apps.get_model("sentry", "OrganizationMapping")
+    try:
+        from import Container
+        from django.conf import settings
+        from import control_silo_function
+        from sentry.silo.base import SiloMode
+        from import unguarded_write
+    except ImportError:
+        logger.exception("Cannot execute migration. Required symbols could not be imported")
+        return
+    # copied from src/sentry/models/
+    class OutboxCategory(IntEnum):
+        USER_UPDATE = 0
+        UNUSED_TWO = 4
+        UNUSUED_THREE = 13
+        UNUSED_ONE = 19
+        API_TOKEN_UPDATE = 32
+    # copied from src/sentry/models/
+    _outbox_categories_for_scope: dict[int, set[OutboxCategory]] = {}
+    _used_categories: set[OutboxCategory] = set()
+    # copied from src/sentry/models/
+    def scope_categories(enum_value: int, categories: set[OutboxCategory]) -> int:
+        _outbox_categories_for_scope[enum_value] = categories
+        inter = _used_categories.intersection(categories)
+        assert not inter, f"OutboxCategories {inter} were already registered to a different scope"
+        _used_categories.update(categories)
+        return enum_value
+    # copied from src/sentry/models/
+    class OutboxScope(IntEnum):
+        USER_SCOPE = scope_categories(
+            1,
+            {
+                OutboxCategory.USER_UPDATE,
+                OutboxCategory.API_TOKEN_UPDATE,
+                OutboxCategory.UNUSED_ONE,
+                OutboxCategory.UNUSED_TWO,
+                OutboxCategory.UNUSUED_THREE,
+                OutboxCategory.AUTH_IDENTITY_UPDATE,
+            },
+        )
+    @control_silo_function
+    def _find_orgs_for_user(user_id: int) -> set[int]:
+        return {
+            m["organization_id"]
+            for m in OrganizationMemberMapping.objects.filter(user_id=user_id).values(
+                "organization_id"
+            )
+        }
+    @control_silo_function
+    def find_regions_for_orgs(org_ids: Container[int]) -> set[str]:
+        if SiloMode.get_current_mode() == SiloMode.MONOLITH:
+            return {settings.SENTRY_MONOLITH_REGION}
+        else:
+            return set(
+                OrganizationMapping.objects.filter(organization_id__in=org_ids).values_list(
+                    "region_name", flat=True
+                )
+            )
+    @control_silo_function
+    def find_regions_for_user(user_id: int) -> set[str]:
+        if SiloMode.get_current_mode() == SiloMode.MONOLITH:
+            return {settings.SENTRY_MONOLITH_REGION}
+        org_ids = _find_orgs_for_user(user_id)
+        return find_regions_for_orgs(org_ids)
+    for api_token in RangeQuerySetWrapperWithProgressBar(ApiToken.objects.all()):
+        hashed_token = None
+        if api_token.hashed_token is None:
+            hashed_token = hashlib.sha256(api_token.token.encode()).hexdigest()
+            api_token.hashed_token = hashed_token
+        # if there's a refresh token make sure it is hashed as well
+        hashed_refresh_token = None
+        if api_token.refresh_token:
+            hashed_refresh_token = hashlib.sha256(api_token.refresh_token.encode()).hexdigest()
+            api_token.hashed_refresh_token = hashed_refresh_token
+        # only save if we've actually had to hash values
+        if hashed_token or hashed_refresh_token:
+            with unguarded_write(using=router.db_for_write(ApiToken)):
+      ["hashed_token", "hashed_refresh_token"])
+                user_regions = find_regions_for_user(api_token.user_id)
+                for region in user_regions:
+                    ControlOutbox.objects.create(
+                        shard_scope=OutboxScope.USER_SCOPE,
+                        shard_identifier=api_token.user_id,
+                        category=OutboxCategory.API_TOKEN_UPDATE,
+                        region_name=region,
+              ,
+                    )
+class Migration(CheckedMigration):
+    # This flag is used to mark that a migration shouldn't be automatically run in production.
+    # This should only be used for operations where it's safe to run the migration after your
+    # code has deployed. So this should not be used for most operations that alter the schema
+    # of a table.
+    # Here are some things that make sense to mark as post deployment:
+    # - Large data migrations. Typically we want these to be run manually so that they can be
+    #   monitored and not block the deploy for a long period of time while they run.
+    # - Adding indexes to large tables. Since this can take a long time, we'd generally prefer to
+    #   run this outside deployments so that we don't block them. Note that while adding an index
+    #   is a schema change, it's completely safe to run the operation after the code has deployed.
+    # Once deployed, run these manually via:
+    is_post_deployment = True
+    dependencies = [
+        ("sentry", "0725_create_sentry_groupsearchview_table"),
+    ]
+    operations = [
+        migrations.RunPython(
+            backfill_hash_values,
+            migrations.RunPython.noop,
+            hints={
+                "tables": [
+                    "sentry_apitoken",
+                ]
+            },
+        )
+    ]

+ 53 - 0

@@ -0,0 +1,53 @@
+from sentry.models.outbox import ControlOutbox, OutboxCategory, OutboxScope
+from sentry.testutils.cases import TestMigrations
+from sentry.testutils.helpers import override_options
+from sentry.testutils.silo import control_silo_test
+class TestBackfillApiTokenHashesMigration(TestMigrations):
+    migrate_from = "0725_create_sentry_groupsearchview_table"
+    migrate_to = "0726_apitoken_backfill_hashes"
+    connection = "control"
+    @override_options({"": False})
+    def setup_initial_state(self):
+        user = self.create_user()
+        self.user_auth_token = self.create_user_auth_token(user=user)
+        # Put the user in an org so we have membership
+        organization = self.create_organization(owner=user)
+        app = self.create_sentry_app(user=user,
+        self.app_install = self.create_sentry_app_installation(
+            organization=organization, user=user, slug=app.slug
+        )
+        assert self.user_auth_token.hashed_token is None
+        # user auth tokens do not have refresh tokens
+        assert self.user_auth_token.refresh_token is None
+        assert self.app_install.api_token.hashed_token is None
+        assert self.app_install.api_token.hashed_refresh_token is None
+        # tokens related to sentry apps do have refresh tokens
+        assert self.app_install.api_token.refresh_token is not None
+    def test_for_hashed_value(self):
+        self.user_auth_token.refresh_from_db()
+        assert self.user_auth_token.hashed_token is not None
+        assert ControlOutbox.objects.get(
+            shard_scope=OutboxScope.USER_SCOPE,
+            category=OutboxCategory.API_TOKEN_UPDATE,
+  ,
+            shard_identifier=self.user_auth_token.user_id,
+        )
+        self.app_install.refresh_from_db()
+        assert self.app_install.api_token.hashed_token is not None
+        assert self.app_install.api_token.hashed_refresh_token is not None
+        assert ControlOutbox.objects.get(
+            shard_scope=OutboxScope.USER_SCOPE,
+            category=OutboxCategory.API_TOKEN_UPDATE,
+  ,
+            shard_identifier=self.app_install.api_token.user_id,
+        )