Browse Source

feat(grouping): Store latest grouping config in grouphash metadata (#78366)

There have been times, looking at records in the `GroupHash` table, when we've wished we knew whether a given hash was current, or just cruft left over from some older version of the grouping code or some older config now no longer in use. For `GroupHash` records with an associated `GroupHashMetadata` record, we can already sort of answer the first question, because we now have a creation date for each grouphash. This PR gives us the ability to answer the second, by adding to the metadata the grouping config used to calculate the hash.

Notes:

- The data stored is actually _latest_ grouping config, because we want to be able to distinguish between a hash originally calculated with legacy config X which is still being produced by current config Y, and a hash originally calculated with legacy config X which is no longer being produced by current config Y. In the former case, the grouphash is still actively being used to match events to its group. In the latter case, the grouphash is effectively dead - we'll never use it to match another event to the group - and so it can eventually be culled. (Be it via cronjob or one-off cleanup, we don't yet have a system which could do said culling, but at least now we'll have the information when we need it.)

- Because there are already existing `GroupHashMetadata` records without a config set, the field is nullable. Fortunately, we know that there's only been one grouping config active during the time we've been creating records, so it will be easy to backfill the data in a follow-up PR.
Katie Byers 5 months ago
parent
commit
58e3ccdc8d

+ 1 - 1
migrations_lockfile.txt

@@ -10,7 +10,7 @@ hybridcloud: 0016_add_control_cacheversion
 nodestore: 0002_nodestore_no_dictfield
 remote_subscriptions: 0003_drop_remote_subscription
 replays: 0004_index_together
-sentry: 0770_increase_project_slug_max_length
+sentry: 0771_add_grouping_config_to_grouphash_metadata
 social_auth: 0002_default_auto_field
 uptime: 0016_translate_uptime_object_headers_to_lists
 workflow_engine: 0008_detector_state

+ 1 - 1
src/sentry/event_manager.py

@@ -1376,7 +1376,7 @@ def get_hashes_and_grouphashes(
     grouping_config, hashes = hash_calculation_function(project, job, metric_tags)
 
     if hashes:
-        grouphashes = get_or_create_grouphashes(project, hashes)
+        grouphashes = get_or_create_grouphashes(project, hashes, grouping_config["id"])
 
         existing_grouphash = find_grouphash_with_group(grouphashes)
 

+ 17 - 7
src/sentry/grouping/ingest/hashing.py

@@ -215,7 +215,9 @@ def find_grouphash_with_group(
     return None
 
 
-def get_or_create_grouphashes(project: Project, hashes: Sequence[str]) -> list[GroupHash]:
+def get_or_create_grouphashes(
+    project: Project, hashes: Sequence[str], grouping_config: str
+) -> list[GroupHash]:
     grouphashes = []
 
     for hash_value in hashes:
@@ -223,13 +225,21 @@ def get_or_create_grouphashes(project: Project, hashes: Sequence[str]) -> list[G
 
         # TODO: Do we want to expand this to backfill metadata for existing grouphashes? If we do,
         # we'll have to override the metadata creation date for them.
-        if (
-            created
-            and options.get("grouping.grouphash_metadata.ingestion_writes_enabled")
-            and features.has("organizations:grouphash-metadata-creation", project.organization)
+        if options.get("grouping.grouphash_metadata.ingestion_writes_enabled") and features.has(
+            "organizations:grouphash-metadata-creation", project.organization
         ):
-            # For now, this just creates a record with a creation timestamp
-            GroupHashMetadata.objects.create(grouphash=grouphash)
+            if created:
+                GroupHashMetadata.objects.create(
+                    grouphash=grouphash,
+                    latest_grouping_config=grouping_config,
+                )
+            elif (
+                grouphash.metadata and grouphash.metadata.latest_grouping_config != grouping_config
+            ):
+                # Keep track of the most recent config which computed this hash, so that once a
+                # config is deprecated, we can clear out the GroupHash records which are no longer
+                # being produced
+                grouphash.metadata.update(latest_grouping_config=grouping_config)
 
         grouphashes.append(grouphash)
 

+ 33 - 0
src/sentry/migrations/0771_add_grouping_config_to_grouphash_metadata.py

@@ -0,0 +1,33 @@
+# Generated by Django 5.1.1 on 2024-10-01 02:06
+
+from django.db import migrations, models
+
+from sentry.new_migrations.migrations import CheckedMigration
+
+
+class Migration(CheckedMigration):
+    # This flag is used to mark that a migration shouldn't be automatically run in production.
+    # This should only be used for operations where it's safe to run the migration after your
+    # code has deployed. So this should not be used for most operations that alter the schema
+    # of a table.
+    # Here are some things that make sense to mark as post deployment:
+    # - Large data migrations. Typically we want these to be run manually so that they can be
+    #   monitored and not block the deploy for a long period of time while they run.
+    # - Adding indexes to large tables. Since this can take a long time, we'd generally prefer to
+    #   run this outside deployments so that we don't block them. Note that while adding an index
+    #   is a schema change, it's completely safe to run the operation after the code has deployed.
+    # Once deployed, run these manually via: https://develop.sentry.dev/database-migrations/#migration-deployment
+
+    is_post_deployment = False
+
+    dependencies = [
+        ("sentry", "0770_increase_project_slug_max_length"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="grouphashmetadata",
+            name="latest_grouping_config",
+            field=models.CharField(null=True),
+        ),
+    ]

+ 6 - 0
src/sentry/models/grouphashmetadata.py

@@ -18,6 +18,12 @@ class GroupHashMetadata(Model):
     )
     date_added = models.DateTimeField(default=timezone.now)
 
+    # HASHING
+
+    # Most recent config to produce this hash
+    # TODO: Backfill the current config for grouphashes with metadata and then make this non-nullable
+    latest_grouping_config = models.CharField(null=True)
+
     # SEER
 
     # When this hash was sent to Seer. This will be different than `date_added` if we send it to

+ 70 - 0
tests/sentry/event_manager/grouping/test_grouphash_metadata.py

@@ -1,17 +1,28 @@
 from __future__ import annotations
 
+from time import time
+
 from sentry.models.grouphash import GroupHash
 from sentry.models.grouphashmetadata import GroupHashMetadata
+from sentry.projectoptions.defaults import DEFAULT_GROUPING_CONFIG, LEGACY_GROUPING_CONFIG
 from sentry.testutils.cases import TestCase
 from sentry.testutils.helpers import Feature
 from sentry.testutils.helpers.eventprocessing import save_new_event
+from sentry.testutils.helpers.features import with_feature
 from sentry.testutils.helpers.options import override_options
 from sentry.testutils.skips import requires_snuba
+from sentry.utils.types import NonNone
 
 pytestmark = [requires_snuba]
 
 
 class GroupHashMetadataTest(TestCase):
+    # Helper method to save us from having to assert the existence of `grouphash` and
+    # `grouphash.metadata` every time we want to check a value
+    def assert_metadata_value(self, grouphash, value_name, value):
+        assert grouphash and grouphash.metadata
+        assert getattr(grouphash.metadata, value_name) == value
+
     def test_creates_grouphash_metadata_when_appropriate(self):
         # The killswitch is obeyed
         with override_options({"grouping.grouphash_metadata.ingestion_writes_enabled": False}):
@@ -44,3 +55,62 @@ class GroupHashMetadataTest(TestCase):
                 project=self.project, hash=event4.get_primary_hash()
             ).first()
             assert grouphash and grouphash.metadata is None
+
+    @with_feature("organizations:grouphash-metadata-creation")
+    def test_stores_grouping_config(self):
+        event = save_new_event({"message": "Dogs are great!"}, self.project)
+        grouphash = GroupHash.objects.filter(
+            project=self.project, hash=event.get_primary_hash()
+        ).first()
+
+        self.assert_metadata_value(grouphash, "latest_grouping_config", DEFAULT_GROUPING_CONFIG)
+
+    @with_feature("organizations:grouphash-metadata-creation")
+    def test_updates_grouping_config(self):
+        self.project.update_option("sentry:grouping_config", LEGACY_GROUPING_CONFIG)
+
+        event1 = save_new_event({"message": "Dogs are great!"}, self.project)
+        grouphash1 = GroupHash.objects.filter(
+            project=self.project, hash=event1.get_primary_hash()
+        ).first()
+
+        self.assert_metadata_value(grouphash1, "latest_grouping_config", LEGACY_GROUPING_CONFIG)
+
+        # Update the grouping config. Since there's nothing to parameterize in the message, the
+        # result should be the same under both configs.
+        self.project.update_option("sentry:grouping_config", DEFAULT_GROUPING_CONFIG)
+
+        event2 = save_new_event({"message": "Dogs are great!"}, self.project)
+        grouphash2 = GroupHash.objects.filter(
+            project=self.project, hash=event2.get_primary_hash()
+        ).first()
+
+        self.assert_metadata_value(grouphash2, "latest_grouping_config", DEFAULT_GROUPING_CONFIG)
+
+        # Make sure we're dealing with a single grouphash that got updated rather than two different grouphashes
+        assert grouphash1 and grouphash2 and grouphash1.id == grouphash2.id
+
+    @with_feature("organizations:grouphash-metadata-creation")
+    def test_stores_correct_config_on_primary_and_secondary_hash(self):
+        # Set the project to be in a grouping config transition so that primary and secondary hashes
+        # will both be calculated, and include numbers in the message of one of the events sent to
+        # Seer so that the primary and secondary hashes will be different (since the legacy config
+        # won't parameterize the numbers)
+        self.project.update_option("sentry:grouping_config", DEFAULT_GROUPING_CONFIG)
+        self.project.update_option("sentry:secondary_grouping_config", LEGACY_GROUPING_CONFIG)
+        self.project.update_option("sentry:secondary_grouping_expiry", time() + 3600)
+
+        event = save_new_event({"message": "Dogs are great! 11211231"}, self.project)
+
+        grouphashes = GroupHash.objects.filter(group_id=NonNone(event.group_id))
+        assert len(grouphashes) == 2
+
+        primary_grouphash = grouphashes.filter(hash=event.get_primary_hash()).first()
+        secondary_grouphash = grouphashes.exclude(hash=event.get_primary_hash()).first()
+
+        self.assert_metadata_value(
+            primary_grouphash, "latest_grouping_config", DEFAULT_GROUPING_CONFIG
+        )
+        self.assert_metadata_value(
+            secondary_grouphash, "latest_grouping_config", LEGACY_GROUPING_CONFIG
+        )