fix(reprocessing): Apply fresh grouping config (#33176)

Reprocessing is a feature for native platforms intended to re-apply
debug files. Its behavior with regard to grouping configuration has
been inconsistent: while fingerprinting rules were always taken fresh
from the project, stack trace rules and the grouping strategy were
taken from the event. This PR makes the behavior consistent by always
loading the grouping config from the project when handling a
reprocessed event.
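
For context, the "grouping config" in question is a small dict pairing a
grouping strategy id with serialized stack trace rules (enhancements);
fingerprinting rules are stored separately on the project. A minimal sketch
of what such a dict looks like, built the same way as in the test added
below (assumes a sentry development environment):

    # Sketch: a grouping config dict as constructed in the new test.
    # DEFAULT_GROUPING_CONFIG names the current strategy version, and
    # Enhancements serializes stack trace rules into a compact string.
    from sentry.grouping.enhancer import Enhancements
    from sentry.projectoptions.defaults import DEFAULT_GROUPING_CONFIG

    enhancements = Enhancements.from_config_string(
        "function:c -group",  # stack trace rule: frames named "c" don't contribute
        bases=[],
    ).dumps()

    grouping_config = {"id": DEFAULT_GROUPING_CONFIG, "enhancements": enhancements}

It is this dict that the change below reloads from the project (instead of
reusing the copy stored on the event) whenever an event is reprocessed.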
Joris Bayer · 2 years ago · commit 928f479a08
2 changed files with 169 additions and 6 deletions:
  1. src/sentry/event_manager.py (+15 -6)
  2. tests/sentry/tasks/test_reprocessing2.py (+154 -0)

src/sentry/event_manager.py (+15 -6)

@@ -389,9 +389,18 @@ class EventManager:
         with metrics.timer("event_manager.load_grouping_config"):
             # At this point we want to normalize the in_app values in case the
             # clients did not set this appropriately so far.
-            grouping_config = get_grouping_config_dict_for_event_data(
-                job["event"].data.data, project
-            )
+            if is_reprocessed:
+                # The customer might have changed grouping enhancements since
+                # the event was ingested -> make sure we get the fresh one for reprocessing.
+                grouping_config = get_grouping_config_dict_for_project(project)
+                # Write back grouping config because it might have changed since the
+                # event was ingested.
+                # NOTE: We could do this unconditionally (regardless of `is_reprocessed`).
+                job["data"]["grouping_config"] = grouping_config
+            else:
+                grouping_config = get_grouping_config_dict_for_event_data(
+                    job["event"].data.data, project
+                )
 
         with sentry_sdk.start_span(op="event_manager.save.calculate_event_grouping"), metrics.timer(
             "event_manager.calculate_event_grouping"
@@ -845,12 +854,12 @@ def _nodestore_save_many(jobs):
 
         if job["group"]:
             event = job["event"]
-            data = event_processing_store.get(
+            unprocessed = event_processing_store.get(
                 cache_key_for_event({"project": event.project_id, "event_id": event.event_id}),
                 unprocessed=True,
             )
-            if data is not None:
-                subkeys["unprocessed"] = data
+            if unprocessed is not None:
+                subkeys["unprocessed"] = unprocessed
 
         job["event"].data["nodestore_insert"] = inserted_time
         job["event"].data.save(subkeys=subkeys)

tests/sentry/tasks/test_reprocessing2.py (+154 -0)

@@ -1,6 +1,7 @@
 import uuid
 from io import BytesIO
 from time import time
+from unittest import mock
 
 import pytest
 
@@ -8,6 +9,8 @@ from sentry import eventstore
 from sentry.attachments import attachment_cache
 from sentry.event_manager import EventManager
 from sentry.eventstore.processing import event_processing_store
+from sentry.grouping.enhancer import Enhancements
+from sentry.grouping.fingerprinting import FingerprintingRules
 from sentry.models import (
     Activity,
     EventAttachment,
@@ -18,6 +21,7 @@ from sentry.models import (
     UserReport,
 )
 from sentry.plugins.base.v2 import Plugin2
+from sentry.projectoptions.defaults import DEFAULT_GROUPING_CONFIG
 from sentry.reprocessing2 import is_group_finished
 from sentry.tasks.reprocessing2 import reprocess_group
 from sentry.tasks.store import preprocess_event
@@ -434,3 +438,153 @@ def test_nodestore_missing(
         )
 
     assert logs == ["reprocessing2.unprocessed_event.not_found"]
+
+
+@pytest.mark.django_db
+@pytest.mark.snuba
+def test_apply_new_fingerprinting_rules(
+    default_project,
+    reset_snuba,
+    register_event_preprocessor,
+    process_and_save,
+    burst_task_runner,
+):
+    """
+    Assert that after changing fingerprinting rules, the new fingerprinting config
+    is respected by reprocessing.
+    """
+
+    @register_event_preprocessor
+    def event_preprocessor(data):
+        extra = data.setdefault("extra", {})
+        extra.setdefault("processing_counter", 0)
+        extra["processing_counter"] += 1
+        return data
+
+    event_id1 = process_and_save({"message": "hello world 1"})
+    event_id2 = process_and_save({"message": "hello world 2"})
+
+    event1 = eventstore.get_event_by_id(default_project.id, event_id1)
+    event2 = eventstore.get_event_by_id(default_project.id, event_id2)
+
+    # Same group, because grouping scrubs integers from message:
+    assert event1.group.id == event2.group.id
+    original_issue_id = event1.group.id
+    assert event1.group.message == "hello world 2"
+
+    # Change fingerprinting rules
+    new_rules = FingerprintingRules.from_config_string(
+        """
+    message:"hello world 1" -> hw1 title="HW1"
+    """
+    )
+
+    with mock.patch(
+        "sentry.event_manager.get_fingerprinting_config_for_project", return_value=new_rules
+    ):
+        # Reprocess
+        with burst_task_runner() as burst_reprocess:
+            reprocess_group(default_project.id, event1.group_id)
+        burst_reprocess(max_jobs=100)
+
+    assert is_group_finished(event1.group_id)
+
+    # Events should now be in different groups:
+    event1 = eventstore.get_event_by_id(default_project.id, event_id1)
+    event2 = eventstore.get_event_by_id(default_project.id, event_id2)
+    assert event1.group.id != original_issue_id
+    assert event1.group.id != event2.group.id
+    assert event1.group.message == "hello world 1 HW1"
+    assert event2.group.message == "hello world 2"
+
+
+@pytest.mark.django_db
+@pytest.mark.snuba
+def test_apply_new_stack_trace_rules(
+    default_project,
+    reset_snuba,
+    register_event_preprocessor,
+    process_and_save,
+    burst_task_runner,
+):
+    """
+    Assert that after changing stack trace rules, the new grouping config
+    is respected by reprocessing.
+    """
+
+    @register_event_preprocessor
+    def event_preprocessor(data):
+        extra = data.setdefault("extra", {})
+        extra.setdefault("processing_counter", 0)
+        extra["processing_counter"] += 1
+        return data
+
+    event_id1 = process_and_save(
+        {
+            "platform": "native",
+            "stacktrace": {
+                "frames": [
+                    {
+                        "function": "a",
+                    },
+                    {
+                        "function": "b",
+                    },
+                ]
+            },
+        }
+    )
+    event_id2 = process_and_save(
+        {
+            "platform": "native",
+            "stacktrace": {
+                "frames": [
+                    {
+                        "function": "a",
+                    },
+                    {
+                        "function": "b",
+                    },
+                    {
+                        "function": "c",
+                    },
+                ]
+            },
+        }
+    )
+
+    event1 = eventstore.get_event_by_id(default_project.id, event_id1)
+    event2 = eventstore.get_event_by_id(default_project.id, event_id2)
+
+    original_grouping_config = event1.data["grouping_config"]
+
+    # Different group, because different stack trace
+    assert event1.group.id != event2.group.id
+    original_issue_id = event1.group.id
+
+    with mock.patch(
+        "sentry.event_manager.get_grouping_config_dict_for_project",
+        return_value={
+            "id": DEFAULT_GROUPING_CONFIG,
+            "enhancements": Enhancements.from_config_string(
+                "function:c -group",
+                bases=[],
+            ).dumps(),
+        },
+    ):
+        # Reprocess
+        with burst_task_runner() as burst_reprocess:
+            reprocess_group(default_project.id, event1.group_id)
+            reprocess_group(default_project.id, event2.group_id)
+        burst_reprocess(max_jobs=100)
+
+    assert is_group_finished(event1.group_id)
+    assert is_group_finished(event2.group_id)
+
+    # Events should now be in the same group because of the stack trace rule
+    event1 = eventstore.get_event_by_id(default_project.id, event_id1)
+    event2 = eventstore.get_event_by_id(default_project.id, event_id2)
+    assert event1.group.id != original_issue_id
+    assert event1.group.id == event2.group.id
+
+    assert event1.data["grouping_config"] != original_grouping_config