Browse Source

feat(seer grouping): Add call to Seer to optimized grouping flow in ingest (#75266)

There are currently two paths ingest can follow when saving an event: the existing `_save_aggregate` (which runs unnecessary calculations when a project is in a grouping config transition) and the newer `_save_aggregate_new` (which only does extra calculations when necessary). Until now, Seer grouping had only been integrated into the first path, meaning we had to turn off the second path entirely. This adds the Seer logic to the new path, so we don't have to choose between the features.

Note: The tests look like they've changed more than they actually have. In reality, all that's happened is that the body of each test has been parameterized and wrapped in the following code, which forces it to run through both paths mentioned above:

```
for use_optimized_grouping, existing_event_message, new_event_message in [
    (True, "Dogs are great!", "Adopt don't shop"),
    (False, "Maisey is silly", "Charlie is goofy"),
]:
    with patch(
        "sentry.event_manager.project_uses_optimized_grouping",
        return_value=use_optimized_grouping,
    ):
```
Katie Byers 7 months ago
parent
commit
71989f9e96
2 changed files with 116 additions and 60 deletions
  1. 12 5
      src/sentry/event_manager.py
  2. 104 55
      tests/sentry/event_manager/grouping/test_seer_grouping.py

+ 12 - 5
src/sentry/event_manager.py

@@ -1719,17 +1719,24 @@ def _save_aggregate_new(
         secondary = get_hashes_and_grouphashes(job, maybe_run_secondary_grouping, metric_tags)
         all_grouphashes = primary.grouphashes + secondary.grouphashes
 
-        # Now we know for sure whether or not a group already exists, so handle both cases
         if secondary.existing_grouphash:
             group_info = handle_existing_grouphash(
                 job, secondary.existing_grouphash, all_grouphashes, group_processing_kwargs
             )
             result = "found_secondary"
-
+        # If we still haven't found a group, ask Seer for a match (if enabled for the project)
         else:
-            group_info = create_group_with_grouphashes(
-                job, all_grouphashes, group_processing_kwargs
-            )
+            seer_matched_grouphash = maybe_check_seer_for_matching_grouphash(event, primary.hashes)
+
+            if seer_matched_grouphash:
+                group_info = handle_existing_grouphash(
+                    job, seer_matched_grouphash, all_grouphashes, group_processing_kwargs
+                )
+            # If we *still* haven't found a group into which to put the event, create a new group
+            else:
+                group_info = create_group_with_grouphashes(
+                    job, all_grouphashes, group_processing_kwargs
+                )
             result = "no_match"
 
     # From here on out, we're just doing housekeeping

+ 104 - 55
tests/sentry/event_manager/grouping/test_seer_grouping.py

@@ -93,74 +93,123 @@ class SeerEventManagerGroupingTest(TestCase):
     @patch("sentry.grouping.ingest.seer.should_call_seer_for_grouping", return_value=True)
     @patch("sentry.grouping.ingest.seer.get_seer_similar_issues", return_value=({}, None))
     def test_calls_seer_if_no_group_found(self, mock_get_seer_similar_issues: MagicMock, _):
-        save_new_event({"message": "Dogs are great!"}, self.project)
-        assert mock_get_seer_similar_issues.call_count == 1
+        for use_optimized_grouping, event_message in [
+            (True, "Dogs are great!"),
+            (False, "Adopt don't shop"),
+        ]:
+            with patch(
+                "sentry.event_manager.project_uses_optimized_grouping",
+                return_value=use_optimized_grouping,
+            ):
+                save_new_event({"message": event_message}, self.project)
+                assert (
+                    mock_get_seer_similar_issues.call_count == 1
+                ), f"Case {use_optimized_grouping} failed"
+                mock_get_seer_similar_issues.reset_mock()
 
     @patch("sentry.grouping.ingest.seer.should_call_seer_for_grouping", return_value=True)
     @patch("sentry.grouping.ingest.seer.get_seer_similar_issues", return_value=({}, None))
     def test_bypasses_seer_if_group_found(self, mock_get_seer_similar_issues: MagicMock, _):
-        existing_event = save_new_event({"message": "Dogs are great!"}, self.project)
-        assert mock_get_seer_similar_issues.call_count == 1
-
-        new_event = save_new_event({"message": "Dogs are great!"}, self.project)
-        assert existing_event.group_id == new_event.group_id
-        assert mock_get_seer_similar_issues.call_count == 1  # didn't get called again
+        for use_optimized_grouping, event_message in [
+            (True, "Dogs are great!"),
+            (False, "Adopt don't shop"),
+        ]:
+            with patch(
+                "sentry.event_manager.project_uses_optimized_grouping",
+                return_value=use_optimized_grouping,
+            ):
+                existing_event = save_new_event({"message": event_message}, self.project)
+                assert mock_get_seer_similar_issues.call_count == 1
+
+                new_event = save_new_event({"message": event_message}, self.project)
+                assert existing_event.group_id == new_event.group_id
+                assert mock_get_seer_similar_issues.call_count == 1  # didn't get called again
+
+                mock_get_seer_similar_issues.reset_mock()
 
     @patch("sentry.grouping.ingest.seer.should_call_seer_for_grouping", return_value=True)
     def test_stores_seer_results_in_metadata(self, _):
-        existing_event = save_new_event({"message": "Dogs are great!"}, self.project)
-
-        seer_result_data = SeerSimilarIssueData(
-            parent_hash=NonNone(existing_event.get_primary_hash()),
-            parent_group_id=NonNone(existing_event.group_id),
-            stacktrace_distance=0.01,
-            message_distance=0.05,
-            should_group=True,
-        )
+        for use_optimized_grouping, existing_event_message, new_event_message in [
+            (True, "Dogs are great!", "Adopt don't shop"),
+            (False, "Maisey is silly", "Charlie is goofy"),
+        ]:
+            with patch(
+                "sentry.event_manager.project_uses_optimized_grouping",
+                return_value=use_optimized_grouping,
+            ):
+                existing_event = save_new_event({"message": existing_event_message}, self.project)
+
+                seer_result_data = SeerSimilarIssueData(
+                    parent_hash=NonNone(existing_event.get_primary_hash()),
+                    parent_group_id=NonNone(existing_event.group_id),
+                    stacktrace_distance=0.01,
+                    message_distance=0.05,
+                    should_group=True,
+                )
+
+                with patch(
+                    "sentry.grouping.ingest.seer.get_similarity_data_from_seer",
+                    return_value=[seer_result_data],
+                ):
+                    new_event = save_new_event({"message": new_event_message}, self.project)
+                    expected_metadata = {
+                        "similarity_model_version": SEER_SIMILARITY_MODEL_VERSION,
+                        "results": [asdict(seer_result_data)],
+                    }
 
-        with patch(
-            "sentry.grouping.ingest.seer.get_similarity_data_from_seer",
-            return_value=[seer_result_data],
-        ):
-            new_event = save_new_event({"message": "Adopt don't shop"}, self.project)
-            expected_metadata = {
-                "similarity_model_version": SEER_SIMILARITY_MODEL_VERSION,
-                "results": [asdict(seer_result_data)],
-            }
-
-        assert new_event.data["seer_similarity"] == expected_metadata
+                assert new_event.data["seer_similarity"] == expected_metadata
 
     @with_feature("projects:similarity-embeddings-grouping")
     @patch("sentry.grouping.ingest.seer.should_call_seer_for_grouping", return_value=True)
     def test_assigns_event_to_neighbor_group_if_found(self, _):
-        existing_event = save_new_event({"message": "Dogs are great!"}, self.project)
-
-        seer_result_data = SeerSimilarIssueData(
-            parent_hash=NonNone(existing_event.get_primary_hash()),
-            parent_group_id=NonNone(existing_event.group_id),
-            stacktrace_distance=0.01,
-            message_distance=0.05,
-            should_group=True,
-        )
-
-        with patch(
-            "sentry.grouping.ingest.seer.get_similarity_data_from_seer",
-            return_value=[seer_result_data],
-        ) as mock_get_similarity_data:
-            new_event = save_new_event({"message": "Adopt don't shop"}, self.project)
-
-            assert mock_get_similarity_data.call_count == 1
-            assert existing_event.group_id == new_event.group_id
+        for use_optimized_grouping, existing_event_message, new_event_message in [
+            (True, "Dogs are great!", "Adopt don't shop"),
+            (False, "Maisey is silly", "Charlie is goofy"),
+        ]:
+            with patch(
+                "sentry.event_manager.project_uses_optimized_grouping",
+                return_value=use_optimized_grouping,
+            ):
+                existing_event = save_new_event({"message": existing_event_message}, self.project)
+
+                seer_result_data = SeerSimilarIssueData(
+                    parent_hash=NonNone(existing_event.get_primary_hash()),
+                    parent_group_id=NonNone(existing_event.group_id),
+                    stacktrace_distance=0.01,
+                    message_distance=0.05,
+                    should_group=True,
+                )
+
+                with patch(
+                    "sentry.grouping.ingest.seer.get_similarity_data_from_seer",
+                    return_value=[seer_result_data],
+                ) as mock_get_similarity_data:
+                    new_event = save_new_event({"message": new_event_message}, self.project)
+
+                    assert mock_get_similarity_data.call_count == 1
+                    assert existing_event.group_id == new_event.group_id
+
+                    mock_get_similarity_data.reset_mock()
 
     @with_feature("projects:similarity-embeddings-grouping")
     @patch("sentry.grouping.ingest.seer.should_call_seer_for_grouping", return_value=True)
     def test_creates_new_group_if_no_neighbor_found(self, _):
-        existing_event = save_new_event({"message": "Dogs are great!"}, self.project)
-
-        with patch(
-            "sentry.grouping.ingest.seer.get_similarity_data_from_seer", return_value=[]
-        ) as mock_get_similarity_data:
-            new_event = save_new_event({"message": "Adopt don't shop"}, self.project)
-
-            assert mock_get_similarity_data.call_count == 1
-            assert existing_event.group_id != new_event.group_id
+        for use_optimized_grouping, existing_event_message, new_event_message in [
+            (True, "Dogs are great!", "Adopt don't shop"),
+            (False, "Maisey is silly", "Charlie is goofy"),
+        ]:
+            with patch(
+                "sentry.event_manager.project_uses_optimized_grouping",
+                return_value=use_optimized_grouping,
+            ):
+                existing_event = save_new_event({"message": existing_event_message}, self.project)
+
+                with patch(
+                    "sentry.grouping.ingest.seer.get_similarity_data_from_seer", return_value=[]
+                ) as mock_get_similarity_data:
+                    new_event = save_new_event({"message": new_event_message}, self.project)
+
+                    assert mock_get_similarity_data.call_count == 1
+                    assert existing_event.group_id != new_event.group_id
+
+                    mock_get_similarity_data.reset_mock()