Browse Source

ref(seer grouping): Stop using `projects:similarity-embeddings-metadata` flag (#74914)

When we first started working on the Seer grouping project, we were making a distiction between storing Seer results as metadata and using those results for grouping. The idea was to use storing but not using the results could serve as a way to do a dry run of the feature.

In the meantime, however, we've gotten rid of that distinction, and now both flags (`projects:similarity-embeddings-metadata` and `projects:similarity-embeddings-grouping`) check the same enablement list. We can therefore stop checking the metadata flag and just check the grouping flag, with an eye to eventually removing the metadata flag all together. This does that, and includes the following changes:

- Stop checking the metdata flag as part of `should_call_seer_for_grouping`.
- Stop checking the grouping flag as part of `get_seer_similar_issues`, as now we'll only call it if we already know the flag is on.
- Remove any tests testing behavior when the two flags have different values.
- Fix a test (`test_sends_expected_data_to_seer`) which had a bug that the flag check in `get_seer_similar_issues` was hiding.
Katie Byers 7 months ago
parent
commit
0f048de08c

+ 3 - 9
src/sentry/grouping/ingest/seer.py

@@ -57,9 +57,7 @@ def should_call_seer_for_grouping(event: Event, primary_hashes: CalculatedHashes
 
 
 def _project_has_similarity_grouping_enabled(project: Project) -> bool:
-    has_either_seer_grouping_feature = features.has(
-        "projects:similarity-embeddings-metadata", project
-    ) or features.has("projects:similarity-embeddings-grouping", project)
+    has_seer_grouping_flag_on = features.has("projects:similarity-embeddings-grouping", project)
 
     # TODO: This is a hack to get ingest to turn on for projects as soon as they're backfilled. When
     # the backfill script completes, we turn on this option, enabling ingest immediately rather than
@@ -67,7 +65,7 @@ def _project_has_similarity_grouping_enabled(project: Project) -> bool:
     # projects have been backfilled, the option (and this check) can go away.
     has_been_backfilled = project.get_option("sentry:similarity_backfill_completed")
 
-    return has_either_seer_grouping_feature or has_been_backfilled
+    return has_seer_grouping_flag_on or has_been_backfilled
 
 
 # TODO: Here we're including events with hybrid fingerprints (ones which are `{{ default }}`
@@ -199,11 +197,7 @@ def get_seer_similar_issues(
     )
     parent_group = (
         Group.objects.filter(id=seer_results[0].parent_group_id).first()
-        if (
-            seer_results
-            and seer_results[0].should_group
-            and features.has("projects:similarity-embeddings-grouping", event.project)
-        )
+        if seer_results and seer_results[0].should_group
         else None
     )
 

+ 2 - 70
tests/sentry/event_manager/grouping/test_seer_grouping.py

@@ -54,12 +54,7 @@ class SeerEventManagerGroupingTest(TestCase):
             ),
         ):
 
-            with Feature(
-                {
-                    "projects:similarity-embeddings-metadata": False,
-                    "projects:similarity-embeddings-grouping": False,
-                }
-            ):
+            with Feature({"projects:similarity-embeddings-grouping": False}):
                 new_event = save_new_event({"message": "Adopt don't shop"}, self.project)
 
                 # We checked whether to make the call, but didn't go through with it
@@ -74,12 +69,7 @@ class SeerEventManagerGroupingTest(TestCase):
                 should_call_seer_spy.reset_mock()
                 get_seer_similar_issues_spy.reset_mock()
 
-            with Feature(
-                {
-                    "projects:similarity-embeddings-metadata": True,
-                    "projects:similarity-embeddings-grouping": False,
-                }
-            ):
+            with Feature({"projects:similarity-embeddings-grouping": True}):
                 new_event = save_new_event({"message": "Maisey is silly"}, self.project)
                 expected_metadata = {**metadata_base, "request_hash": new_event.get_primary_hash()}
 
@@ -87,63 +77,6 @@ class SeerEventManagerGroupingTest(TestCase):
                 assert should_call_seer_spy.call_count == 1
                 assert get_seer_similar_issues_spy.call_count == 1
 
-                # Metadata returned and stored
-                assert get_seer_similar_issues_return_values[0][0] == expected_metadata
-                assert (
-                    NonNone(new_event.group).data["metadata"]["seer_similarity"]
-                    == expected_metadata
-                )
-                assert new_event.data["seer_similarity"] == expected_metadata
-
-                # No parent group returned or used (even though `should_group` is True)
-                assert get_seer_similar_issues_return_values[0][1] is None
-                assert new_event.group_id != existing_event.group_id
-
-                should_call_seer_spy.reset_mock()
-                get_seer_similar_issues_spy.reset_mock()
-                get_seer_similar_issues_return_values.pop()
-
-            with Feature(
-                {
-                    "projects:similarity-embeddings-metadata": False,
-                    "projects:similarity-embeddings-grouping": True,
-                }
-            ):
-                new_event = save_new_event({"message": "Charlie is goofy"}, self.project)
-                expected_metadata = {**metadata_base, "request_hash": new_event.get_primary_hash()}
-
-                # We checked whether to make the call, and then made it
-                assert should_call_seer_spy.call_count == 1
-                assert get_seer_similar_issues_spy.call_count == 1
-
-                # Metadata returned and stored (metadata flag being off doesn't matter because
-                # grouping flag takes precedence)
-                assert get_seer_similar_issues_return_values[0][0] == expected_metadata
-                assert new_event.data["seer_similarity"] == expected_metadata
-
-                # Parent group returned and used
-                assert get_seer_similar_issues_return_values[0][1] == existing_event.group
-                assert new_event.group_id == existing_event.group_id
-
-                should_call_seer_spy.reset_mock()
-                get_seer_similar_issues_spy.reset_mock()
-                get_seer_similar_issues_return_values.pop()
-
-            with Feature(
-                {
-                    "projects:similarity-embeddings-metadata": True,
-                    "projects:similarity-embeddings-grouping": True,
-                }
-            ):
-                new_event = save_new_event(
-                    {"message": "Cori and Bodhi are ridiculous"}, self.project
-                )
-                expected_metadata = {**metadata_base, "request_hash": new_event.get_primary_hash()}
-
-                # We checked whether to make the call, and then made it
-                assert should_call_seer_spy.call_count == 1
-                assert get_seer_similar_issues_spy.call_count == 1
-
                 # Metadata returned and stored
                 assert get_seer_similar_issues_return_values[0][0] == expected_metadata
                 assert new_event.data["seer_similarity"] == expected_metadata
@@ -208,7 +141,6 @@ class SeerEventManagerGroupingTest(TestCase):
                 "results": [asdict(seer_result_data)],
             }
 
-        assert NonNone(new_event.group).data["metadata"]["seer_similarity"] == expected_metadata
         assert new_event.data["seer_similarity"] == expected_metadata
 
     @with_feature("projects:similarity-embeddings-grouping")

+ 8 - 51
tests/sentry/grouping/ingest/test_seer.py

@@ -46,36 +46,20 @@ class ShouldCallSeerTest(TestCase):
         self.primary_hashes = self.event.get_hashes()
 
     def test_obeys_feature_enablement_check(self):
-        for metadata_flag, grouping_flag, backfill_completed_option, expected_result in [
-            # TODO: This manual cartesian product business is gross, but thankfully it's temporary -
-            # the metadata flag is about to go away and the backfill completed option will go away
-            # once all projects are backfilled.
-            (False, False, None, False),
-            (True, False, None, True),
-            (False, True, None, True),
-            (True, True, None, True),
-            (False, False, 11211231, True),
-            (True, False, 11211231, True),
-            (False, True, 11211231, True),
-            (True, True, 11211231, True),
+        for grouping_flag, backfill_completed_option, expected_result in [
+            (False, None, False),
+            (True, None, True),
+            (False, 11211231, True),
+            (True, 11211231, True),
         ]:
-            with (
-                Feature(
-                    {
-                        "projects:similarity-embeddings-metadata": metadata_flag,
-                        "projects:similarity-embeddings-grouping": grouping_flag,
-                    }
-                ),
-                # Having too many cases above makes us hit the project rate limit if we don't patch this
-                patch("sentry.grouping.ingest.seer._ratelimiting_enabled", return_value=False),
-            ):
+            with Feature({"projects:similarity-embeddings-grouping": grouping_flag}):
                 self.project.update_option(
                     "sentry:similarity_backfill_completed", backfill_completed_option
                 )
                 assert (
                     should_call_seer_for_grouping(self.event, self.primary_hashes)
                     is expected_result
-                ), f"Case (metadata {metadata_flag}, grouping {grouping_flag}, backfill completed {backfill_completed_option}) failed."
+                ), f"Case (grouping {grouping_flag}, backfill completed {backfill_completed_option}) failed."
 
     @with_feature("projects:similarity-embeddings-grouping")
     def test_obeys_content_filter(self):
@@ -186,7 +170,7 @@ class GetSeerSimilarIssuesTest(TestCase):
         )
         self.new_event_hashes = CalculatedHashes(["20130809201315042012311220122111"])
 
-    @patch("sentry.grouping.ingest.seer.get_similarity_data_from_seer")
+    @patch("sentry.grouping.ingest.seer.get_similarity_data_from_seer", return_value=[])
     def test_sends_expected_data_to_seer(self, mock_get_similarity_data: MagicMock):
         new_event = Event(
             project_id=self.project.id,
@@ -234,31 +218,6 @@ class GetSeerSimilarIssuesTest(TestCase):
                 }
             )
 
-    @with_feature({"projects:similarity-embeddings-grouping": False})
-    def test_returns_metadata_but_no_group_if_seer_grouping_flag_off(self):
-        seer_result_data = SeerSimilarIssueData(
-            parent_hash=NonNone(self.existing_event.get_primary_hash()),
-            parent_group_id=NonNone(self.existing_event.group_id),
-            stacktrace_distance=0.01,
-            message_distance=0.05,
-            should_group=True,
-        )
-        expected_metadata = {
-            "similarity_model_version": SEER_SIMILARITY_MODEL_VERSION,
-            "request_hash": self.new_event_hashes.hashes[0],
-            "results": [asdict(seer_result_data)],
-        }
-
-        with patch(
-            "sentry.grouping.ingest.seer.get_similarity_data_from_seer",
-            return_value=[seer_result_data],
-        ):
-            assert get_seer_similar_issues(self.new_event, self.new_event_hashes) == (
-                expected_metadata,
-                None,  # No group returned, even though `should_group` is True
-            )
-
-    @with_feature("projects:similarity-embeddings-grouping")
     def test_returns_metadata_and_group_if_sufficiently_close_group_found(self):
         seer_result_data = SeerSimilarIssueData(
             parent_hash=NonNone(self.existing_event.get_primary_hash()),
@@ -282,7 +241,6 @@ class GetSeerSimilarIssuesTest(TestCase):
                 self.existing_event.group,
             )
 
-    @with_feature("projects:similarity-embeddings-grouping")
     def test_returns_metadata_but_no_group_if_similar_group_insufficiently_close(self):
         seer_result_data = SeerSimilarIssueData(
             parent_hash=NonNone(self.existing_event.get_primary_hash()),
@@ -306,7 +264,6 @@ class GetSeerSimilarIssuesTest(TestCase):
                 None,
             )
 
-    @with_feature("projects:similarity-embeddings-grouping")
     def test_returns_no_group_and_empty_metadata_if_no_similar_group_found(self):
         expected_metadata = {
             "similarity_model_version": SEER_SIMILARITY_MODEL_VERSION,