@@ -10,11 +10,12 @@ from django.conf import settings
 from google.api_core.exceptions import DeadlineExceeded, ServiceUnavailable

 from sentry.api.endpoints.group_similar_issues_embeddings import get_stacktrace_string
+from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION
 from sentry.grouping.grouping_info import get_grouping_info
 from sentry.issues.occurrence_consumer import EventLookupError
 from sentry.models.group import Group
 from sentry.models.grouphash import GroupHash
-from sentry.seer.utils import CreateGroupingRecordData
+from sentry.seer.utils import CreateGroupingRecordData, RawSeerSimilarIssueData
 from sentry.tasks.backfill_seer_grouping_records import (
     GroupStacktraceData,
     backfill_seer_grouping_records,
@@ -80,7 +81,10 @@ class TestBackfillSeerGroupingRecords(SnubaTestCase, TestCase):
         }

     def create_group_event_rows(self, num: int) -> Mapping[str, Any]:
-        """Create num events and their corresponding group rows"""
+        """
+        Create num events and their corresponding group rows. Set times_seen to 5 for each
+        event's group.
+        """
         rows, events, messages = [], [], {}
         function_names = [f"function_{str(i)}" for i in range(num)]
         type_names = [f"Error{str(i)}" for i in range(num)]
@@ -93,6 +97,8 @@ class TestBackfillSeerGroupingRecords(SnubaTestCase, TestCase):
             }
             event = self.store_event(data=data, project_id=self.project.id, assert_no_errors=False)
             events.append(event)
+            event.group.times_seen = 5
+            event.group.save()
             messages.update({event.group.id: event.group.message})
             rows.append(
                 {
@@ -119,6 +125,8 @@ class TestBackfillSeerGroupingRecords(SnubaTestCase, TestCase):
         self.event = self.store_event(
             data={"exception": EXCEPTION}, project_id=self.project.id, assert_no_errors=False
         )
+        self.event.group.times_seen = 5
+        self.event.group.save()
         group_hashes = GroupHash.objects.all().distinct("group_id")
         self.group_hashes = {group_hash.group_id: group_hash.hash for group_hash in group_hashes}

@@ -146,7 +154,10 @@ class TestBackfillSeerGroupingRecords(SnubaTestCase, TestCase):
             self.project, event.event_id, event.group_id, event.group.message, hash
         )
         expected_group_data = CreateGroupingRecordData(
-            hash=hash, project_id=self.project.id, message=event.group.message
+            group_id=event.group.id,
+            hash=hash,
+            project_id=self.project.id,
+            message=event.group.message,
         )
         assert group_data == expected_group_data
         assert stacktrace_string == EXCEPTION_STACKTRACE_STRING
@@ -218,6 +229,7 @@ class TestBackfillSeerGroupingRecords(SnubaTestCase, TestCase):
         expected_event_ids = {event.event_id for event in events}
         expected_group_data = [
             CreateGroupingRecordData(
+                group_id=event.group.id,
                 hash=self.group_hashes[event.group.id],
                 project_id=self.project.id,
                 message=event.group.message,
@@ -236,9 +248,12 @@ class TestBackfillSeerGroupingRecords(SnubaTestCase, TestCase):
             "backfill_grouping_records._lookup_event_bulk.hit_ratio", 100, sample_rate=1.0
         )

+    @patch("time.sleep", return_value=None)
     @patch("sentry.nodestore.backend.get_multi")
     @patch("sentry.tasks.backfill_seer_grouping_records.logger")
-    def test_lookup_group_data_stacktrace_bulk_exceptions(self, mock_logger, mock_get_multi):
+    def test_lookup_group_data_stacktrace_bulk_exceptions(
+        self, mock_logger, mock_get_multi, mock_sleep
+    ):
         """
         Test cases where ServiceUnavailable or DeadlineExceeded exceptions occur in bulk data
         lookup
@@ -298,7 +313,10 @@ class TestBackfillSeerGroupingRecords(SnubaTestCase, TestCase):
         ) = lookup_group_data_stacktrace_bulk(self.project, rows, messages, hashes)
         expected_group_data = [
             CreateGroupingRecordData(
-                hash=hashes[event.group.id], project_id=self.project.id, message=event.group.message
+                group_id=event.group.id,
+                hash=hashes[event.group.id],
+                project_id=self.project.id,
+                message=event.group.message,
             )
             for event in events
         ]
@@ -335,7 +353,10 @@ class TestBackfillSeerGroupingRecords(SnubaTestCase, TestCase):
         ) = lookup_group_data_stacktrace_bulk(self.project, rows, messages, hashes)
         expected_group_data = [
             CreateGroupingRecordData(
-                hash=hashes[event.group.id], project_id=self.project.id, message=event.group.message
+                group_id=event.group.id,
+                hash=hashes[event.group.id],
+                project_id=self.project.id,
+                message=event.group.message,
             )
             for event in events
         ]
@@ -371,7 +392,10 @@ class TestBackfillSeerGroupingRecords(SnubaTestCase, TestCase):
         ) = lookup_group_data_stacktrace_bulk(self.project, rows, messages, hashes)
         expected_group_data = [
             CreateGroupingRecordData(
-                hash=hashes[event.group.id], project_id=self.project.id, message=event.group.message
+                group_id=event.group.id,
+                hash=hashes[event.group.id],
+                project_id=self.project.id,
+                message=event.group.message,
             )
             for event in events
         ]
@@ -398,7 +422,10 @@ class TestBackfillSeerGroupingRecords(SnubaTestCase, TestCase):

         expected_group_data = [
             CreateGroupingRecordData(
-                hash=hashes[event.group.id], project_id=self.project.id, message=event.group.message
+                group_id=event.group.id,
+                hash=hashes[event.group.id],
+                project_id=self.project.id,
+                message=event.group.message,
             )
             for event in events
         ]
@@ -425,6 +452,7 @@ class TestBackfillSeerGroupingRecords(SnubaTestCase, TestCase):
             stacktrace_string = get_stacktrace_string(grouping_info)
             group_data.append(
                 CreateGroupingRecordData(
+                    group_id=event.group.id,
                     hash=self.group_hashes[event.group.id],
                     project_id=self.project.id,
                     message=event.group.message,
@@ -445,7 +473,10 @@ class TestBackfillSeerGroupingRecords(SnubaTestCase, TestCase):
         events = self.bulk_events
         expected_group_data = [
             CreateGroupingRecordData(
-                hash=hashes[event.group.id], project_id=self.project.id, message=event.group.message
+                group_id=event.group.id,
+                hash=hashes[event.group.id],
+                project_id=self.project.id,
+                message=event.group.message,
             )
             for event in events
         ]
@@ -473,6 +504,7 @@ class TestBackfillSeerGroupingRecords(SnubaTestCase, TestCase):
             stacktrace_string = get_stacktrace_string(grouping_info)
             group_data.append(
                 CreateGroupingRecordData(
+                    group_id=event.group.id,
                     hash=self.group_hashes[event.group.id],
                     project_id=self.project.id,
                     message=event.group.message,
@@ -497,7 +529,10 @@ class TestBackfillSeerGroupingRecords(SnubaTestCase, TestCase):
         events = self.bulk_events[:-1]
         expected_group_data = [
             CreateGroupingRecordData(
-                hash=hashes[event.group.id], project_id=self.project.id, message=event.group.message
+                group_id=event.group.id,
+                hash=hashes[event.group.id],
+                project_id=self.project.id,
+                message=event.group.message,
             )
             for event in events
         ]
@@ -539,7 +574,10 @@ class TestBackfillSeerGroupingRecords(SnubaTestCase, TestCase):
         events = self.bulk_events[:-1]
         expected_group_data = [
             CreateGroupingRecordData(
-                hash=hashes[event.group.id], project_id=self.project.id, message=event.group.message
+                group_id=event.group.id,
+                hash=hashes[event.group.id],
+                project_id=self.project.id,
+                message=event.group.message,
             )
             for event in events
         ]
@@ -562,47 +600,47 @@ class TestBackfillSeerGroupingRecords(SnubaTestCase, TestCase):
     @django_db_all
     @with_feature("projects:similarity-embeddings-backfill")
     @patch("sentry.tasks.backfill_seer_grouping_records.post_bulk_grouping_records")
-    def test_backfill_seer_grouping_records_success(self, mock_post_bulk_grouping_records):
+    def test_backfill_seer_grouping_records_success_simple(self, mock_post_bulk_grouping_records):
         """
         Test that the metadata is set for all groups showing that the record has been created.
         """
-        mock_post_bulk_grouping_records.return_value = {"success": True}
+        mock_post_bulk_grouping_records.return_value = {"success": True, "groups_with_neighbor": {}}

         with TaskRunner():
-            backfill_seer_grouping_records(self.project.id, 0)
+            backfill_seer_grouping_records(self.project.id, None)

-        for group in Group.objects.filter(project_id=self.project.id):
-            assert group.data["metadata"].get("embeddings_info") == {
-                "nn_model_version": 0,
-                "group_hash": json.dumps([self.group_hashes[group.id]]),
+        groups = Group.objects.filter(project_id=self.project.id)
+        for group in groups:
+            assert group.data["metadata"].get("seer_similarity") == {
+                "similarity_model_version": SEER_SIMILARITY_MODEL_VERSION,
+                "request_hash": self.group_hashes[group.id],
             }
         redis_client = redis.redis_clusters.get(settings.SENTRY_MONITORS_REDIS_CLUSTER)
-        last_processed_id = int(redis_client.get(make_backfill_redis_key(self.project.id)) or 0)
-        assert last_processed_id != 0
+        last_processed_index = int(redis_client.get(make_backfill_redis_key(self.project.id)) or 0)
+        assert last_processed_index == len(groups)

     @django_db_all
-    @patch(
-        "sentry.tasks.backfill_seer_grouping_records.lookup_group_data_stacktrace_bulk_with_fallback"
-    )
+    @patch("time.sleep", return_value=None)
+    @patch("sentry.nodestore.backend.get_multi")
+    @patch("sentry.tasks.backfill_seer_grouping_records.lookup_event")
     def test_backfill_seer_grouping_records_failure(
-        self, mock_lookup_group_data_stacktrace_bulk_with_fallback
+        self, mock_lookup_event, mock_get_multi, mock_sleep
     ):
         """
         Test that the group metadata and redis last processed id aren't updated on a failure.
         """
-        mock_lookup_group_data_stacktrace_bulk_with_fallback.side_effect = ServiceUnavailable(
-            message="Service Unavailable"
-        )
+        mock_lookup_event.side_effect = ServiceUnavailable(message="Service Unavailable")
+        mock_get_multi.side_effect = ServiceUnavailable(message="Service Unavailable")

         with TaskRunner():
-            backfill_seer_grouping_records(self.project.id, 0)
+            backfill_seer_grouping_records(self.project.id, None)

         redis_client = redis.redis_clusters.get(settings.SENTRY_MONITORS_REDIS_CLUSTER)
-        last_processed_id = int(redis_client.get(make_backfill_redis_key(self.project.id)) or 0)
-        assert last_processed_id == 0
+        last_processed_index = int(redis_client.get(make_backfill_redis_key(self.project.id)) or 0)
+        assert last_processed_index == 0

         for group in Group.objects.filter(project_id=self.project.id):
-            assert not group.data["metadata"].get("embeddings_info")
+            assert not group.data["metadata"].get("seer_similarity")

     @django_db_all
     def test_backfill_seer_grouping_records_no_feature(self):
@@ -612,10 +650,10 @@ class TestBackfillSeerGroupingRecords(SnubaTestCase, TestCase):
         project = self.create_project(organization=self.organization)

         with TaskRunner():
-            backfill_seer_grouping_records(project, 0)
+            backfill_seer_grouping_records(project, None)

         for group in Group.objects.filter(project_id=self.project.id):
-            assert not group.data["metadata"].get("embeddings_info")
+            assert not group.data["metadata"].get("seer_similarity")

     @django_db_all
     @with_feature("projects:similarity-embeddings-backfill")
@@ -627,16 +665,222 @@ class TestBackfillSeerGroupingRecords(SnubaTestCase, TestCase):
         """
         Test that the metadata is set for all groups showing that the record has been created.
         """
-        mock_post_bulk_grouping_records.return_value = {"success": True}
+        mock_post_bulk_grouping_records.return_value = {"success": True, "groups_with_neighbor": []}
         mock_delete_grouping_records.return_value = True
         with TaskRunner():
             backfill_seer_grouping_records(self.project.id, 0, dry_run=True)

+        groups = Group.objects.filter(project_id=self.project.id)
+        for group in groups:
+            assert not group.data["metadata"].get("seer_similarity") == {
+                "similarity_model_version": SEER_SIMILARITY_MODEL_VERSION,
+                "request_hash": self.group_hashes[group.id],
+            }
+        redis_client = redis.redis_clusters.get(settings.SENTRY_MONITORS_REDIS_CLUSTER)
+        last_processed_index = int(redis_client.get(make_backfill_redis_key(self.project.id)) or 0)
+        assert last_processed_index == len(groups)
+
+    @with_feature("projects:similarity-embeddings-backfill")
+    @patch("sentry.tasks.backfill_seer_grouping_records.post_bulk_grouping_records")
+    def test_backfill_seer_grouping_records_groups_1_times_seen(
+        self, mock_post_bulk_grouping_records
+    ):
+        """
+        Test that different metadata is set for groups where times_seen > 1 and times_seen == 1.
+        """
+        mock_post_bulk_grouping_records.return_value = {"success": True, "groups_with_neighbor": {}}
+
+        function_names = [f"new_function_{str(i)}" for i in range(5)]
+        type_names = [f"NewError{str(i)}" for i in range(5)]
+        value_names = ["error with value" for i in range(5)]
+        groups_seen_once = []
+        for i in range(5):
+            data = {
+                "exception": self.create_exception_values(
+                    function_names[i], type_names[i], value_names[i]
+                )
+            }
+            event = self.store_event(data=data, project_id=self.project.id, assert_no_errors=False)
+            groups_seen_once.append(event.group)
+
+        with TaskRunner():
+            backfill_seer_grouping_records(self.project.id, None)
+
         for group in Group.objects.filter(project_id=self.project.id):
-            assert not group.data["metadata"].get("embeddings_info") == {
-                "nn_model_version": 0,
-                "group_hash": json.dumps([self.group_hashes[group.id]]),
+            if group not in groups_seen_once:
+                assert group.data["metadata"].get("seer_similarity") == {
+                    "similarity_model_version": SEER_SIMILARITY_MODEL_VERSION,
+                    "request_hash": self.group_hashes[group.id],
+                }
+            else:
+                assert group.data["metadata"].get("seer_similarity") == {"times_seen_once": True}
+
+        redis_client = redis.redis_clusters.get(settings.SENTRY_MONITORS_REDIS_CLUSTER)
+        last_processed_index = int(redis_client.get(make_backfill_redis_key(self.project.id)) or 0)
+        assert last_processed_index == len(
+            Group.objects.filter(project_id=self.project.id, times_seen__gt=1)
+        )
+
+    @with_feature("projects:similarity-embeddings-backfill")
+    @patch("sentry.tasks.backfill_seer_grouping_records.post_bulk_grouping_records")
+    def test_backfill_seer_grouping_records_groups_have_neighbor(
+        self, mock_post_bulk_grouping_records
+    ):
+        """
+        Test that groups that have nearest neighbors do not get records created for them in
+        grouping_records.
+        Test that the metadata of groups that have nearest neighbors differs from that of
+        groups that have records created.
+        """
+        # Create groups with 1 < times_seen < 5
+        # The groups that these will be similar to have times_seen = 5
+        function_names = [f"another_function_{str(i)}" for i in range(5)]
+        type_names = [f"AnotherError{str(i)}" for i in range(5)]
+        value_names = ["error with value" for i in range(5)]
+        groups_with_neighbor = {}
+        for i in range(5):
+            data = {
+                "exception": self.create_exception_values(
+                    function_names[i], type_names[i], value_names[i]
+                )
+            }
+            event = self.store_event(data=data, project_id=self.project.id, assert_no_errors=False)
+            event.group.times_seen = 2
+            event.group.save()
+            # Arbitrarily choose a parent group's hash that has times_seen = 5
+            parent_group = Group.objects.filter(times_seen__gt=2).first()
+            parent_group_hash = GroupHash.objects.filter(group_id=parent_group.id).first()
+            groups_with_neighbor[str(event.group.id)] = RawSeerSimilarIssueData(
+                stacktrace_distance=0.01,
+                message_distance=0.01,
+                should_group=True,
+                parent_hash=parent_group_hash.hash,
+            )
+
+        mock_post_bulk_grouping_records.return_value = {
+            "success": True,
+            "groups_with_neighbor": groups_with_neighbor,
+        }
+
+        with TaskRunner():
+            backfill_seer_grouping_records(self.project.id, None)
+
+        groups = Group.objects.filter(project_id=self.project.id, times_seen__gt=1)
+        for group in groups:
+            if str(group.id) not in groups_with_neighbor:
+                assert group.data["metadata"].get("seer_similarity") == {
+                    "similarity_model_version": SEER_SIMILARITY_MODEL_VERSION,
+                    "request_hash": self.group_hashes[group.id],
+                }
+            else:
+                request_hash = GroupHash.objects.get(group_id=group.id).hash
+                parent_group_id = Group.objects.filter(times_seen__gt=2).first().id
+                assert group.data["metadata"].get("seer_similarity") == {
+                    "similarity_model_version": SEER_SIMILARITY_MODEL_VERSION,
+                    "request_hash": request_hash,
+                    "results": [
+                        {
+                            "stacktrace_distance": 0.01,
+                            "message_distance": 0.01,
+                            "should_group": True,
+                            "parent_hash": groups_with_neighbor[str(group.id)]["parent_hash"],
+                            "parent_group_id": parent_group_id,
+                        }
+                    ],
+                }
+
+        redis_client = redis.redis_clusters.get(settings.SENTRY_MONITORS_REDIS_CLUSTER)
+        last_processed_index = int(redis_client.get(make_backfill_redis_key(self.project.id)) or 0)
+        assert last_processed_index == len(groups)
+
+    @with_feature("projects:similarity-embeddings-backfill")
+    @patch("sentry.tasks.backfill_seer_grouping_records.logger")
+    @patch("sentry.tasks.backfill_seer_grouping_records.post_bulk_grouping_records")
+    def test_backfill_seer_grouping_records_groups_has_invalid_neighbor(
+        self, mock_post_bulk_grouping_records, mock_logger
+    ):
+        """
+        Test that groups whose nearest neighbors do not exist do not have their metadata
+        updated.
+        """
+        # Create group with 1 < times_seen < 5
+        group_with_neighbor = {}
+        data = {
+            "exception": self.create_exception_values(
+                "another_function!", "AnotherError!", "error with value"
+            )
+        }
+        event = self.store_event(data=data, project_id=self.project.id, assert_no_errors=False)
+        event.group.times_seen = 2
+        event.group.save()
+        # Make the similar group a hash that does not exist
+        group_with_neighbor[str(event.group.id)] = RawSeerSimilarIssueData(
+            stacktrace_distance=0.01,
+            message_distance=0.01,
+            should_group=True,
+            parent_hash="00000000000000000000000000000000",
+        )
+
+        mock_post_bulk_grouping_records.return_value = {
+            "success": True,
+            "groups_with_neighbor": group_with_neighbor,
+        }
+
+        with TaskRunner():
+            backfill_seer_grouping_records(self.project.id, None)
+
+        groups = Group.objects.filter(project_id=self.project.id, times_seen__gt=1)
+        for group in groups:
+            if str(group.id) not in group_with_neighbor:
+                assert group.data["metadata"].get("seer_similarity") == {
+                    "similarity_model_version": SEER_SIMILARITY_MODEL_VERSION,
+                    "request_hash": self.group_hashes[group.id],
+                }
+            else:
+                assert group.data["metadata"].get("seer_similarity") is None
+                mock_logger.exception.assert_called_with(
+                    "tasks.backfill_seer_grouping_records.invalid_parent_group",
+                    extra={
+                        "project_id": self.project.id,
+                        "group_id": group.id,
+                        "parent_hash": "00000000000000000000000000000000",
+                    },
+                )
+
+        redis_client = redis.redis_clusters.get(settings.SENTRY_MONITORS_REDIS_CLUSTER)
+        last_processed_index = int(redis_client.get(make_backfill_redis_key(self.project.id)) or 0)
+        assert last_processed_index == len(groups)
+
+    @django_db_all
+    @with_feature("projects:similarity-embeddings-backfill")
+    @patch("sentry.tasks.backfill_seer_grouping_records.post_bulk_grouping_records")
+    def test_backfill_seer_grouping_records_multiple_batches(self, mock_post_bulk_grouping_records):
+        """
+        Test that the metadata is set for all 21 groups, showing that the records have been
+        created, where 21 is greater than the batch size of 20.
+        """
+        function_names = [f"another_function_{str(i)}" for i in range(10)]
+        type_names = [f"AnotherError{str(i)}" for i in range(10)]
+        value_names = ["error with value" for _ in range(10)]
+        for i in range(10):
+            data = {
+                "exception": self.create_exception_values(
+                    function_names[i], type_names[i], value_names[i]
+                )
             }
+            event = self.store_event(data=data, project_id=self.project.id, assert_no_errors=False)
+            event.group.times_seen = 2
+            event.group.save()
+
+        mock_post_bulk_grouping_records.return_value = {"success": True, "groups_with_neighbor": {}}
+
+        with TaskRunner():
+            backfill_seer_grouping_records(self.project.id, None)
+
+        groups = Group.objects.filter(project_id=self.project.id)
+        for group in groups:
+            assert group.data["metadata"].get("seer_similarity") is not None
+
         redis_client = redis.redis_clusters.get(settings.SENTRY_MONITORS_REDIS_CLUSTER)
-        last_processed_id = int(redis_client.get(make_backfill_redis_key(self.project.id)) or 0)
-        assert last_processed_id != 0
+        last_processed_index = int(redis_client.get(make_backfill_redis_key(self.project.id)) or 0)
+        assert last_processed_index == len(groups)
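
For quick reference, below is a minimal Python sketch (not part of the patch) of the seer_similarity metadata shapes these tests assert on. Only the key names and SEER_SIMILARITY_MODEL_VERSION come from the diff above; the hash, distance, and ID values are illustrative placeholders.

    from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION

    # Backfilled group with no nearest neighbor: a grouping record was created.
    no_neighbor = {
        "similarity_model_version": SEER_SIMILARITY_MODEL_VERSION,
        "request_hash": "abc123",  # placeholder; in the tests this is the group's hash
    }

    # Backfilled group with a nearest neighbor: no record is created, and the
    # neighbor match is stored under "results".
    with_neighbor = {
        "similarity_model_version": SEER_SIMILARITY_MODEL_VERSION,
        "request_hash": "abc123",  # placeholder
        "results": [
            {
                "stacktrace_distance": 0.01,
                "message_distance": 0.01,
                "should_group": True,
                "parent_hash": "def456",  # placeholder
                "parent_group_id": 1,  # placeholder
            }
        ],
    }

    # Group with times_seen == 1: skipped by the backfill and tagged instead.
    seen_once = {"times_seen_once": True}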