
feat(escalating_issues): Support profiling and performance issues (#48257)

Non-error groups (profile and performance issues) are stored in the issue platform dataset rather than the events dataset; this PR adds support for querying it when computing hourly group counts.

Fixes #48179
Armen Zambrano G · 1 year ago · commit 073d1b88bd
2 changed files with 92 additions and 16 deletions:
  1. src/sentry/issues/escalating.py (+41, -15)
  2. tests/sentry/issues/test_escalating.py (+51, -1)
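In outline, the change partitions groups by issue category and routes each partition to the matching Snuba dataset. Below is a minimal, self-contained sketch of that dispatch; the GroupCategory values and the "search_issues" dataset name mirror the diff that follows, while everything else is a hypothetical stand-in, not Sentry's actual code:

from enum import Enum
from typing import List, Optional, Sequence, Tuple

class GroupCategory(Enum):
    ERROR = 1
    PERFORMANCE = 2
    PROFILE = 3

def dataset_for(category: Optional[GroupCategory]) -> str:
    # Error groups live in the events dataset; profile and performance
    # groups live in the issue platform dataset (assumed to be named
    # "search_issues", matching Dataset.IssuePlatform in the diff).
    return "events" if category == GroupCategory.ERROR else "search_issues"

def partition(groups: Sequence[Tuple[int, GroupCategory]]) -> Tuple[List[int], List[int]]:
    # Same split query_groups_past_counts performs: errors vs. everything else.
    error_groups: List[int] = []
    other_groups: List[int] = []
    for group_id, category in groups:
        (error_groups if category == GroupCategory.ERROR else other_groups).append(group_id)
    return error_groups, other_groups

assert dataset_for(GroupCategory.ERROR) == "events"
assert dataset_for(GroupCategory.PROFILE) == "search_issues"
assert dataset_for(None) == "search_issues"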

src/sentry/issues/escalating.py (+41, -15)

@@ -5,7 +5,7 @@ This is later used for generating group forecasts for determining when a group m
 import logging
 from collections import defaultdict
 from datetime import datetime, timedelta
-from typing import Dict, List, Sequence, Tuple, TypedDict
+from typing import Dict, List, Optional, Sequence, Tuple, TypedDict
 
 from snuba_sdk import (
     Column,
@@ -73,6 +73,34 @@ def query_groups_past_counts(groups: Sequence[Group]) -> List[GroupsCountRespons
         return all_results
 
     start_date, end_date = _start_and_end_dates()
+
+    # Error groups use the events dataset while profile and perf groups use the issue platform dataset
+    error_groups: List[Group] = []
+    other_groups: List[Group] = []
+    for g in groups:
+        if g.issue_category == GroupCategory.ERROR:
+            error_groups.append(g)
+        else:
+            other_groups.append(g)
+
+    all_results += _process_groups(error_groups, start_date, end_date, GroupCategory.ERROR)
+    all_results += _process_groups(other_groups, start_date, end_date)
+
+    return all_results
+
+
+def _process_groups(
+    groups: Sequence[Group],
+    start_date: datetime,
+    end_date: datetime,
+    category: Optional[GroupCategory] = None,
+) -> List[GroupsCountResponse]:
+    """Given a list of groups, query Snuba for their hourly bucket count.
+    The category defines which Snuba dataset and entity we query."""
+    all_results = []  # type: ignore[var-annotated]
+    if not groups:
+        return all_results
+
     group_ids_by_project = _extract_project_and_group_ids(groups)
     proj_ids, group_ids = [], []
     processed_projects = 0
@@ -98,7 +126,7 @@ def query_groups_past_counts(groups: Sequence[Group]) -> List[GroupsCountRespons
 
         # TODO: Write this as a dispatcher type task and fire off a separate task per proj_ids
         all_results += _query_with_pagination(
-            organization_id, proj_ids, group_ids, start_date, end_date
+            organization_id, proj_ids, group_ids, start_date, end_date, category
         )
         # We're ready for a new set of projects and ids
         proj_ids, group_ids = [], []
@@ -112,15 +140,16 @@ def _query_with_pagination(
     group_ids: Sequence[int],
     start_date: datetime,
     end_date: datetime,
+    category: Optional[GroupCategory],
 ) -> List[GroupsCountResponse]:
     """Query Snuba for event counts for the given list of project ids and groups ids in
     a time range."""
     all_results = []
     offset = 0
     while True:
-        query = _generate_query(project_ids, group_ids, offset, start_date, end_date)
+        query = _generate_query(project_ids, group_ids, offset, start_date, end_date, category)
         request = Request(
-            dataset=_issue_category_dataset(GroupCategory.ERROR),
+            dataset=_issue_category_dataset(category),
             app_id=REFERRER,
             query=query,
             tenant_ids={"referrer": REFERRER, "organization_id": organization_id},
@@ -140,12 +169,13 @@ def _generate_query(
     offset: int,
     start_date: datetime,
     end_date: datetime,
+    category: Optional[GroupCategory],
 ) -> Query:
     """This simply generates a query based on the passed parameters"""
     group_id_col = Column("group_id")
     proj_id_col = Column("project_id")
     return Query(
-        match=Entity(_issue_category_entity(GroupCategory.ERROR)),
+        match=Entity(_issue_category_entity(category)),
         select=[
             proj_id_col,
             group_id_col,
@@ -262,15 +292,11 @@ def parse_groups_past_counts(response: Sequence[GroupsCountResponse]) -> ParsedG
     return group_counts
 
 
-def _issue_category_dataset(category: GroupCategory) -> Dataset:
-    if category == GroupCategory.ERROR:
-        return Dataset.Events.value
-    else:
-        raise NotImplementedError
+def _issue_category_dataset(category: Optional[GroupCategory]) -> Dataset:
+    return Dataset.Events.value if category == GroupCategory.ERROR else Dataset.IssuePlatform.value
 
 
-def _issue_category_entity(category: GroupCategory) -> EntityKey:
-    if category == GroupCategory.ERROR:
-        return EntityKey.Events.value
-    else:
-        raise NotImplementedError
+def _issue_category_entity(category: Optional[GroupCategory]) -> EntityKey:
+    return (
+        EntityKey.Events.value if category == GroupCategory.ERROR else EntityKey.IssuePlatform.value
+    )

tests/sentry/issues/test_escalating.py (+51, -1)

@@ -14,13 +14,16 @@ from sentry.issues.escalating import (
     query_groups_past_counts,
 )
 from sentry.issues.escalating_group_forecast import EscalatingGroupForecast
+from sentry.issues.grouptype import GroupCategory, ProfileFileIOGroupType
 from sentry.models import Group
 from sentry.models.group import GroupStatus
 from sentry.models.groupinbox import GroupInbox
 from sentry.testutils import SnubaTestCase, TestCase
+from sentry.testutils.cases import PerformanceIssueTestCase
 from sentry.types.group import GroupSubStatus
 from sentry.utils.cache import cache
 from sentry.utils.snuba import to_start_of_hour
+from tests.sentry.issues.test_utils import SearchIssueTestMixin
 
 TIME_YESTERDAY = (datetime.now() - timedelta(hours=24)).replace(hour=6)
 
@@ -52,7 +55,13 @@ class BaseGroupCounts(SnubaTestCase, TestCase):  # type: ignore[misc]
         return last_event
 
 
-class HistoricGroupCounts(BaseGroupCounts):
+class HistoricGroupCounts(
+    BaseGroupCounts,
+    PerformanceIssueTestCase,  # type: ignore[misc]
+    SearchIssueTestMixin,
+):
+    """Test that querying Snuba for the hourly counts for groups works as expected."""
+
     def _create_hourly_bucket(self, count: int, event: Event) -> GroupsCountResponse:
         """It simplifies writing the expected data structures"""
         return {
@@ -68,6 +77,47 @@ class HistoricGroupCounts(BaseGroupCounts):
             self._create_hourly_bucket(1, event)
         ]
 
+    @freeze_time(TIME_YESTERDAY)
+    def test_query_different_group_categories(self) -> None:
+        from django.utils import timezone
+
+        # This builds an error group and a profiling group
+        profile_error_event, _, profile_issue_occurrence = self.store_search_issue(
+            project_id=self.project.id,
+            user_id=0,
+            fingerprints=[f"{ProfileFileIOGroupType.type_id}-group1"],
+            insert_time=timezone.now() - timedelta(minutes=1),
+        )
+        assert len(Group.objects.all()) == 2
+
+        with self.options({"performance.issues.send_to_issues_platform": True}):
+            perf_event = self.create_performance_issue()
+
+        error_event = self._create_events_for_group()
+
+        # store_search_issue created two groups
+        assert len(Group.objects.all()) == 4
+        assert profile_error_event.group.issue_category == GroupCategory.ERROR
+        assert error_event.group.issue_category == GroupCategory.ERROR
+        assert profile_issue_occurrence.group.issue_category == GroupCategory.PROFILE  # type: ignore[union-attr]
+        assert perf_event.group.issue_category == GroupCategory.PERFORMANCE
+
+        profile_issue_occurrence_bucket = {
+            "count()": 1,
+            "group_id": profile_issue_occurrence.group.id,  # type: ignore[union-attr]
+            "hourBucket": to_start_of_hour(profile_issue_occurrence.group.first_seen),  # type: ignore[union-attr]
+            "project_id": self.project.id,
+        }
+
+        # Error groups will show up at the beginning of the list even if they
+        # were created later
+        assert query_groups_past_counts(Group.objects.all()) == [
+            self._create_hourly_bucket(1, profile_error_event),
+            self._create_hourly_bucket(1, error_event),
+            profile_issue_occurrence_bucket,
+            self._create_hourly_bucket(1, perf_event),
+        ]
+
     def test_pagination(self) -> None:
         group1_bucket1_event = self._create_events_for_group(count=2, hours_ago=1, group="group-1")
         group2_bucket1_event = self._create_events_for_group(count=1, hours_ago=2, group="group-2")
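
To exercise just the new test locally, assuming Sentry's standard pytest-plus-Snuba test setup is running, an invocation along these lines should work:

pytest tests/sentry/issues/test_escalating.py::HistoricGroupCounts::test_query_different_group_categories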