Browse Source

feat(perf-issues): introduce Grouptype-aware abstract methods for GroupSerializer stats data (#38099)

Split up of this [PR](https://github.com/getsentry/sentry/pull/37900)

To support Performance Issue Details, we need to update the various GroupSerializers to handle heterogeneous data appropriate for the group type. The current GroupSerializer assumes Issues are tightly coupled to errors. This PR attempts to separate the seams where we need to do issue type-specific stuff.

Things done:
- Introduce new abstract methods `_seen_stats_error` and `_seen_stats_performance` on the base class, which will be called at the appropriate time to retrieve the stats data for a given list of issues based on their type.
- Clean up the class design by marking methods as static methods.
- Add as much type information as possible for clarity.
Gilbert Szeto 2 years ago
parent
commit
c2aee724f4

+ 99 - 32
src/sentry/api/serializers/models/group.py

@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import itertools
 import logging
+from abc import ABC, abstractmethod
 from collections import defaultdict
 from datetime import datetime, timedelta
 from typing import (
@@ -64,6 +65,7 @@ from sentry.search.events.constants import RELEASE_STAGE_ALIAS
 from sentry.search.events.filter import convert_search_filter_to_snuba_query
 from sentry.tagstore.snuba.backend import fix_tag_value_data
 from sentry.tsdb.snuba import SnubaTSDB
+from sentry.types.issues import GroupCategory
 from sentry.utils.cache import cache
 from sentry.utils.json import JSONData
 from sentry.utils.safe import safe_execute
@@ -158,7 +160,14 @@ class BaseGroupSerializerResponse(BaseGroupResponseOptional):
     annotations: Sequence[str]
 
 
-class GroupSerializerBase(Serializer):
+class SeenStats(TypedDict):
+    times_seen: int
+    first_seen: datetime
+    last_seen: datetime
+    user_count: int
+
+
+class GroupSerializerBase(Serializer, ABC):
     def __init__(
         self,
         collapse=None,
@@ -167,7 +176,9 @@ class GroupSerializerBase(Serializer):
         self.collapse = collapse
         self.expand = expand
 
-    def get_attrs(self, item_list, user):
+    def get_attrs(
+        self, item_list: Sequence[Group], user: Any, **kwargs: Any
+    ) -> MutableMapping[Group, MutableMapping[str, Any]]:
         GroupMeta.objects.populate_cache(item_list)
 
         # Note that organization is necessary here for use in `_get_permalink` to avoid
@@ -283,7 +294,9 @@ class GroupSerializerBase(Serializer):
                 result[item].update(seen_stats.get(item, {}))
         return result
 
-    def serialize(self, obj, attrs, user) -> BaseGroupSerializerResponse:
+    def serialize(
+        self, obj: Group, attrs: MutableMapping[str, Any], user: Any, **kwargs: Any
+    ) -> BaseGroupSerializerResponse:
         status_details, status_label = self._get_status(attrs, obj)
         permalink = self._get_permalink(attrs, obj)
         is_subscribed, subscription_details = get_subscription_from_attributes(attrs)
@@ -325,18 +338,30 @@ class GroupSerializerBase(Serializer):
             group_dict.update(self._convert_seen_stats(attrs))
         return group_dict
 
-    def _expand(self, key):
+    @abstractmethod
+    def _seen_stats_error(
+        self, error_issue_list: Sequence[Group], user
+    ) -> Mapping[Group, SeenStats]:
+        pass
+
+    @abstractmethod
+    def _seen_stats_performance(
+        self, perf_issue_list: Sequence[Group], user
+    ) -> Mapping[Group, SeenStats]:
+        pass
+
+    def _expand(self, key) -> bool:
         if self.expand is None:
             return False
 
         return key in self.expand
 
-    def _collapse(self, key):
+    def _collapse(self, key) -> bool:
         if self.collapse is None:
             return False
         return key in self.collapse
 
-    def _get_status(self, attrs, obj):
+    def _get_status(self, attrs: MutableMapping[str, Any], obj: Group):
         status = obj.status
         status_details = {}
         if attrs["ignore_until"]:
@@ -392,7 +417,9 @@ class GroupSerializerBase(Serializer):
             status_label = "unresolved"
         return status_details, status_label
 
-    def _get_seen_stats(self, item_list, user):
+    def _get_seen_stats(
+        self, item_list: Sequence[Group], user
+    ) -> Optional[Mapping[Group, SeenStats]]:
         """
         Returns a dictionary keyed by item that includes:
             - times_seen
@@ -400,9 +427,25 @@ class GroupSerializerBase(Serializer):
             - last_seen
             - user_count
         """
-        raise NotImplementedError
+        if self._collapse("stats"):
+            return None
+
+        # partition the item_list by type
+        error_issues = [group for group in item_list if GroupCategory.ERROR == group.issue_category]
+        perf_issues = [
+            group for group in item_list if GroupCategory.PERFORMANCE == group.issue_category
+        ]
 
-    def _get_group_snuba_stats(self, item_list, seen_stats):
+        # bulk query for the seen_stats by type
+        error_stats = self._seen_stats_error(error_issues, user) or {}
+        perf_stats = (self._seen_stats_performance(perf_issues, user) if perf_issues else {}) or {}
+        agg_stats = {**error_stats, **perf_stats}
+        # combine results back
+        return {group: agg_stats.get(group, {}) for group in item_list}
+
+    def _get_group_snuba_stats(
+        self, item_list: Sequence[Group], seen_stats: Optional[Mapping[Group, SeenStats]]
+    ):
         start = self._get_start_from_seen_stats(seen_stats)
         unhandled = {}
 
@@ -449,7 +492,7 @@ class GroupSerializerBase(Serializer):
         return {group_id: {"unhandled": unhandled} for group_id, unhandled in unhandled.items()}
 
     @staticmethod
-    def _get_start_from_seen_stats(seen_stats):
+    def _get_start_from_seen_stats(seen_stats: Optional[Mapping[Group, SeenStats]]):
         # Try to figure out what is a reasonable time frame to look into stats,
         # based on a given "seen stats".  We try to pick a day prior to the earliest last seen,
         # but it has to be at least 14 days, and not more than 90 days ago.
@@ -502,7 +545,9 @@ class GroupSerializerBase(Serializer):
         )
 
     @staticmethod
-    def _resolve_resolutions(groups, user) -> Tuple[Mapping[int, Sequence[Any]], Mapping[int, Any]]:
+    def _resolve_resolutions(
+        groups: Sequence[Group], user
+    ) -> Tuple[Mapping[int, Sequence[Any]], Mapping[int, Any]]:
         resolved_groups = [i for i in groups if i.status == GroupStatus.RESOLVED]
         if not resolved_groups:
             return {}, {}
@@ -538,7 +583,7 @@ class GroupSerializerBase(Serializer):
         return _release_resolutions, _commit_resolutions
 
     @staticmethod
-    def _resolve_external_issue_annotations(groups) -> Mapping[int, Sequence[Any]]:
+    def _resolve_external_issue_annotations(groups: Sequence[Group]) -> Mapping[int, Sequence[Any]]:
         from sentry.models import PlatformExternalIssue
 
         # find the external issues for sentry apps and add them in
@@ -552,7 +597,9 @@ class GroupSerializerBase(Serializer):
         )
 
     @staticmethod
-    def _resolve_integration_annotations(org_id, groups) -> Sequence[Mapping[int, Sequence[Any]]]:
+    def _resolve_integration_annotations(
+        org_id: int, groups: Sequence[Group]
+    ) -> Sequence[Mapping[int, Sequence[Any]]]:
         from sentry.integrations import IntegrationFeatures
 
         integration_annotations = []
@@ -601,7 +648,7 @@ class GroupSerializerBase(Serializer):
         return annotations_for_group
 
     @staticmethod
-    def _is_authorized(user, organization_id):
+    def _is_authorized(user, organization_id: int):
         # If user is not logged in and member of the organization,
         # do not return the permalink which contains private information i.e. org name.
         request = env.request
@@ -624,7 +671,7 @@ class GroupSerializerBase(Serializer):
         return user.is_authenticated and user.get_orgs().filter(id=organization_id).exists()
 
     @staticmethod
-    def _get_permalink(attrs, obj):
+    def _get_permalink(attrs, obj: Group):
         if attrs["authorized"]:
             with sentry_sdk.start_span(op="GroupSerializerBase.serialize.permalink.build"):
                 return obj.get_absolute_url()
@@ -632,12 +679,12 @@ class GroupSerializerBase(Serializer):
             return None
 
     @staticmethod
-    def _convert_seen_stats(stats):
+    def _convert_seen_stats(attrs: SeenStats):
         return {
-            "count": str(stats["times_seen"]),
-            "userCount": stats["user_count"],
-            "firstSeen": stats["first_seen"],
-            "lastSeen": stats["last_seen"],
+            "count": str(attrs["times_seen"]),
+            "userCount": attrs["user_count"],
+            "firstSeen": attrs["first_seen"],
+            "lastSeen": attrs["last_seen"],
         }
 
 
@@ -647,7 +694,7 @@ class GroupSerializer(GroupSerializerBase):
         GroupSerializerBase.__init__(self)
         self.environment_func = environment_func if environment_func is not None else lambda: None
 
-    def _get_seen_stats(self, item_list, user):
+    def _seen_stats_error(self, item_list, user):
         try:
             environment = self.environment_func()
         except Environment.DoesNotExist:
@@ -689,11 +736,22 @@ class GroupSerializer(GroupSerializerBase):
 
         return attrs
 
+    def _seen_stats_performance(
+        self, perf_issue_list: Sequence[Group], user
+    ) -> Mapping[Group, SeenStats]:
+        # TODO(gilbert): implement this to return real data
+        if perf_issue_list:
+            raise NotImplementedError
+
+        return {}
+
 
 class SharedGroupSerializer(GroupSerializer):
-    def serialize(self, obj, attrs, user):
+    def serialize(
+        self, obj: Group, attrs: MutableMapping[str, Any], user: Any, **kwargs: Any
+    ) -> BaseGroupSerializerResponse:
         result = super().serialize(obj, attrs, user)
-        del result["annotations"]
+        del result["annotations"]  # type:ignore
         return result
 
 
@@ -774,6 +832,24 @@ class GroupSerializerSnuba(GroupSerializerBase):
             else []
         )
 
+    def _seen_stats_error(self, error_issue_list: Sequence[Group], user) -> Mapping[Any, SeenStats]:
+        return self._execute_seen_stats_query(
+            item_list=error_issue_list,
+            start=self.start,
+            end=self.end,
+            conditions=self.conditions,
+            environment_ids=self.environment_ids,
+        )
+
+    def _seen_stats_performance(
+        self, perf_issue_list: Sequence[Group], user
+    ) -> Mapping[Group, SeenStats]:
+        # TODO(gilbert): implement this to return real data
+        if perf_issue_list:
+            raise NotImplementedError
+
+        return {}
+
     def _execute_seen_stats_query(
         self, item_list, start=None, end=None, conditions=None, environment_ids=None
     ):
@@ -834,12 +910,3 @@ class GroupSerializerSnuba(GroupSerializerBase):
             }
 
         return attrs
-
-    def _get_seen_stats(self, item_list, user):
-        return self._execute_seen_stats_query(
-            item_list=item_list,
-            start=self.start,
-            end=self.end,
-            conditions=self.conditions,
-            environment_ids=self.environment_ids,
-        )

+ 99 - 49
src/sentry/api/serializers/models/group_stream.py

@@ -1,14 +1,23 @@
 from __future__ import annotations
 
 import functools
-from datetime import timedelta
+from abc import abstractmethod
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+from typing import Any, Mapping, MutableMapping, Optional, Sequence
 
 from django.utils import timezone
 
 from sentry import release_health, tsdb
-from sentry.api.serializers.models.group import GroupSerializer, GroupSerializerSnuba, snuba_tsdb
+from sentry.api.serializers.models.group import (
+    BaseGroupSerializerResponse,
+    GroupSerializer,
+    GroupSerializerSnuba,
+    SeenStats,
+    snuba_tsdb,
+)
 from sentry.constants import StatsPeriod
-from sentry.models import Environment
+from sentry.models import Environment, Group
 from sentry.models.groupinbox import get_inbox_details
 from sentry.models.groupowner import get_owner_details
 from sentry.utils import metrics
@@ -16,6 +25,13 @@ from sentry.utils.cache import cache
 from sentry.utils.hashlib import hash_values
 
 
+@dataclass
+class GroupStatsQueryArgs:
+    stats_period: Optional[str]
+    stats_period_start: Optional[datetime]
+    stats_period_end: Optional[datetime]
+
+
 class GroupStatsMixin:
     STATS_PERIOD_CHOICES = {
         "14d": StatsPeriod(14, timedelta(hours=24)),
@@ -35,16 +51,21 @@ class GroupStatsMixin:
     CUSTOM_SEGMENTS_12H = 35  # for 12h 36 segments, otherwise 15-16-17 bars is too few
     CUSTOM_ROLLUP_6H = timedelta(hours=6).total_seconds()  # rollups should be increments of 6hs
 
-    def query_tsdb(self, group_ids, query_params):
-        raise NotImplementedError
+    @abstractmethod
+    def query_tsdb(self, group_ids: Sequence[int], query_params: MutableMapping[str, Any]):
+        pass
 
-    def get_stats(self, item_list, user, **kwargs):
-        if self.stats_period:
+    def get_stats(
+        self, item_list: Sequence[Group], user, stats_query_args: GroupStatsQueryArgs, **kwargs
+    ):
+        if stats_query_args and stats_query_args.stats_period:
             # we need to compute stats at 1d (1h resolution), and 14d or a custom given period
             group_ids = [g.id for g in item_list]
 
-            if self.stats_period == "auto":
-                total_period = (self.stats_period_end - self.stats_period_start).total_seconds()
+            if stats_query_args.stats_period == "auto":
+                total_period = (
+                    stats_query_args.stats_period_end - stats_query_args.stats_period_start
+                ).total_seconds()
                 if total_period < timedelta(hours=24).total_seconds():
                     rollup = total_period / self.CUSTOM_SEGMENTS
                 elif total_period < self.CUSTOM_SEGMENTS * self.CUSTOM_ROLLUP_CHOICES["1h"]:
@@ -66,12 +87,12 @@ class GroupStatsMixin:
                     rollup = round(total_period / (self.CUSTOM_SEGMENTS * delta_day)) * delta_day
 
                 query_params = {
-                    "start": self.stats_period_start,
-                    "end": self.stats_period_end,
+                    "start": stats_query_args.stats_period_start,
+                    "end": stats_query_args.stats_period_end,
                     "rollup": int(rollup),
                 }
             else:
-                segments, interval = self.STATS_PERIOD_CHOICES[self.stats_period]
+                segments, interval = self.STATS_PERIOD_CHOICES[stats_query_args.stats_period]
                 now = timezone.now()
                 query_params = {
                     "start": now - ((segments - 1) * interval),
@@ -103,17 +124,27 @@ class StreamGroupSerializer(GroupSerializer, GroupStatsMixin):
         self.matching_event_id = matching_event_id
         self.matching_event_environment = matching_event_environment
 
-    def get_attrs(self, item_list, user):
+    def get_attrs(
+        self, item_list: Sequence[Group], user: Any, **kwargs: Any
+    ) -> MutableMapping[Group, MutableMapping[str, Any]]:
         attrs = super().get_attrs(item_list, user)
 
         if self.stats_period:
-            stats = self.get_stats(item_list, user)
+            stats = self.get_stats(
+                item_list,
+                user,
+                GroupStatsQueryArgs(
+                    self.stats_period, self.stats_period_start, self.stats_period_end
+                ),
+            )
             for item in item_list:
                 attrs[item].update({"stats": stats[item.id]})
 
         return attrs
 
-    def serialize(self, obj, attrs, user):
+    def serialize(
+        self, obj: Group, attrs: MutableMapping[str, Any], user: Any, **kwargs: Any
+    ) -> BaseGroupSerializerResponse:
         result = super().serialize(obj, attrs, user)
 
         if self.stats_period:
@@ -148,7 +179,9 @@ class TagBasedStreamGroupSerializer(StreamGroupSerializer):
         super().__init__(**kwargs)
         self.tags = tags
 
-    def serialize(self, obj, attrs, user):
+    def serialize(
+        self, obj: Group, attrs: MutableMapping[str, Any], user: Any, **kwargs: Any
+    ) -> BaseGroupSerializerResponse:
         result = super().serialize(obj, attrs, user)
         result["tagLastSeen"] = self.tags[obj.id].last_seen
         result["tagFirstSeen"] = self.tags[obj.id].first_seen
@@ -190,7 +223,9 @@ class StreamGroupSerializerSnuba(GroupSerializerSnuba, GroupStatsMixin):
         self.stats_period_end = stats_period_end
         self.matching_event_id = matching_event_id
 
-    def get_attrs(self, item_list, user):
+    def get_attrs(
+        self, item_list: Sequence[Group], user: Any, **kwargs: Any
+    ) -> MutableMapping[Group, MutableMapping[str, Any]]:
         if not self._collapse("base"):
             attrs = super().get_attrs(item_list, user)
         else:
@@ -202,7 +237,13 @@ class StreamGroupSerializerSnuba(GroupSerializerSnuba, GroupStatsMixin):
 
         if self.stats_period and not self._collapse("stats"):
             partial_get_stats = functools.partial(
-                self.get_stats, item_list=item_list, user=user, environment_ids=self.environment_ids
+                self.get_stats,
+                item_list=item_list,
+                user=user,
+                stats_query_args=GroupStatsQueryArgs(
+                    self.stats_period, self.stats_period_start, self.stats_period_end
+                ),
+                environment_ids=self.environment_ids,
             )
             stats = partial_get_stats()
             filtered_stats = (
@@ -271,7 +312,9 @@ class StreamGroupSerializerSnuba(GroupSerializerSnuba, GroupStatsMixin):
 
         return attrs
 
-    def serialize(self, obj, attrs, user):
+    def serialize(
+        self, obj: Group, attrs: MutableMapping[str, Any], user: Any, **kwargs: Any
+    ) -> BaseGroupSerializerResponse:
         if not self._collapse("base"):
             result = super().serialize(obj, attrs, user)
         else:
@@ -325,39 +368,46 @@ class StreamGroupSerializerSnuba(GroupSerializerSnuba, GroupStatsMixin):
             **query_params,
         )
 
-    def _get_seen_stats(self, item_list, user):
-        if not self._collapse("stats"):
-            partial_execute_seen_stats_query = functools.partial(
-                self._execute_seen_stats_query,
-                item_list=item_list,
-                environment_ids=self.environment_ids,
-                start=self.start,
-                end=self.end,
+    def _seen_stats_error(self, error_issue_list: Sequence[Group], user) -> Mapping[Any, SeenStats]:
+        partial_execute_seen_stats_query = functools.partial(
+            self._execute_seen_stats_query,
+            item_list=error_issue_list,
+            environment_ids=self.environment_ids,
+            start=self.start,
+            end=self.end,
+        )
+        time_range_result = partial_execute_seen_stats_query()
+        filtered_result = (
+            partial_execute_seen_stats_query(conditions=self.conditions)
+            if self.conditions and not self._collapse("filtered")
+            else None
+        )
+        if not self._collapse("lifetime"):
+            lifetime_result = (
+                partial_execute_seen_stats_query(start=None, end=None)
+                if self.start or self.end
+                else time_range_result
             )
-            time_range_result = partial_execute_seen_stats_query()
-            filtered_result = (
-                partial_execute_seen_stats_query(conditions=self.conditions)
-                if self.conditions and not self._collapse("filtered")
-                else None
+        else:
+            lifetime_result = None
+
+        for item in error_issue_list:
+            time_range_result[item].update(
+                {
+                    "filtered": filtered_result.get(item) if filtered_result else None,
+                    "lifetime": lifetime_result.get(item) if lifetime_result else None,
+                }
             )
-            if not self._collapse("lifetime"):
-                lifetime_result = (
-                    partial_execute_seen_stats_query(start=None, end=None)
-                    if self.start or self.end
-                    else time_range_result
-                )
-            else:
-                lifetime_result = None
+        return time_range_result
 
-            for item in item_list:
-                time_range_result[item].update(
-                    {
-                        "filtered": filtered_result.get(item) if filtered_result else None,
-                        "lifetime": lifetime_result.get(item) if lifetime_result else None,
-                    }
-                )
-            return time_range_result
-        return None
+    def _seen_stats_performance(
+        self, perf_issue_list: Sequence[Group], user
+    ) -> Mapping[Group, SeenStats]:
+        # TODO(gilbert): implement this to return real data
+        if perf_issue_list:
+            raise NotImplementedError
+
+        return {}
 
     def _build_session_cache_key(self, project_id):
         start_key = end_key = env_key = ""

+ 26 - 0
tests/snuba/api/endpoints/test_group_details.py

@@ -177,3 +177,29 @@ class GroupDetailsTest(APITestCase, SnubaTestCase):
                 )
             ]
         }
+
+    def test_collapse_stats_does_not_work(self):
+        """
+        The 'collapse' param should hide the stats data and not return it in the response, but the
+        implementation doesn't seem to respect this param.
+
+        Include this test here in case the endpoint behavior changes in the future.
+        """
+        self.login_as(user=self.user)
+
+        event = self.store_event(
+            data={"timestamp": iso_format(before_now(minutes=3))},
+            project_id=self.project.id,
+        )
+        group = event.group
+
+        url = f"/api/0/issues/{group.id}/"
+
+        response = self.client.get(url, {"collapse": ["stats"]}, format="json")
+        assert response.status_code == 200
+        assert int(response.data["id"]) == event.group.id
+        assert response.data["stats"]  # key shouldn't be present
+        assert response.data["count"] is not None  # key shouldn't be present
+        assert response.data["userCount"] is not None  # key shouldn't be present
+        assert response.data["firstSeen"] is not None  # key shouldn't be present
+        assert response.data["lastSeen"] is not None  # key shouldn't be present

+ 1 - 0
tests/snuba/api/endpoints/test_organization_group_index.py

@@ -1678,6 +1678,7 @@ class GroupListTest(APITestCase, SnubaTestCase):
         assert "firstSeen" not in response.data[0]
         assert "lastSeen" not in response.data[0]
         assert "count" not in response.data[0]
+        assert "userCount" not in response.data[0]
         assert "lifetime" not in response.data[0]
         assert "filtered" not in response.data[0]
 

+ 1 - 0
tests/snuba/api/endpoints/test_organization_group_index_stats.py

@@ -57,6 +57,7 @@ class GroupListTest(APITestCase, SnubaTestCase):
         assert "firstSeen" in response_data[0]
         assert "lastSeen" in response_data[0]
         assert "count" in response_data[0]
+        assert "userCount" in response_data[0]
         assert "lifetime" in response_data[0]
         assert "filtered" in response_data[0]