feat(issue-platform): GroupDetails endpoint support for profiling issues (#42904)

Adds issue-platform support for the various data returned in
GroupDetails.

It's not ideal that we're using the legacy query format; we'll look to
port this over to SnQL down the line.

Resolves getsentry/sentry#42626
Gilbert Szeto · 2 years ago
parent commit 657e7ddac1

+ 2 - 4
src/sentry/api/endpoints/group_details.py

@@ -21,7 +21,7 @@ from sentry.api.helpers.group_index import (
 )
 from sentry.api.serializers import GroupSerializer, GroupSerializerSnuba, serialize
 from sentry.api.serializers.models.plugin import PluginSerializer, is_plugin_deprecated
-from sentry.issues.constants import ISSUE_TSDB_GROUP_MODELS
+from sentry.issues.constants import get_issue_tsdb_group_model
 from sentry.models import Activity, Group, GroupSeen, GroupSubscriptionManager, UserReport
 from sentry.models.groupinbox import get_inbox_details
 from sentry.models.groupowner import get_owner_details
@@ -123,9 +123,7 @@ class GroupDetailsEndpoint(GroupEndpoint, EnvironmentMixin):
     @staticmethod
     def __group_hourly_daily_stats(group: Group, environment_ids: Sequence[int]):
         get_range = functools.partial(tsdb.get_range, environment_ids=environment_ids)
-        # choose the model based off the group.category
-        model = ISSUE_TSDB_GROUP_MODELS[group.issue_category]
-
+        model = get_issue_tsdb_group_model(group.issue_category)
         now = timezone.now()
         hourly_stats = tsdb.rollup(
             get_range(model=model, keys=[group.id], end=now, start=now - timedelta(days=1)),

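Note: the helper swap above is what makes this endpoint work for profiling issues. The old dict lookup presumably raised a KeyError for any category outside ERROR/PERFORMANCE; the new helper (defined in src/sentry/issues/constants.py below) falls back to the generic issue-platform model. A minimal sketch of the behavior:

    from sentry import tsdb
    from sentry.issues.constants import get_issue_tsdb_group_model
    from sentry.types.issues import GroupCategory

    # PROFILE has no explicit mapping, so the helper falls through to the
    # generic issue-platform model.
    assert get_issue_tsdb_group_model(GroupCategory.PROFILE) == tsdb.models.group_generic
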
+ 17 - 6
src/sentry/api/serializers/models/group_stream.py

@@ -332,12 +332,15 @@ class StreamGroupSerializerSnuba(GroupSerializerSnuba, GroupStatsMixin):
     def query_tsdb(
         self, groups: Sequence[Group], query_params, conditions=None, environment_ids=None, **kwargs
     ):
-        error_issue_ids = [
-            group.id for group in groups if GroupCategory.ERROR == group.issue_category
-        ]
-        perf_issue_ids = [
-            group.id for group in groups if GroupCategory.PERFORMANCE == group.issue_category
-        ]
+        error_issue_ids, perf_issue_ids, generic_issue_ids = [], [], []
+        for group in groups:
+            if GroupCategory.ERROR == group.issue_category:
+                error_issue_ids.append(group.id)
+            elif GroupCategory.PERFORMANCE == group.issue_category:
+                perf_issue_ids.append(group.id)
+            else:
+                generic_issue_ids.append(group.id)
+
         results = {}
         get_range = functools.partial(
             snuba_tsdb.get_range,
@@ -351,6 +354,9 @@ class StreamGroupSerializerSnuba(GroupSerializerSnuba, GroupStatsMixin):
             results.update(
                 get_range(model=snuba_tsdb.models.group_performance, keys=perf_issue_ids)
             )
+        if generic_issue_ids:
+            results.update(get_range(model=snuba_tsdb.models.group_generic, keys=generic_issue_ids))
+
         return results
 
     def _seen_stats_error(
@@ -363,6 +369,11 @@ class StreamGroupSerializerSnuba(GroupSerializerSnuba, GroupStatsMixin):
     ) -> Mapping[Group, SeenStats]:
         return self.__seen_stats_impl(perf_issue_list, self._execute_perf_seen_stats_query)
 
+    def _seen_stats_generic(
+        self, generic_issue_list: Sequence[Group], user
+    ) -> Mapping[Group, SeenStats]:
+        return self.__seen_stats_impl(generic_issue_list, self._execute_generic_seen_stats_query)
+
     def __seen_stats_impl(
         self,
         error_issue_list: Sequence[Group],

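For reference, each get_range call returns a mapping from key to (bucket timestamp, count) pairs, so merging the per-category results with dict.update is safe as long as each group id lands in exactly one of the three id lists. An illustrative sketch of the merged shape (ids and timestamps are made up):

    # results after query_tsdb, keyed by group id, one entry per rollup bucket
    results = {
        123: [(1670000000, 4), (1670003600, 0)],  # an error group
        456: [(1670000000, 1), (1670003600, 2)],  # a generic (e.g. profiling) group
    }
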
+ 11 - 0
src/sentry/issues/constants.py

@@ -1,4 +1,5 @@
 from sentry import tsdb
+from sentry.tsdb.base import TSDBModel
 from sentry.types.issues import GroupCategory
 
 ISSUE_TSDB_GROUP_MODELS = {
@@ -9,3 +10,13 @@ ISSUE_TSDB_USER_GROUP_MODELS = {
     GroupCategory.ERROR: tsdb.models.users_affected_by_group,
     GroupCategory.PERFORMANCE: tsdb.models.users_affected_by_perf_group,
 }
+
+
+def get_issue_tsdb_group_model(issue_category: GroupCategory) -> TSDBModel:
+    return ISSUE_TSDB_GROUP_MODELS.get(issue_category, tsdb.models.group_generic)
+
+
+def get_issue_tsdb_user_group_model(issue_category: GroupCategory) -> TSDBModel:
+    return ISSUE_TSDB_USER_GROUP_MODELS.get(
+        issue_category, tsdb.models.users_affected_by_generic_group
+    )

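The user-count helper mirrors the same fallback; a one-assert sketch analogous to the group-model example above (same imports assumed):

    # any unmapped category falls through to the generic user-count model
    assert (
        get_issue_tsdb_user_group_model(GroupCategory.PROFILE)
        == tsdb.models.users_affected_by_generic_group
    )
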
+ 3 - 3
src/sentry/models/groupsnooze.py

@@ -13,7 +13,7 @@ from sentry.db.models import (
     region_silo_only_model,
     sane_repr,
 )
-from sentry.issues.constants import ISSUE_TSDB_GROUP_MODELS, ISSUE_TSDB_USER_GROUP_MODELS
+from sentry.issues.constants import get_issue_tsdb_group_model, get_issue_tsdb_user_group_model
 from sentry.utils import metrics
 from sentry.utils.cache import cache
 
@@ -94,7 +94,7 @@ class GroupSnooze(Model):
         start = end - timedelta(minutes=self.window)
 
         rate = tsdb.get_sums(
-            model=ISSUE_TSDB_GROUP_MODELS[self.group.issue_category],
+            model=get_issue_tsdb_group_model(self.group.issue_category),
             keys=[self.group_id],
             start=start,
             end=end,
@@ -114,7 +114,7 @@ class GroupSnooze(Model):
         start = end - timedelta(minutes=self.user_window)
 
         rate = tsdb.get_distinct_counts_totals(
-            model=ISSUE_TSDB_USER_GROUP_MODELS[self.group.issue_category],
+            model=get_issue_tsdb_user_group_model(self.group.issue_category),
             keys=[self.group_id],
             start=start,
             end=end,

+ 4 - 4
src/sentry/rules/conditions/event_frequency.py

@@ -13,7 +13,7 @@ from django.utils import timezone
 
 from sentry import release_health, tsdb
 from sentry.eventstore.models import GroupEvent
-from sentry.issues.constants import ISSUE_TSDB_GROUP_MODELS, ISSUE_TSDB_USER_GROUP_MODELS
+from sentry.issues.constants import get_issue_tsdb_group_model, get_issue_tsdb_user_group_model
 from sentry.receivers.rules import DEFAULT_RULE_LABEL
 from sentry.rules import EventState
 from sentry.rules.conditions.base import EventCondition
@@ -228,7 +228,7 @@ class EventFrequencyCondition(BaseEventFrequencyCondition):
         self, event: GroupEvent, start: datetime, end: datetime, environment_id: str
     ) -> int:
         sums: Mapping[int, int] = self.tsdb.get_sums(
-            model=ISSUE_TSDB_GROUP_MODELS[event.group.issue_category],
+            model=get_issue_tsdb_group_model(event.group.issue_category),
             keys=[event.group_id],
             start=start,
             end=end,
@@ -250,7 +250,7 @@ class EventUniqueUserFrequencyCondition(BaseEventFrequencyCondition):
         self, event: GroupEvent, start: datetime, end: datetime, environment_id: str
     ) -> int:
         totals: Mapping[int, int] = self.tsdb.get_distinct_counts_totals(
-            model=ISSUE_TSDB_USER_GROUP_MODELS[event.group.issue_category],
+            model=get_issue_tsdb_user_group_model(event.group.issue_category),
             keys=[event.group_id],
             start=start,
             end=end,
@@ -357,7 +357,7 @@ class EventFrequencyPercentCondition(BaseEventFrequencyCondition):
             avg_sessions_in_interval = session_count_last_hour / (60 / interval_in_minutes)
 
             issue_count = self.tsdb.get_sums(
-                model=ISSUE_TSDB_GROUP_MODELS[event.group.issue_category],
+                model=get_issue_tsdb_group_model(event.group.issue_category),
                 keys=[event.group_id],
                 start=start,
                 end=end,

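Both conditions only needed their model lookups rerouted through the new helpers; the query shape is unchanged. The return values are annotated Mapping[int, int] and, per the TSDB tests added below, are keyed by group id. An illustrative sketch:

    # tsdb.get_sums and tsdb.get_distinct_counts_totals both return {group_id: total}
    sums = {1234: 42}  # 42 events (or distinct users) in [start, end)
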
+ 5 - 0
src/sentry/tsdb/base.py

@@ -24,6 +24,9 @@ class TSDBModel(Enum):
     # number of transactions seen specific to a group
     group_performance = 10
 
+    # number of occurrences seen specific to a generic group
+    group_generic = 20
+
     # the number of events sent to the server
     project_total_received = 100
     # the number of events rejected due to rate limiting
@@ -46,6 +49,8 @@ class TSDBModel(Enum):
     users_affected_by_project = 301
     # distinct count of users that have been affected by an event in a performance group
     users_affected_by_perf_group = 302
+    # distinct count of users that have been affected by an event in a generic group
+    users_affected_by_generic_group = 303
 
     # frequent_organization_received_by_system = 400
     # frequent_organization_rejected_by_system = 401

+ 74 - 5
src/sentry/tsdb/snuba.py

@@ -78,6 +78,12 @@ class SnubaTSDB(BaseTSDB):
             [],
             [["arrayJoin", "group_ids", "group_id"]],
         ),
+        TSDBModel.group_generic: SnubaModelQuerySettings(
+            snuba.Dataset.IssuePlatform,
+            "group_id",
+            None,
+            [],
+        ),
         TSDBModel.release: SnubaModelQuerySettings(
             snuba.Dataset.Events, "tags[sentry:release]", None, [events_type_condition]
         ),
@@ -91,6 +97,12 @@ class SnubaTSDB(BaseTSDB):
             [],
             [["arrayJoin", "group_ids", "group_id"]],
         ),
+        TSDBModel.users_affected_by_generic_group: SnubaModelQuerySettings(
+            snuba.Dataset.IssuePlatform,
+            "group_id",
+            "tags[sentry:user]",
+            [],
+        ),
         TSDBModel.users_affected_by_project: SnubaModelQuerySettings(
             snuba.Dataset.Events, "project_id", "tags[sentry:user]", [events_type_condition]
         ),
@@ -193,6 +205,39 @@ class SnubaTSDB(BaseTSDB):
     def __init__(self, **options):
         super().__init__(**options)
 
+    def __manual_group_on_time_aggregation(self, rollup, time_column_alias) -> Sequence[Any]:
+        """
+        Explicitly builds an aggregation expression in place of using a `TimeSeriesProcessor` on the snuba entity.
+        Older tables, and the queries targeting them, had syntactic sugar for the `time` column that applied
+        additional processing to rewrite the query. For entities/models without that special processing, we
+        need to manually insert an equivalent expression to get the same result.
+        """
+
+        def rollup_agg(func: str):
+            return [
+                "toUnixTimestamp",
+                [[func, "timestamp"]],
+                time_column_alias,
+            ]
+
+        rollup_to_start_func = {
+            60: "toStartOfMinute",
+            3600: "toStartOfHour",
+            3600 * 24: "toDate",
+        }
+
+        # if we don't have an explicit function mapped to this rollup, we have to calculate it on the fly
+        # with: multiply(intDiv(toUInt32(toUnixTimestamp(timestamp)), rollup), rollup)
+        special_rollup = [
+            "multiply",
+            [["intDiv", [["toUInt32", [["toUnixTimestamp", "timestamp"]]], rollup]], rollup],
+            time_column_alias,
+        ]
+
+        rollup_func = rollup_to_start_func.get(rollup)
+
+        return rollup_agg(rollup_func) if rollup_func else special_rollup
+
     def get_data(
         self,
         model,
@@ -222,6 +267,12 @@ class SnubaTSDB(BaseTSDB):
         ]:
             keys = list(set(map(lambda x: int(x), keys)))
 
+        model_requires_manual_group_on_time = model in (
+            TSDBModel.group_generic,
+            TSDBModel.users_affected_by_generic_group,
+        )
+        group_on_time_column_alias = "grouped_time"
+
         model_query_settings = self.model_query_settings.get(model)
 
         if model_query_settings is None:
@@ -240,7 +291,10 @@ class SnubaTSDB(BaseTSDB):
         if group_on_model and model_group is not None:
             groupby.append(model_group)
         if group_on_time:
-            groupby.append("time")
+            if not model_requires_manual_group_on_time:
+                groupby.append("time")
+            else:
+                groupby.append(group_on_time_column_alias)
         if aggregation == "count()" and model_aggregate is not None:
             # Special case, because count has different semantics, we change:
             # `COUNT(model_aggregate)` to `COUNT() GROUP BY model_aggregate`
@@ -261,6 +315,11 @@ class SnubaTSDB(BaseTSDB):
         # timestamp of the last bucket to get the end time.
         rollup, series = self.get_optimal_rollup_series(start, end, rollup)
 
+        if group_on_time and model_requires_manual_group_on_time:
+            aggregations.append(
+                self.__manual_group_on_time_aggregation(rollup, group_on_time_column_alias)
+            )
+
         # If jitter_value is provided then we use it to offset the buckets we round start/end to by
         # up  to `rollup` seconds.
         series = self._add_jitter_to_series(series, start, rollup, jitter_value)
@@ -276,8 +335,10 @@ class SnubaTSDB(BaseTSDB):
 
         orderby = []
         if group_on_time:
-            orderby.append("-time")
-
+            if not model_requires_manual_group_on_time:
+                orderby.append("-time")
+            else:
+                orderby.append(f"-{group_on_time_column_alias}")
         if group_on_model and model_group is not None:
             orderby.append(model_group)
 
@@ -309,12 +370,20 @@ class SnubaTSDB(BaseTSDB):
             result = {}
 
         if group_on_time:
-            keys_map["time"] = series
+            if not model_requires_manual_group_on_time:
+                keys_map["time"] = series
+            else:
+                keys_map[group_on_time_column_alias] = series
 
         self.zerofill(result, groupby, keys_map)
         self.trim(result, groupby, keys)
 
-        return result
+        if group_on_time and model_requires_manual_group_on_time:
+            # unroll the manually grouped aggregate data before returning
+            self.unnest(result, aggregated_as)
+
+        return result
 
     def zerofill(self, result, groups, flat_keys):
         """

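To make the manual bucketing concrete: for a rollup with no dedicated toStartOf* function, the generated ClickHouse expression multiply(intDiv(toUInt32(toUnixTimestamp(timestamp)), rollup), rollup) floors each timestamp to the start of its bucket. A pure-Python sketch of the same arithmetic:

    from datetime import datetime, timezone

    def bucket_ts(ts: float, rollup: int) -> int:
        # equivalent of multiply(intDiv(toUInt32(toUnixTimestamp(ts)), rollup), rollup)
        return (int(ts) // rollup) * rollup

    ts = datetime(2022, 12, 1, 10, 17, tzinfo=timezone.utc).timestamp()
    # a 600s (10 minute) rollup floors 10:17 down to 10:10
    assert bucket_ts(ts, 600) == datetime(2022, 12, 1, 10, 10, tzinfo=timezone.utc).timestamp()
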
+ 4 - 0
src/sentry/types/issues.py

@@ -52,6 +52,10 @@ PERFORMANCE_TYPES = [
     gt.value for gt, gc in GROUP_TYPE_TO_CATEGORY.items() if gc == GroupCategory.PERFORMANCE
 ]
 
+PROFILE_TYPES = [
+    gt.value for gt, gc in GROUP_TYPE_TO_CATEGORY.items() if gc == GroupCategory.PROFILE
+]
+
 
 def get_category_type_mapping():
     category_type_mapping = defaultdict(list)

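Following the PERFORMANCE_TYPES pattern above, PROFILE_TYPES collects the numeric values of every GroupType whose category is PROFILE. As an illustration only (assuming PROFILE_BLOCKED_THREAD was the sole PROFILE-category type when this landed, which matches the type exercised in the tests below):

    PROFILE_TYPES == [GroupType.PROFILE_BLOCKED_THREAD.value]
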
+ 22 - 3
tests/sentry/api/serializers/test_group_stream.py

@@ -1,3 +1,4 @@
+import datetime
 from unittest import mock
 
 from freezegun import freeze_time
@@ -8,14 +9,15 @@ from sentry.api.serializers.models.group_stream import (
     StreamGroupSerializerSnuba,
 )
 from sentry.models import Environment
-from sentry.testutils import TestCase
+from sentry.testutils import SnubaTestCase, TestCase
 from sentry.testutils.helpers.datetime import before_now
 from sentry.testutils.silo import region_silo_test
-from sentry.types.issues import GroupType
+from sentry.types.issues import GroupCategory, GroupType
+from tests.sentry.issues.test_utils import SearchIssueTestMixin
 
 
 @region_silo_test
-class StreamGroupSerializerTestCase(TestCase):
+class StreamGroupSerializerTestCase(TestCase, SnubaTestCase, SearchIssueTestMixin):
     def test_environment(self):
         group = self.group
 
@@ -75,3 +77,20 @@ class StreamGroupSerializerTestCase(TestCase):
         assert serialized["issueType"] == "performance_n_plus_one_db_queries"
         assert [stat[1] for stat in serialized["stats"]["24h"][:-1]] == [0] * 23
         assert serialized["stats"]["24h"][-1][1] == 1
+
+    @freeze_time(before_now(days=1).replace(hour=13, minute=30, second=0, microsecond=0))
+    def test_profiling_issue(self):
+        proj = self.create_project()
+        cur_time = before_now(minutes=5).replace(tzinfo=datetime.timezone.utc)
+        event, occurrence, group_info = self.store_search_issue(
+            proj.id, 1, [f"{GroupType.PROFILE_BLOCKED_THREAD.value}-group100"], None, cur_time
+        )
+        assert group_info
+        serialized = serialize(
+            group_info.group, serializer=StreamGroupSerializerSnuba(stats_period="24h")
+        )
+        assert serialized["count"] == "1"
+        assert serialized["issueCategory"] == str(GroupCategory.PROFILE.name).lower()
+        assert serialized["issueType"] == str(GroupType.PROFILE_BLOCKED_THREAD.name).lower()
+        assert [stat[1] for stat in serialized["stats"]["24h"][:-1]] == [0] * 23
+        assert serialized["stats"]["24h"][-1][1] == 1

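Note the fingerprint convention used by the store_search_issue test helper: the fingerprint string is prefixed with the numeric GroupType value, which is presumably how the helper decides which occurrence type to emit (the parsing itself lives in the helper and is not shown here):

    # "<group type value>-<arbitrary suffix>"
    fingerprint = f"{GroupType.PROFILE_BLOCKED_THREAD.value}-group100"
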
+ 175 - 0
tests/snuba/tsdb/test_tsdb_backend.py

@@ -13,6 +13,7 @@ from sentry.tsdb.snuba import SnubaTSDB
 from sentry.types.issues import GroupType
 from sentry.utils.dates import to_datetime, to_timestamp
 from sentry.utils.snuba import aliased_query
+from tests.sentry.issues.test_utils import SearchIssueTestMixin
 
 
 def timestamp(d):
@@ -736,6 +737,180 @@ class SnubaTSDBGroupPerformanceTest(TestCase, SnubaTestCase, PerfIssueTransactio
         )
 
 
+@region_silo_test
+class SnubaTSDBGroupProfilingTest(TestCase, SnubaTestCase, SearchIssueTestMixin):
+    def setUp(self):
+        super().setUp()
+
+        self.db = SnubaTSDB()
+        self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
+            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
+        )
+        self.proj1 = self.create_project()
+
+        self.env1 = Environment.objects.get_or_create(
+            organization_id=self.proj1.organization_id, name="test"
+        )[0]
+        self.env2 = Environment.objects.get_or_create(
+            organization_id=self.proj1.organization_id, name="dev"
+        )[0]
+        defaultenv = ""
+
+        group1_fingerprint = f"{GroupType.PROFILE_BLOCKED_THREAD.value}-group1"
+        group2_fingerprint = f"{GroupType.PROFILE_BLOCKED_THREAD.value}-group2"
+
+        groups = {}
+        for r in range(0, 14400, 600):  # Every 10 min for 4 hours
+            event, occurrence, group_info = self.store_search_issue(
+                project_id=self.proj1.id,
+                # change every 55 min so some hours have 1 user, some have 2
+                user_id=r // 3300,
+                fingerprints=[group1_fingerprint] if ((r // 600) % 2) else [group2_fingerprint],
+                environment=[self.env1.name, None][(r // 7200) % 3],
+                insert_time=self.now + timedelta(seconds=r),
+            )
+            if group_info:
+                groups[group_info.group.id] = group_info.group
+
+        all_groups = list(groups.values())
+        self.proj1group1 = all_groups[0]
+        self.proj1group2 = all_groups[1]
+        self.defaultenv = Environment.objects.get(name=defaultenv)
+
+    def test_range_groups_mult(self):
+        now = (datetime.utcnow() - timedelta(days=1)).replace(
+            hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
+        )
+        dts = [now + timedelta(hours=i) for i in range(4)]
+        project = self.create_project()
+        group_fingerprint = f"{GroupType.PROFILE_BLOCKED_THREAD.value}-group4"
+        groups = []
+        for i in range(0, 11):
+            _, _, group_info = self.store_search_issue(
+                project_id=project.id,
+                user_id=0,
+                fingerprints=[group_fingerprint],
+                environment=None,
+                insert_time=now + timedelta(minutes=i * 10),
+            )
+            if group_info:
+                groups.append(group_info.group)
+
+        group = groups[0]
+        assert self.db.get_range(
+            TSDBModel.group_generic,
+            [group.id],
+            dts[0],
+            dts[-1],
+            rollup=3600,
+        ) == {
+            group.id: [
+                (timestamp(dts[0]), 6),
+                (timestamp(dts[1]), 5),
+                (timestamp(dts[2]), 0),
+                (timestamp(dts[3]), 0),
+            ]
+        }
+
+    def test_range_groups_simple(self):
+        project = self.create_project()
+        now = (datetime.utcnow() - timedelta(days=1)).replace(
+            hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
+        )
+        group_fingerprint = f"{GroupType.PROFILE_BLOCKED_THREAD.value}-group5"
+        ids = [1, 2, 3, 4, 5]
+        groups = []
+        for r in ids:
+            event, occurrence, group_info = self.store_search_issue(
+                project_id=project.id,
+                # a distinct user per event
+                user_id=r,
+                fingerprints=[group_fingerprint],
+                environment=None,
+                insert_time=now,
+            )
+            if group_info:
+                groups.append(group_info.group)
+
+        group = groups[0]
+        dts = [now + timedelta(hours=i) for i in range(4)]
+        assert self.db.get_range(
+            TSDBModel.group_generic,
+            [group.id],
+            dts[0],
+            dts[-1],
+            rollup=3600,
+        ) == {
+            group.id: [
+                (timestamp(dts[0]), len(ids)),
+                (timestamp(dts[1]), 0),
+                (timestamp(dts[2]), 0),
+                (timestamp(dts[3]), 0),
+            ]
+        }
+
+    def test_range_groups(self):
+        dts = [self.now + timedelta(hours=i) for i in range(4)]
+        # Multiple groups
+        assert self.db.get_range(
+            TSDBModel.group_generic,
+            [self.proj1group1.id, self.proj1group2.id],
+            dts[0],
+            dts[-1],
+            rollup=3600,
+        ) == {
+            self.proj1group1.id: [
+                (timestamp(dts[0]), 3),
+                (timestamp(dts[1]), 3),
+                (timestamp(dts[2]), 3),
+                (timestamp(dts[3]), 3),
+            ],
+            self.proj1group2.id: [
+                (timestamp(dts[0]), 3),
+                (timestamp(dts[1]), 3),
+                (timestamp(dts[2]), 3),
+                (timestamp(dts[3]), 3),
+            ],
+        }
+
+        assert self.db.get_range(TSDBModel.group_generic, [], dts[0], dts[-1], rollup=3600) == {}
+
+    def test_get_distinct_counts_totals_users(self):
+        assert self.db.get_distinct_counts_totals(
+            TSDBModel.users_affected_by_generic_group,
+            [self.proj1group1.id],
+            self.now,
+            self.now + timedelta(hours=4),
+            rollup=3600,
+        ) == {
+            self.proj1group1.id: 5  # 5 unique users overall
+        }
+
+        assert self.db.get_distinct_counts_totals(
+            TSDBModel.users_affected_by_generic_group,
+            [self.proj1group1.id],
+            self.now,
+            self.now,
+            rollup=3600,
+        ) == {
+            self.proj1group1.id: 1  # Only 1 unique user in the first hour
+        }
+
+        assert (
+            self.db.get_distinct_counts_totals(
+                TSDBModel.users_affected_by_generic_group,
+                [],
+                self.now,
+                self.now + timedelta(hours=4),
+                rollup=3600,
+            )
+            == {}
+        )
+
+
 class AddJitterToSeriesTest(TestCase):
     def setUp(self):
         self.db = SnubaTSDB()