Browse Source

feat(most-helpful-event): add support for querying the most helpful event for a group (#51124)

Currently, the `GroupEventDetailsEndpoint` supports retrieving the
latest or oldest event associated with the Group.

This PR adds a third option, retrieving the 'most helpful' event
associated with the Group. In practical terms, this new sort method
prioritizes events with:
- `replay_id`, events with a replay have a higher sort order than events
without a replay
- `profile_id`, events with a profile have a higher sort order than
events without a profile
- `num_processing_errors`, events with fewer processing errors are
prioritized over events with processing errrors
- `trace.sampled`, error events with full trace will be prioritized over
events without full trace

This feature is behind a feature flag.

Resolves https://github.com/getsentry/sentry/issues/50890,
https://github.com/getsentry/sentry/issues/51070
Gilbert Szeto 1 year ago
parent
commit
4835a79cfb

+ 22 - 9
src/sentry/api/endpoints/group_event_details.py

@@ -5,13 +5,14 @@ from typing import TYPE_CHECKING
 from rest_framework.request import Request
 from rest_framework.response import Response
 
-from sentry import eventstore
+from sentry import eventstore, features
 from sentry.api.base import region_silo_endpoint
 from sentry.api.bases.group import GroupEndpoint
 from sentry.api.endpoints.project_event_details import wrap_event_response
 from sentry.api.helpers.environments import get_environments
 from sentry.api.serializers import EventSerializer, serialize
 from sentry.types.ratelimit import RateLimit, RateLimitCategory
+from sentry.utils import metrics
 
 if TYPE_CHECKING:
     from sentry.models.group import Group
@@ -30,24 +31,36 @@ class GroupEventDetailsEndpoint(GroupEndpoint):
 
     def get(self, request: Request, group: Group, event_id: str) -> Response:
         """
-        Retrieve the Latest Event for an Issue
+        Retrieve the latest(most recent), oldest, or most helpful Event for an Issue
         ``````````````````````````````````````
 
-        Retrieves the details of the latest event for an issue.
+        Retrieves the details of the latest/oldest/most-helpful event for an issue.
 
         :pparam string group_id: the ID of the issue
         """
         environments = [e.name for e in get_environments(request, group.project.organization)]
-        event = None
 
         if event_id == "latest":
-            event = group.get_latest_event_for_environments(environments)
+            with metrics.timer("api.endpoints.group_event_details.get", tags={"type": "latest"}):
+                event = group.get_latest_event_for_environments(environments)
         elif event_id == "oldest":
-            event = group.get_oldest_event_for_environments(environments)
+            with metrics.timer("api.endpoints.group_event_details.get", tags={"type": "oldest"}):
+                event = group.get_oldest_event_for_environments(environments)
+        elif event_id == "helpful":
+            if features.has(
+                "organizations:issue-details-most-helpful-event", group.project.organization
+            ):
+                with metrics.timer(
+                    "api.endpoints.group_event_details.get", tags={"type": "helpful"}
+                ):
+                    event = group.get_helpful_event_for_environments(environments)
+            else:
+                return Response(status=404)
         else:
-            event = eventstore.backend.get_event_by_id(
-                group.project.id, event_id, group_id=group.id
-            )
+            with metrics.timer("api.endpoints.group_event_details.get", tags={"type": "event"}):
+                event = eventstore.backend.get_event_by_id(
+                    group.project.id, event_id, group_id=group.id
+                )
             # TODO: Remove `for_group` check once performance issues are moved to the issue platform
             if hasattr(event, "for_group") and event.group:
                 event = event.for_group(event.group)

+ 3 - 3
src/sentry/api/endpoints/project_event_details.py

@@ -1,5 +1,5 @@
 from datetime import datetime
-from typing import Any, List
+from typing import Any, List, Union
 
 from rest_framework.request import Request
 from rest_framework.response import Response
@@ -8,12 +8,12 @@ from sentry import eventstore
 from sentry.api.base import region_silo_endpoint
 from sentry.api.bases.project import ProjectEndpoint
 from sentry.api.serializers import IssueEventSerializer, serialize
-from sentry.eventstore.models import Event
+from sentry.eventstore.models import Event, GroupEvent
 
 
 def wrap_event_response(
     request_user: Any,
-    event: Event,
+    event: Union[Event, GroupEvent],
     environments: List[str],
     include_full_release_data: bool = False,
 ):

+ 1 - 1
src/sentry/api/urls.py

@@ -564,7 +564,7 @@ GROUP_URLS = [
         GroupEventsEndpoint.as_view(),
     ),
     url(
-        r"^(?P<issue_id>[^\/]+)/events/(?P<event_id>(?:latest|oldest|\d+|[A-Fa-f0-9-]{32,36}))/$",
+        r"^(?P<issue_id>[^\/]+)/events/(?P<event_id>(?:latest|oldest|helpful|\d+|[A-Fa-f0-9-]{32,36}))/$",
         GroupEventDetailsEndpoint.as_view(),
     ),
     url(

+ 2 - 0
src/sentry/conf/server.py

@@ -1474,6 +1474,8 @@ SENTRY_FEATURES = {
     "organizations:sql-format": False,
     # Enable experimental replay-issue rendering on Issue Details page
     "organizations:issue-details-replay-event": False,
+    # Enable sorting Issue detail events by 'most helpful'
+    "organizations:issue-details-most-helpful-event": False,
     # Enable prefetching of issues from the issue list when hovered
     "organizations:issue-list-prefetch-issue-on-hover": False,
     # Enable better priority sort algorithm.

+ 20 - 0
src/sentry/eventstore/base.py

@@ -1,6 +1,9 @@
 from copy import deepcopy
+from datetime import datetime
+from typing import Optional, Sequence
 
 import sentry_sdk
+from snuba_sdk import Condition
 
 from sentry import nodestore
 from sentry.eventstore.models import Event
@@ -121,6 +124,7 @@ class EventStorage(Service):
         "create_event",
         "get_event_by_id",
         "get_events",
+        "get_events_snql",
         "get_unfetched_events",
         "get_adjacent_event_ids",
         "bind_nodes",
@@ -172,6 +176,22 @@ class EventStorage(Service):
         """
         raise NotImplementedError
 
+    def get_events_snql(
+        self,
+        organization_id: int,
+        group_id: int,
+        start: Optional[datetime],
+        end: Optional[datetime],
+        conditions: Sequence[Condition],
+        orderby: Sequence[str],
+        limit=100,
+        offset=0,
+        referrer="eventstore.get_events_snql",
+        dataset=Dataset.Events,
+        tenant_ids=None,
+    ):
+        raise NotImplementedError
+
     def get_unfetched_events(
         self,
         snuba_filter,

+ 98 - 1
src/sentry/eventstore/snuba/backend.py

@@ -2,10 +2,23 @@ import logging
 import random
 from copy import copy, deepcopy
 from datetime import datetime, timedelta
-from typing import Any, Mapping
+from typing import Any, Mapping, Optional, Sequence
 
 import sentry_sdk
 from django.utils import timezone
+from snuba_sdk import (
+    Column,
+    Condition,
+    Direction,
+    Entity,
+    Function,
+    Limit,
+    Offset,
+    Op,
+    OrderBy,
+    Query,
+    Request,
+)
 
 from sentry.eventstore.base import EventStorage
 from sentry.eventstore.models import Event
@@ -13,6 +26,7 @@ from sentry.models.group import Group
 from sentry.snuba.dataset import Dataset
 from sentry.snuba.events import Columns
 from sentry.utils import snuba
+from sentry.utils.snuba import DATASETS, _prepare_start_end, raw_snql_query
 from sentry.utils.validators import normalize_event_id
 
 EVENT_ID = Columns.EVENT_ID.value.alias
@@ -48,6 +62,89 @@ class SnubaEventStorage(EventStorage):
     Eventstore backend backed by Snuba
     """
 
+    def get_events_snql(
+        self,
+        organization_id: int,
+        group_id: int,
+        start: Optional[datetime],
+        end: Optional[datetime],
+        conditions: Sequence[Condition],
+        orderby: Sequence[str],
+        limit=DEFAULT_LIMIT,
+        offset=DEFAULT_OFFSET,
+        referrer="eventstore.get_events_snql",
+        dataset=snuba.Dataset.Events,
+        tenant_ids=None,
+    ):
+        cols = self.__get_columns(dataset)
+
+        resolved_order_by = []
+        for order_field_alias in orderby:
+            if order_field_alias.startswith("-"):
+                direction = Direction.DESC
+                order_field_alias = order_field_alias[1:]
+            else:
+                direction = Direction.ASC
+            resolved_column_or_none = DATASETS[dataset].get(order_field_alias)
+            if resolved_column_or_none:
+                # special-case handling for nullable column values and proper ordering based on direction
+                # null values are always last in the sort order regardless of Desc or Asc ordering
+                if order_field_alias == Columns.NUM_PROCESSING_ERRORS.value.alias:
+                    resolved_order_by.append(
+                        OrderBy(
+                            Function("coalesce", [Column(resolved_column_or_none), 99999999]),
+                            direction=direction,
+                        )
+                    )
+                elif order_field_alias == Columns.TRACE_SAMPLED.value.alias:
+                    resolved_order_by.append(
+                        OrderBy(
+                            Function("coalesce", [Column(resolved_column_or_none), -1]),
+                            direction=direction,
+                        )
+                    )
+                else:
+                    resolved_order_by.append(
+                        OrderBy(Column(resolved_column_or_none), direction=direction)
+                    )
+        orderby = resolved_order_by
+
+        start, end = _prepare_start_end(
+            start,
+            end,
+            organization_id,
+            [group_id],
+        )
+
+        snql_request = Request(
+            dataset=dataset.value,
+            app_id="eventstore",
+            query=Query(
+                match=Entity(dataset.value),
+                select=[Column(col) for col in cols],
+                where=[
+                    Condition(
+                        Column(DATASETS[dataset][Columns.TIMESTAMP.value.alias]), Op.GTE, start
+                    ),
+                    Condition(Column(DATASETS[dataset][Columns.TIMESTAMP.value.alias]), Op.LT, end),
+                ]
+                + list(conditions),
+                orderby=orderby,
+                limit=Limit(limit),
+                offset=Offset(offset),
+            ),
+            tenant_ids=tenant_ids or dict(),
+        )
+
+        result = raw_snql_query(snql_request, referrer, use_cache=False)
+
+        if "error" not in result:
+            events = [self.__make_event(evt) for evt in result["data"]]
+            self.bind_nodes(events)
+            return events
+
+        return []
+
     def get_events(
         self,
         filter,

+ 1 - 0
src/sentry/features/__init__.py

@@ -89,6 +89,7 @@ default_manager.add("organizations:integrations-discord", OrganizationFeature, F
 default_manager.add("organizations:issue-alert-fallback-targeting", OrganizationFeature, FeatureHandlerStrategy.REMOTE)
 default_manager.add("organizations:sql-format", OrganizationFeature, FeatureHandlerStrategy.REMOTE)
 default_manager.add("organizations:issue-details-replay-event", OrganizationFeature, FeatureHandlerStrategy.REMOTE)
+default_manager.add("organizations:issue-details-most-helpful-event", OrganizationFeature, FeatureHandlerStrategy.REMOTE)
 default_manager.add("organizations:issue-details-tag-improvements", OrganizationFeature, FeatureHandlerStrategy.REMOTE)
 default_manager.add("organizations:issue-list-prefetch-issue-on-hover", OrganizationFeature, FeatureHandlerStrategy.REMOTE)
 default_manager.add("organizations:issue-list-better-priority-sort", OrganizationFeature, FeatureHandlerStrategy.REMOTE)

+ 49 - 1
src/sentry/models/group.py

@@ -18,6 +18,7 @@ from django.dispatch import receiver
 from django.utils import timezone
 from django.utils.http import urlencode
 from django.utils.translation import ugettext_lazy as _
+from snuba_sdk import Column, Condition, Op
 
 from sentry import eventstore, eventtypes, tagstore
 from sentry.constants import DEFAULT_LOGGER_NAME, LOG_LEVELS, MAX_CULPRIT_LENGTH
@@ -197,6 +198,13 @@ STATUS_UPDATE_CHOICES = {
 class EventOrdering(Enum):
     LATEST = ["-timestamp", "-event_id"]
     OLDEST = ["timestamp", "event_id"]
+    MOST_HELPFUL = [
+        "-replayId",
+        "-profile.id",
+        "num_processing_errors",
+        "-trace.sampled",
+        "-timestamp",
+    ]
 
 
 def get_oldest_or_latest_event_for_environments(
@@ -215,7 +223,6 @@ def get_oldest_or_latest_event_for_environments(
     _filter = eventstore.Filter(
         conditions=conditions, project_ids=[group.project_id], group_ids=[group.id]
     )
-
     events = eventstore.backend.get_events(
         filter=_filter,
         limit=1,
@@ -231,6 +238,39 @@ def get_oldest_or_latest_event_for_environments(
     return None
 
 
+def get_helpful_event_for_environments(
+    environments: Sequence[str], group: Group
+) -> GroupEvent | None:
+    if group.issue_category == GroupCategory.ERROR:
+        dataset = Dataset.Events
+    else:
+        dataset = Dataset.IssuePlatform
+
+    conditions = []
+    if len(environments) > 0:
+        conditions.append(Condition(Column("environment"), Op.IN, environments))
+    conditions.append(Condition(Column("project_id"), Op.IN, [group.project.id]))
+    conditions.append(Condition(Column("group_id"), Op.IN, [group.id]))
+
+    events = eventstore.get_events_snql(
+        organization_id=group.project.organization_id,
+        group_id=group.id,
+        start=None,
+        end=None,
+        conditions=conditions,
+        limit=1,
+        orderby=EventOrdering.MOST_HELPFUL.value,
+        referrer="Group.get_helpful",
+        dataset=dataset,
+        tenant_ids={"organization_id": group.project.organization_id},
+    )
+
+    if events:
+        return events[0].for_group(group)
+
+    return None
+
+
 class GroupManager(BaseManager):
     use_for_related_fields = True
 
@@ -595,6 +635,14 @@ class Group(Model):
             self,
         )
 
+    def get_helpful_event_for_environments(
+        self, environments: Sequence[str] = ()
+    ) -> GroupEvent | None:
+        return get_helpful_event_for_environments(
+            environments,
+            self,
+        )
+
     def get_first_release(self) -> str | None:
         from sentry.models import Release
 

+ 19 - 1
src/sentry/snuba/events.py

@@ -731,9 +731,27 @@ class Columns(Enum):
 
     REPLAY_ID = Column(
         group_name=None,
-        event_name=None,
+        event_name="replay_id",
         transaction_name=None,
         discover_name=None,
         issue_platform_name="replay_id",
         alias="replayId",
     )
+
+    TRACE_SAMPLED = Column(
+        group_name=None,
+        event_name="trace_sampled",
+        transaction_name=None,
+        discover_name=None,
+        issue_platform_name=None,
+        alias="trace.sampled",
+    )
+
+    NUM_PROCESSING_ERRORS = Column(
+        group_name=None,
+        event_name="num_processing_errors",
+        transaction_name=None,
+        discover_name=None,
+        issue_platform_name=None,
+        alias="num_processing_errors",
+    )

+ 42 - 6
tests/sentry/api/endpoints/test_group_event_details.py

@@ -1,7 +1,9 @@
+import uuid
 from uuid import uuid4
 
 from sentry.models.release import Release
 from sentry.testutils import APITestCase, SnubaTestCase
+from sentry.testutils.helpers import with_feature
 from sentry.testutils.helpers.datetime import before_now, iso_format
 from sentry.testutils.silo import region_silo_test
 
@@ -12,13 +14,13 @@ class GroupEventDetailsEndpointTest(APITestCase, SnubaTestCase):
         super().setUp()
 
         self.login_as(user=self.user)
-        project = self.create_project()
+        self.project_1 = self.create_project()
 
         release_version = uuid4().hex
         release = Release.objects.create(
-            organization_id=self.project.organization_id, version=release_version
+            organization_id=self.project_1.organization_id, version=release_version
         )
-        release.add_project(self.project)
+        release.add_project(self.project_1)
 
         self.event_a = self.store_event(
             data={
@@ -28,7 +30,7 @@ class GroupEventDetailsEndpointTest(APITestCase, SnubaTestCase):
                 "fingerprint": ["group-1"],
                 "release": release_version,
             },
-            project_id=project.id,
+            project_id=self.project_1.id,
         )
         self.event_b = self.store_event(
             data={
@@ -38,7 +40,7 @@ class GroupEventDetailsEndpointTest(APITestCase, SnubaTestCase):
                 "fingerprint": ["group-1"],
                 "release": release_version,
             },
-            project_id=project.id,
+            project_id=self.project_1.id,
         )
         self.event_c = self.store_event(
             data={
@@ -48,7 +50,7 @@ class GroupEventDetailsEndpointTest(APITestCase, SnubaTestCase):
                 "fingerprint": ["group-1"],
                 "release": release_version,
             },
-            project_id=project.id,
+            project_id=self.project_1.id,
         )
 
     def test_get_simple_latest(self):
@@ -129,3 +131,37 @@ class GroupEventDetailsEndpointTest(APITestCase, SnubaTestCase):
         assert not {"firstEvent", "lastEvent", "newGroups"} <= set(
             response_with_collapse.data["release"]
         )
+
+    def test_get_helpful_feature_off(self):
+        url = f"/api/0/issues/{self.event_a.group.id}/events/helpful/"
+        response = self.client.get(url, format="json")
+
+        assert response.status_code == 404, response.content
+
+    @with_feature("organizations:issue-details-most-helpful-event")
+    def test_get_simple_helpful(self):
+        self.event_d = self.store_event(
+            data={
+                "event_id": "d" * 32,
+                "environment": "staging",
+                "timestamp": iso_format(before_now(minutes=1)),
+                "fingerprint": ["group-1"],
+                "contexts": {
+                    "replay": {"replay_id": uuid.uuid4().hex},
+                    "trace": {
+                        "sampled": True,
+                        "span_id": "babaae0d4b7512d9",
+                        "trace_id": "a7d67cf796774551a95be6543cacd459",
+                    },
+                },
+                "errors": [],
+            },
+            project_id=self.project_1.id,
+        )
+        url = f"/api/0/issues/{self.event_a.group.id}/events/helpful/"
+        response = self.client.get(url, format="json")
+
+        assert response.status_code == 200, response.content
+        assert response.data["id"] == str(self.event_d.event_id)
+        assert response.data["previousEventID"] == self.event_c.event_id
+        assert response.data["nextEventID"] is None

Some files were not shown because too many files changed in this diff