
feat: Grouping breakdown prototype (#25394)

Co-authored-by: Priscila Oliveira <priscilawebdev@gmail.com>
Co-authored-by: sentry-internal-tools[bot] <66042841+sentry-internal-tools[bot]@users.noreply.github.com>
Markus Unterwaditzer committed 3 years ago
commit ee95a3e4fc

+ 333 - 0
src/sentry/api/endpoints/group_hashes_split.py

@@ -0,0 +1,333 @@
+import datetime
+from typing import Optional, Sequence
+
+import sentry_sdk
+from django.db import transaction
+from snuba_sdk.conditions import Condition, Op
+from snuba_sdk.orderby import Direction, OrderBy
+from snuba_sdk.query import Column, Entity, Function, Query
+
+from sentry import eventstore, features
+from sentry.api.bases import GroupEndpoint
+from sentry.api.serializers import EventSerializer, serialize
+from sentry.grouping.variants import ComponentVariant
+from sentry.models import Group, GroupHash
+from sentry.utils import snuba
+
+
+class GroupHashesSplitEndpoint(GroupEndpoint):
+    def get(self, request, group):
+        """
+        Return information on whether the group can be split up, whether it
+        has already been split up, and what it would be split into.
+
+        In the future this endpoint should supersede the GET on the
+        grouphashes endpoint.
+        """
+
+        return self.respond(_render_trees(group, request.user), status=200)
+
+    def put(self, request, group):
+        """
+        Split up a group into subgroups
+        ```````````````````````````````
+
+        If a group is split up using this endpoint, new events that would have
+        been associated with this group will instead create 1..n new, more
+        "specific" groups according to their hierarchical group hashes.
+
+        For example, let's say you have a group containing all events whose
+        crashing frame was `log_error`, i.e. events are only grouped by one
+        frame. This is not a very descriptive frame to group by. If this
+        endpoint is hit, new events that crash in `log_error` will be sorted
+        into groups that hash by `log_error` and the next (calling) frame.
+
+        In the future this endpoint will also move existing events into the
+        correct new groups.
+
+        :pparam string issue_id: the ID of the issue to split up.
+        :auth: required
+        """
+
+        if not features.has(
+            "organizations:grouping-tree-ui", group.project.organization, actor=request.user
+        ):
+
+            return self.respond(
+                {"error": "This project does not have the grouping tree feature"},
+                status=404,
+            )
+
+        hashes = request.GET.getlist("id")
+        for hash in hashes:
+            if not isinstance(hash, str) or len(hash) != 32:
+                return self.respond({"error": "hash does not look like a grouphash"}, status=400)
+
+        for hash in hashes:
+            _split_group(group, hash)
+
+        return self.respond(status=200)
+
+    def delete(self, request, group):
+        """
+        Un-split group(s) into their parent group
+        `````````````````````````````````````````
+
+        This basically undoes the split operation one can do with PUT on this
+        endpoint. Note that this API is not very RESTful: The group referenced
+        here is one of the subgroups created rather than the group that was
+        split up.
+
+        When unsplitting, all other child groups are left intact and can be
+        merged into the parent via regular issue merge.
+
+        In the future this endpoint will, much like for PUT, move existing
+        events of the referenced group into the parent group.
+
+        :pparam string issue_id: the ID of the issue to un-split.
+        :auth: required
+        """
+
+        if not features.has(
+            "organizations:grouping-tree-ui", group.project.organization, actor=request.user
+        ):
+
+            return self.respond(
+                {"error": "This project does not have the grouping tree feature"},
+                status=404,
+            )
+
+        hashes = request.GET.getlist("id")
+        for hash in hashes:
+            if not isinstance(hash, str) or len(hash) != 32:
+                return self.respond({"error": "hash does not look like a grouphash"}, status=400)
+
+        for hash in hashes:
+            _unsplit_group(group, hash)
+
+        return self.respond(status=200)
+
+
+class NoHierarchicalHash(Exception):
+    pass
+
+
+def _split_group(group: Group, hash: str, hierarchical_hashes: Optional[Sequence[str]] = None):
+    if hierarchical_hashes is None:
+        hierarchical_hashes = _get_full_hierarchical_hashes(group, hash)
+
+    if not hierarchical_hashes:
+        raise NoHierarchicalHash()
+
+    hierarchical_grouphashes = {
+        grouphash.hash: grouphash
+        for grouphash in GroupHash.objects.filter(
+            project=group.project, group=group, hash__in=hierarchical_hashes
+        )
+    }
+
+    for hash in hierarchical_hashes:
+        grouphash = hierarchical_grouphashes.get(hash)
+        if grouphash is not None:
+            break
+    else:
+        raise NoHierarchicalHash()
+
+    # Mark one hierarchical hash as SPLIT. Note this also prevents it from
+    # being deleted in group deletion.
+    grouphash.state = GroupHash.State.SPLIT
+    grouphash.save()
+
+
+def _get_full_hierarchical_hashes(group: Group, hash: str) -> Optional[Sequence[str]]:
+    query = (
+        Query("events", Entity("events"))
+        .set_select(
+            [
+                Column("hierarchical_hashes"),
+            ]
+        )
+        .set_where(
+            _get_group_filters(group)
+            + [
+                Condition(
+                    Function(
+                        "has",
+                        [Column("hierarchical_hashes"), hash],
+                    ),
+                    Op.EQ,
+                    1,
+                ),
+            ]
+        )
+    )
+
+    data = snuba.raw_snql_query(query, referrer="group_split.get_full_hierarchical_hashes")["data"]
+    if not data:
+        return None
+
+    return data[0]["hierarchical_hashes"]
+
+
+def _unsplit_group(group: Group, hash: str, hierarchical_hashes: Optional[Sequence[str]] = None):
+    if hierarchical_hashes is None:
+        hierarchical_hashes = _get_full_hierarchical_hashes(group, hash)
+
+    if not hierarchical_hashes:
+        raise NoHierarchicalHash()
+
+    hierarchical_grouphashes = {
+        grouphash.hash: grouphash
+        for grouphash in GroupHash.objects.filter(
+            project=group.project, hash__in=hierarchical_hashes
+        )
+    }
+
+    grouphash_to_unsplit = None
+    grouphash_to_delete = None
+
+    # Only un-split one grouphash, such that issue grouping moves up only
+    # one level of the tree.
+
+    for hash in hierarchical_hashes:
+        grouphash = hierarchical_grouphashes.get(hash)
+        if grouphash is None:
+            continue
+
+        if grouphash.state == GroupHash.State.SPLIT:
+            grouphash_to_unsplit = grouphash
+
+        if grouphash.group_id == group.id:
+            grouphash_to_delete = grouphash
+
+    with transaction.atomic():
+        if grouphash_to_unsplit is not None:
+            grouphash_to_unsplit.state = GroupHash.State.UNLOCKED
+            grouphash_to_unsplit.save()
+
+        if grouphash_to_delete is not None:
+            grouphash_to_delete.delete()
+
+
+def _get_group_filters(group: Group):
+    return [
+        Condition(Column("timestamp"), Op.GTE, group.first_seen),
+        Condition(Column("timestamp"), Op.LT, group.last_seen + datetime.timedelta(seconds=1)),
+        Condition(Column("project_id"), Op.EQ, group.project_id),
+        Condition(Column("group_id"), Op.EQ, group.id),
+    ]
+
+
+def _render_trees(group: Group, user):
+    materialized_hashes = {
+        gh.hash for gh in GroupHash.objects.filter(project=group.project, group=group)
+    }
+
+    rv = []
+
+    common_where = _get_group_filters(group)
+
+    for materialized_hash in materialized_hashes:
+        # For every materialized hash we want to render parent and child
+        # hashes, a limited view of the entire tree. We fetch one sample event
+        # so we know how we need to slice hierarchical_hashes.
+        hierarchical_hashes = _get_full_hierarchical_hashes(group, materialized_hash)
+
+        if not hierarchical_hashes:
+            # No hierarchical_hashes found; the materialized hash is probably
+            # from flat grouping.
+            parent_pos = None
+            hash_pos = None
+            child_pos = None
+            slice_start = 0
+        else:
+            materialized_pos = hierarchical_hashes.index(materialized_hash)
+
+            if materialized_pos == 0:
+                parent_pos = None
+                hash_pos = 0
+                child_pos = 1
+                slice_start = 1
+            else:
+                parent_pos = 0
+                hash_pos = 1
+                child_pos = 2
+                slice_start = materialized_pos
+
+        # Select sub-views of the trees that contain materialized_hash.
+        query = (
+            Query("events", Entity("events"))
+            .set_select(
+                [
+                    Function("count", [], "event_count"),
+                    Function("argMax", [Column("event_id"), Column("timestamp")], "event_id"),
+                    Function("max", [Column("timestamp")], "latest_event_timestamp"),
+                    Function(
+                        "arraySlice", [Column("hierarchical_hashes"), slice_start, 3], "hashes"
+                    ),
+                ]
+            )
+            .set_where(
+                common_where
+                + [
+                    Condition(
+                        Function(
+                            "has",
+                            [
+                                Column("hierarchical_hashes"),
+                                materialized_hash,
+                            ],
+                        ),
+                        Op.EQ,
+                        1,
+                    ),
+                ]
+            )
+            .set_groupby([Column("hashes")])
+            .set_orderby([OrderBy(Column("latest_event_timestamp"), Direction.DESC)])
+        )
+
+        for row in snuba.raw_snql_query(query)["data"]:
+            assert not row["hashes"] or row["hashes"][hash_pos] == materialized_hash
+
+            event_id = row["event_id"]
+            event = eventstore.get_event_by_id(group.project_id, event_id)
+
+            tree = {
+                "parentId": _get_checked(row["hashes"], parent_pos),
+                "id": materialized_hash,
+                "childId": _get_checked(row["hashes"], child_pos),
+                "eventCount": row["event_count"],
+                "latestEvent": serialize(event, user, EventSerializer()),
+            }
+
+            rv.append(tree)
+
+            if not row["hashes"]:
+                continue
+
+            try:
+                for variant in event.get_grouping_variants().values():
+                    if not isinstance(variant, ComponentVariant):
+                        continue
+
+                    if variant.get_hash() == tree["parentId"]:
+                        tree["parentLabel"] = variant.component.tree_label
+
+                    if variant.get_hash() == tree["childId"]:
+                        tree["childLabel"] = variant.component.tree_label
+
+                    if variant.get_hash() == tree["id"]:
+                        tree["label"] = variant.component.tree_label
+            except Exception:
+                sentry_sdk.capture_exception()
+
+    rv.sort(key=lambda tree: (tree["parentId"] or "", tree["id"] or "", tree["childId"] or ""))
+
+    return rv
+
+
+def _get_checked(seq, pos):
+    if pos is not None and pos < len(seq):
+        return seq[pos]
+    return None
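
The position bookkeeping in _render_trees above is easiest to check with a
worked example (illustrative values only; plain Python mirroring ClickHouse's
1-indexed arraySlice):

    # Hierarchical hashes run from most general to most specific.
    hierarchical_hashes = ["h0", "h1", "h2", "h3"]
    materialized_hash = "h2"
    materialized_pos = hierarchical_hashes.index(materialized_hash)  # 2

    if materialized_pos == 0:
        parent_pos, hash_pos, child_pos, slice_start = None, 0, 1, 1
    else:
        parent_pos, hash_pos, child_pos, slice_start = 0, 1, 2, materialized_pos

    # arraySlice(hierarchical_hashes, slice_start, 3) is 1-indexed, so
    # slice_start=2 yields ["h1", "h2", "h3"]:
    hashes = hierarchical_hashes[slice_start - 1 : slice_start - 1 + 3]
    assert hashes[hash_pos] == materialized_hash                     # "h2"
    parent = hashes[parent_pos] if parent_pos is not None else None  # "h1"
    child = hashes[child_pos] if child_pos < len(hashes) else None   # "h3"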

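For reviewers trying the prototype end to end, a minimal sketch of how the new
endpoint could be exercised (the base URL, token, issue ID, and hash are
placeholders; the /api/0/issues/ mount point is assumed from the existing
hashes endpoint):

    import requests

    BASE = "https://sentry.example.com/api/0"          # hypothetical instance
    HEADERS = {"Authorization": "Bearer <api-token>"}  # assumed auth scheme
    ISSUE_ID = "12345"                                 # group to inspect
    GROUP_HASH = "a" * 32                              # 32-char grouphash

    # GET renders the tree breakdown: a list of entries with parentId, id,
    # childId, eventCount, latestEvent and, where resolvable, tree labels.
    trees = requests.get(
        f"{BASE}/issues/{ISSUE_ID}/hashes/split/", headers=HEADERS
    ).json()

    # PUT splits at the given hash; new events form more specific groups.
    requests.put(
        f"{BASE}/issues/{ISSUE_ID}/hashes/split/",
        params={"id": GROUP_HASH},
        headers=HEADERS,
    )

    # DELETE undoes one level of splitting (issued against a subgroup).
    requests.delete(
        f"{BASE}/issues/{ISSUE_ID}/hashes/split/",
        params={"id": GROUP_HASH},
        headers=HEADERS,
    )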
+ 3 - 3
src/sentry/api/endpoints/group_reprocessing.py

@@ -10,10 +10,10 @@ class GroupReprocessingEndpoint(GroupEndpoint):
         `````````````````
 
         This endpoint triggers reprocessing for all events in a group.
-        Currently this means duplicating the events with new event IDs and
-        bumped timestamps.
 
-        :pparam string issue_id: the ID of the issue to retrieve.
+        :pparam string issue_id: the numeric ID of the issue to reprocess. The
+            reprocessed events will be assigned to a new group with a new
+            numeric ID. See comments in sentry.reprocessing2.
         :auth: required
         """
 

+ 2 - 0
src/sentry/api/urls.py

@@ -91,6 +91,7 @@ from .endpoints.group_external_issue_details import GroupExternalIssueDetailsEnd
 from .endpoints.group_external_issues import GroupExternalIssuesEndpoint
 from .endpoints.group_first_last_release import GroupFirstLastReleaseEndpoint
 from .endpoints.group_hashes import GroupHashesEndpoint
+from .endpoints.group_hashes_split import GroupHashesSplitEndpoint
 from .endpoints.group_integration_details import GroupIntegrationDetailsEndpoint
 from .endpoints.group_integrations import GroupIntegrationsEndpoint
 from .endpoints.group_notes import GroupNotesEndpoint
@@ -401,6 +402,7 @@ GROUP_URLS = [
         GroupNotesDetailsEndpoint.as_view(),
     ),
     url(r"^(?P<issue_id>[^\/]+)/hashes/$", GroupHashesEndpoint.as_view()),
+    url(r"^(?P<issue_id>[^\/]+)/hashes/split/$", GroupHashesSplitEndpoint.as_view()),
     url(r"^(?P<issue_id>[^\/]+)/reprocessing/$", GroupReprocessingEndpoint.as_view()),
     url(r"^(?P<issue_id>[^\/]+)/stats/$", GroupStatsEndpoint.as_view()),
     url(r"^(?P<issue_id>[^\/]+)/tags/$", GroupTagsEndpoint.as_view()),

+ 2 - 0
src/sentry/conf/server.py

@@ -881,6 +881,8 @@ SENTRY_FEATURES = {
     "organizations:trace-view-summary": False,
     # Enable multi project selection
     "organizations:global-views": False,
+    # Enable experimental new version of Merged Issues where sub-hashes are shown
+    "organizations:grouping-tree-ui": False,
     # Lets organizations manage grouping configs
     "organizations:set-grouping-config": False,
     # Lets organizations set a custom title through fingerprinting
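
Because the flag defaults to False, the PUT and DELETE handlers above return
404 until it is enabled; on a self-hosted install that would look roughly like
this (a sketch, assuming the usual sentry.conf.py override file):

    # sentry.conf.py -- opt an install into the grouping breakdown prototype.
    SENTRY_FEATURES["organizations:grouping-tree-ui"] = True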

+ 7 - 2
src/sentry/event_manager.py

@@ -1099,9 +1099,14 @@ def _find_existing_group_id(
                 all_grouphashes.append(group_hash)
 
         if root_hierarchical_hash is None:
-            # All hashes were split (should not be reachable from UI), so
-            # we group by most specific hash.
+            # All hashes were split, so we group by the most specific hash.
+            # This is a legitimate use case when there are events whose
+            # stacktraces are suffixes of other events' stacktraces.
             root_hierarchical_hash = hierarchical_hashes[-1]
+            group_hash = hierarchical_grouphashes.get(root_hierarchical_hash)
+
+            if group_hash is not None:
+                all_grouphashes.append(group_hash)
 
     if not found_split:
         # In case of a split we want to avoid accidentally finding the split-up
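
The fallback relies on hierarchical hashes being ordered from most general to
most specific: when every level has been split, the last entry is the right
bucket (an illustrative sketch, not code from this diff):

    # Illustrative: hashes for [log_error], [log_error, caller],
    # [log_error, caller, main] -- general to specific.
    hierarchical_hashes = ["h_general", "h_mid", "h_specific"]
    split_hashes = set(hierarchical_hashes)  # pretend every level was split

    root = next((h for h in hierarchical_hashes if h not in split_hashes), None)
    if root is None:
        # All levels split: group by the most specific hash. This happens when
        # one event's stacktrace is a suffix of another event's stacktrace.
        root = hierarchical_hashes[-1]
    assert root == "h_specific"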

+ 10 - 2
src/sentry/eventstore/models.py

@@ -4,6 +4,7 @@ from datetime import datetime
 from hashlib import md5
 
 import pytz
+import sentry_sdk
 from dateutil.parser import parse as parse_date
 from django.conf import settings
 from django.utils.encoding import force_text
@@ -416,9 +417,16 @@ class Event:
         config = load_grouping_config(config)
 
         if normalize_stacktraces:
-            normalize_stacktraces_for_grouping(self.data, config)
+            with sentry_sdk.start_span(op="grouping.normalize_stacktraces_for_grouping") as span:
+                span.set_tag("project", self.project_id)
+                span.set_tag("event_id", self.event_id)
+                normalize_stacktraces_for_grouping(self.data, config)
 
-        return get_grouping_variants_for_event(self, config)
+        with sentry_sdk.start_span(op="grouping.get_grouping_variants") as span:
+            span.set_tag("project", self.project_id)
+            span.set_tag("event_id", self.event_id)
+
+            return get_grouping_variants_for_event(self, config)
 
     def get_primary_hash(self):
         flat_hashes, hierarchical_hashes = self.get_hashes()

+ 1 - 0
src/sentry/features/__init__.py

@@ -73,6 +73,7 @@ default_manager.add("organizations:event-attachments-viewer", OrganizationFeatur
 default_manager.add("organizations:events", OrganizationFeature)  # NOQA
 default_manager.add("organizations:filters-and-sampling", OrganizationFeature)  # NOQA
 default_manager.add("organizations:global-views", OrganizationFeature)  # NOQA
+default_manager.add("organizations:grouping-tree-ui", OrganizationFeature)  # NOQA
 default_manager.add("organizations:images-loaded-v2", OrganizationFeature)  # NOQA
 default_manager.add("organizations:import-codeowners", OrganizationFeature)  # NOQA
 default_manager.add("organizations:inbox", OrganizationFeature)  # NOQA

+ 7 - 1
src/sentry/group_deletion.py

@@ -20,7 +20,13 @@ def delete_group(group):
     eventstream_state = eventstream.start_delete_groups(group.project_id, [group.id])
     transaction_id = uuid4().hex
 
-    GroupHash.objects.filter(project_id=group.project_id, group__id=group.id).delete()
+    # We do not want to delete split hashes, as they are necessary for keeping groups... split.
+    GroupHash.objects.filter(
+        project_id=group.project_id, group__id=group.id, state=GroupHash.State.SPLIT
+    ).update(group=None)
+    GroupHash.objects.filter(project_id=group.project_id, group__id=group.id).exclude(
+        state=GroupHash.State.SPLIT
+    ).delete()
     # We remove `GroupInbox` rows here so that they don't end up influencing queries for
     # `Group` instances that are pending deletion
     GroupInbox.objects.filter(project_id=group.project.id, group__id=group.id).delete()

+ 0 - 1
src/sentry/grouping/strategies/hierarchical.py

@@ -120,7 +120,6 @@ def _build_fallback_tree(main_variant, components, frames, inverted_hierarchy):
         frames.extend(post_frames)
 
         if len(prev_variant.values) == len(frames):
-            all_variants[key] = main_variant
             break
 
         tree_label = _compute_tree_label(prev_variant, frames)

+ 1 - 0
static/app/actions/groupingActions.tsx

@@ -7,6 +7,7 @@ const GroupingActions = Reflux.createActions([
   'toggleUnmerge',
   'toggleMerge',
   'unmerge',
+  'split',
   'merge',
   'toggleCollapseFingerprint',
   'toggleCollapseFingerprints',

Some files were not shown because too many files changed in this diff