Browse Source

feat(grouping): Add grouping information to stacktrace frames (#27231)

Co-authored-by: getsantry[bot] <66042841+getsantry[bot]@users.noreply.github.com>
Markus Unterwaditzer 3 years ago
parent
commit
be0c24a563

+ 1 - 0
mypy.ini

@@ -18,6 +18,7 @@ files = src/sentry/api/bases/external_actor.py,
         src/sentry/api/validators/notifications.py,
         src/sentry/grouping/strategies/base.py,
         src/sentry/grouping/strategies/message.py,
+        src/sentry/grouping/result.py,
         src/sentry/integrations/slack/message_builder/**/*.py,
         src/sentry/integrations/slack/requests/*.py,
         src/sentry/integrations/slack/util/*.py,

+ 3 - 4
src/sentry/api/endpoints/grouping_levels.py

@@ -144,11 +144,10 @@ def _list_levels(group):
 
     # It is a little silly to transfer a list of integers rather than just
     # giving the UI a range, but in the future we may want to add
-    # additional fields to each level. Also it is good if the UI does not
-    # assume too much about the form of IDs.
-    levels = [{"id": str(i)} for i in range(fields.num_levels)]
+    # additional fields to each level.
+    levels = [{"id": i} for i in range(fields.num_levels)]
 
     current_level = fields.current_level
-    assert levels[current_level]["id"] == str(current_level)
+    assert levels[current_level]["id"] == current_level
     levels[current_level]["isCurrent"] = True
     return {"levels": levels}

+ 12 - 6
src/sentry/event_manager.py

@@ -23,7 +23,6 @@ from sentry.constants import (
     DataCategory,
 )
 from sentry.culprit import generate_culprit
-from sentry.eventstore.models import CalculatedHashes
 from sentry.eventstore.processing import event_processing_store
 from sentry.grouping.api import (
     BackgroundGroupingConfigLoader,
@@ -35,6 +34,7 @@ from sentry.grouping.api import (
     get_grouping_config_dict_for_project,
     load_grouping_config,
 )
+from sentry.grouping.result import CalculatedHashes
 from sentry.ingest.inbound_filters import FilterStatKeys
 from sentry.killswitches import killswitch_matches_context
 from sentry.lang.native.utils import STORE_CRASH_REPORTS_ALL, convert_crashreport_count
@@ -387,6 +387,9 @@ class EventManager:
         if not do_background_grouping_before:
             _run_background_grouping(project, job)
 
+        if hashes.tree_labels:
+            job["finest_tree_label"] = hashes.finest_tree_label
+
         _materialize_metadata_many(jobs)
 
         kwargs = {
@@ -710,9 +713,8 @@ def _materialize_metadata_many(jobs):
         # In save_aggregate we store current_tree_label for the group metadata,
         # and finest_tree_label for the event's own title.
 
-        finest_tree_label = get_path(data, "hierarchical_tree_labels", -1)
-        if finest_tree_label is not None:
-            event_metadata["finest_tree_label"] = finest_tree_label
+        if "finest_tree_label" in job:
+            event_metadata["finest_tree_label"] = job["finest_tree_label"]
 
         data.update(materialize_metadata(data, event_type, event_metadata))
         job["culprit"] = data["culprit"]
@@ -990,8 +992,12 @@ def _save_aggregate(event, hashes, release, metadata, received_timestamp, **kwar
             project=project, hash=root_hierarchical_hash
         )[0]
 
-        metadata["current_tree_label"] = hashes.tree_label_from_hash(
-            existing_grouphash.hash if existing_grouphash is not None else root_hierarchical_hash
+        metadata.update(
+            hashes.group_metadata_from_hash(
+                existing_grouphash.hash
+                if existing_grouphash is not None
+                else root_hierarchical_hash
+            )
         )
 
     else:

+ 2 - 37
src/sentry/eventstore/models.py

@@ -1,9 +1,8 @@
 import string
 from collections import OrderedDict
-from dataclasses import dataclass
 from datetime import datetime
 from hashlib import md5
-from typing import Mapping, Optional, Sequence
+from typing import Mapping, Optional
 
 import pytz
 import sentry_sdk
@@ -13,6 +12,7 @@ from django.utils.encoding import force_text
 
 from sentry import eventtypes
 from sentry.db.models import NodeData
+from sentry.grouping.result import CalculatedHashes
 from sentry.interfaces.base import get_interfaces
 from sentry.models import EventDict
 from sentry.snuba.events import Columns
@@ -31,41 +31,6 @@ def ref_func(x):
     return x.project_id or x.project.id
 
 
-TreeLabel = Sequence[str]
-
-
-@dataclass(frozen=True)
-class CalculatedHashes:
-    hashes: Sequence[str]
-    hierarchical_hashes: Sequence[str]
-    tree_labels: Sequence[TreeLabel]
-
-    def write_to_event(self, event_data):
-        event_data["hashes"] = self.hashes
-
-        if self.hierarchical_hashes:
-            event_data["hierarchical_hashes"] = self.hierarchical_hashes
-            event_data["hierarchical_tree_labels"] = self.tree_labels
-
-    @classmethod
-    def from_event(cls, event_data) -> Optional["CalculatedHashes"]:
-        hashes = event_data.get("hashes")
-        hierarchical_hashes = event_data.get("hierarchical_hashes") or []
-        tree_labels = event_data.get("hierarchical_tree_labels") or []
-        if hashes is not None:
-            return cls(
-                hashes=hashes, hierarchical_hashes=hierarchical_hashes, tree_labels=tree_labels
-            )
-
-        return None
-
-    def tree_label_from_hash(self, hash: str) -> Optional[str]:
-        try:
-            return self.tree_labels[self.hierarchical_hashes.index(hash)]
-        except (IndexError, ValueError):
-            return None
-
-
 class Event:
     """
     Event backed by nodestore and Snuba.

+ 3 - 1
src/sentry/eventtypes/base.py

@@ -8,7 +8,9 @@ from sentry.utils.strings import strip, truncatechars
 
 
 def format_title_from_tree_label(tree_label):
-    return " | ".join(tree_label)
+    return " | ".join(
+        filter(bool, (x.get("function") or x.get("package") or x.get("type") for x in tree_label))
+    )
 
 
 def compute_title_with_tree_label(title: Optional[str], metadata: dict):

+ 121 - 0
src/sentry/grouping/result.py

@@ -0,0 +1,121 @@
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Any, Dict, Optional, Sequence, Union
+
+from sentry.utils.safe import get_path, safe_execute, set_path
+
+# TODO(3.8): This is a hack so we can get TypedDicts before 3.8
+if TYPE_CHECKING:
+    from mypy_extensions import TypedDict
+else:
+
+    def TypedDict(*args, **kwargs):
+        pass
+
+
+EventData = Dict[str, Any]
+EventMetadata = Dict[str, Any]
+
+
+TreeLabelPart = TypedDict(
+    "TreeLabelPart",
+    {
+        "function": str,
+        "package": str,
+        "is_sentinel": bool,
+        "is_prefix": bool,
+        "datapath": Sequence[Union[str, int]],
+    },
+)
+
+StrippedTreeLabelPart = TypedDict(
+    "StrippedTreeLabelPart",
+    {
+        "function": str,
+        "package": str,
+        "is_sentinel": bool,
+        "is_prefix": bool,
+    },
+)
+
+
+TreeLabel = Sequence[TreeLabelPart]
+StrippedTreeLabel = Sequence[StrippedTreeLabelPart]
+
+
+def _strip_tree_label(tree_label: TreeLabel) -> StrippedTreeLabel:
+    rv = []
+    for part in tree_label:
+        stripped_part: StrippedTreeLabelPart = dict(part)  # type: ignore
+        # TODO(markus): Remove more stuff here if we never use it in group
+        # title
+        stripped_part.pop("datapath", None)  # type: ignore
+        rv.append(stripped_part)
+
+    return rv
+
+
+def _write_tree_labels(tree_labels: Sequence[TreeLabel], event_data: EventData) -> None:
+    event_data["hierarchical_tree_labels"] = event_labels = []
+
+    for level, tree_label in enumerate(tree_labels):
+        event_labels.append(_strip_tree_label(tree_label))
+
+        for part in tree_label:
+            datapath = part["datapath"]
+            frame = get_path(event_data, *datapath)
+            if not frame:
+                raise ValueError("datapath not found in event")
+
+            if part.get("is_sentinel"):
+                set_path(frame, "data", "is_sentinel", value=True)
+
+            if part.get("is_prefix"):
+                set_path(frame, "data", "is_prefix", value=True)
+
+            prev_level = get_path(frame, "data", "min_grouping_level")
+            if not isinstance(prev_level, int) or level < prev_level:
+                set_path(frame, "data", "min_grouping_level", value=level)
+
+
+@dataclass(frozen=True)
+class CalculatedHashes:
+    hashes: Sequence[str]
+    hierarchical_hashes: Sequence[str]
+    tree_labels: Sequence[TreeLabel]
+
+    def write_to_event(self, event_data: EventData) -> None:
+        event_data["hashes"] = self.hashes
+
+        if self.hierarchical_hashes:
+            event_data["hierarchical_hashes"] = self.hierarchical_hashes
+
+            safe_execute(_write_tree_labels, self.tree_labels, event_data, _with_transaction=False)
+
+    @classmethod
+    def from_event(cls, event_data: EventData) -> Optional["CalculatedHashes"]:
+        hashes = event_data.get("hashes")
+        hierarchical_hashes = event_data.get("hierarchical_hashes") or []
+        tree_labels = event_data.get("hierarchical_tree_labels") or []
+        if hashes is not None:
+            return cls(
+                hashes=hashes, hierarchical_hashes=hierarchical_hashes, tree_labels=tree_labels
+            )
+
+        return None
+
+    @property
+    def finest_tree_label(self) -> Optional[StrippedTreeLabel]:
+        try:
+            return _strip_tree_label(self.tree_labels[-1])
+        except IndexError:
+            return None
+
+    def group_metadata_from_hash(self, hash: str) -> EventMetadata:
+        try:
+            i = self.hierarchical_hashes.index(hash)
+            return {
+                "current_level": i,
+                "current_tree_label": _strip_tree_label(self.tree_labels[i]),
+            }
+        except (IndexError, ValueError):
+            return {}

+ 7 - 1
src/sentry/grouping/strategies/hierarchical.py

@@ -79,7 +79,13 @@ def _compute_tree_label(components: Iterable[GroupingComponent]):
 
     for frame in components:
         if frame.contributes and frame.tree_label:
-            tree_label.append(frame.tree_label)
+            lbl = dict(frame.tree_label)
+            if frame.is_sentinel_frame:
+                lbl["is_sentinel"] = True
+            if frame.is_prefix_frame:
+                lbl["is_prefix"] = True
+
+            tree_label.append(lbl)
 
     # We assume all components are always sorted in the way frames appear in
     # the event (threadbase -> crashing frame). Then we want to show the

+ 5 - 2
src/sentry/grouping/strategies/newstyle.py

@@ -267,7 +267,7 @@ def get_function_component(
             )
 
     if function_component.values and context["hierarchical_grouping"]:
-        function_component.update(tree_label=function_component.values[0])
+        function_component.update(tree_label={"function": function_component.values[0]})
 
     return function_component
 
@@ -346,7 +346,7 @@ def frame(frame, event, context, **meta):
                 )
 
             if package_component.values and context["hierarchical_grouping"]:
-                package_component.update(tree_label=package_component.values[0])
+                package_component.update(tree_label={"package": package_component.values[0]})
 
             values.append(package_component)
 
@@ -385,6 +385,9 @@ def frame(frame, event, context, **meta):
     if context["is_recursion"]:
         rv.update(contributes=False, hint="ignored due to recursion")
 
+    if rv.tree_label:
+        rv.tree_label = {"datapath": frame.datapath, **rv.tree_label}
+
     return {context["variant"]: rv}
 
 

+ 22 - 5
src/sentry/interfaces/base.py

@@ -1,5 +1,6 @@
 import logging
 from collections import OrderedDict
+from typing import Any, Dict, List, Optional, Union
 
 from django.conf import settings
 from django.utils.translation import ugettext as _
@@ -9,11 +10,13 @@ from sentry.utils.decorators import classproperty
 from sentry.utils.html import escape
 from sentry.utils.imports import import_string
 from sentry.utils.json import prune_empty_keys
-from sentry.utils.safe import safe_execute
+from sentry.utils.safe import get_path, safe_execute
 
 logger = logging.getLogger("sentry.events")
 interface_logger = logging.getLogger("sentry.interfaces")
 
+DataPath = List[Union[str, int]]
+
 
 def get_interface(name):
     try:
@@ -42,7 +45,7 @@ def get_interfaces(data):
         except ValueError:
             continue
 
-        value = safe_execute(cls.to_python, data, _with_transaction=False)
+        value = safe_execute(cls.to_python, data, datapath=[key], _with_transaction=False)
         if not value:
             continue
 
@@ -63,11 +66,12 @@ class Interface:
     render differently than the default ``extra`` metadata in an event.
     """
 
-    _data = None
+    _data: Dict[str, Any]
     score = 0
     display_score = None
     ephemeral = False
     grouping_variants = ["default"]
+    datapath = None
 
     def __init__(self, **data):
         self._data = data or {}
@@ -107,14 +111,27 @@ class Interface:
             self._data[name] = value
 
     @classmethod
-    def to_python(cls, data):
+    def to_python(cls, data, datapath: Optional[DataPath] = None):
         """Creates a python interface object from the given raw data.
 
         This function can assume fully normalized and valid data. It can create
         defaults where data is missing but does not need to handle interface
         validation.
         """
-        return cls(**data) if data is not None else None
+        if data is None:
+            return None
+
+        rv = cls(**data)
+        object.__setattr__(rv, "datapath", datapath)
+        return rv
+
+    @classmethod
+    def to_python_subpath(cls, data, path: DataPath, datapath: Optional[DataPath] = None):
+        if data is None:
+            return None
+
+        subdata = get_path(data, *path)
+        return cls.to_python(subdata, datapath=datapath + path if datapath else None)
 
     def get_raw_data(self):
         """Returns the underlying raw data."""

+ 2 - 2
src/sentry/interfaces/breadcrumbs.py

@@ -26,13 +26,13 @@ class Breadcrumbs(Interface):
     score = 800
 
     @classmethod
-    def to_python(cls, data):
+    def to_python(cls, data, **kwargs):
         values = []
         for index, crumb in enumerate(get_path(data, "values", filter=True, default=())):
             # TODO(ja): Handle already invalid and None breadcrumbs
             values.append(cls.normalize_crumb(crumb))
 
-        return cls(values=values)
+        return super().to_python({"values": values}, **kwargs)
 
     def to_json(self):
         return prune_empty_keys(

Some files were not shown because too many files changed in this diff