Browse Source

feat(suspect-spans): Add group hashes to spans (#28184)

Span descriptions are unbounded free-form strings. This makes it difficult to
store them in snuba for grouping purposes. This change introduces a group hash
to get a fixed-length string that can be used to classify spans in the same
group. The first iteration will use a naive hash of the span description with
additional processing in upcoming changes.
Tony Xiao 3 years ago
parent
commit
91a8e52e3e

+ 1 - 0
mypy.ini

@@ -50,6 +50,7 @@ files = src/sentry/api/bases/external_actor.py,
         src/sentry/shared_integrations/constants.py,
         src/sentry/snuba/outcomes.py,
         src/sentry/snuba/query_subscription_consumer.py,
+        src/sentry/spans/**/*.py,
         src/sentry/tasks/app_store_connect.py,
         src/sentry/tasks/update_user_reports.py,
         src/sentry/unmerge.py,

+ 22 - 0
src/sentry/event_manager.py

@@ -1697,6 +1697,27 @@ def _calculate_event_grouping(project, event, grouping_config) -> CalculatedHash
     return hashes
 
 
+def _calculate_span_grouping(jobs, projects):
+    """Compute the span group hashes for each job's event and write them to the event data.
+
+    Jobs whose organization does not have the
+    `organizations:performance-suspect-spans-ingestion` feature are skipped.
+    Any exception raised while handling a job is reported through
+    `sentry_sdk.capture_exception` and swallowed, after which the loop moves
+    on to the next job.
+
+    `jobs` is iterated and each job is read for its `"event"` and
+    `"project_id"` keys; `projects` is indexed by that project id.
+    """
+    for job in jobs:
+        # Make sure this snippet doesn't crash ingestion
+        # as the feature is under development.
+        try:
+            event = job["event"]
+            project = projects[job["project_id"]]
+
+            if not features.has(
+                "organizations:performance-suspect-spans-ingestion",
+                project.organization,
+                actor=None,
+            ):
+                continue
+
+            # the hashes are stored on the spans inside of `event.data`
+            groupings = event.get_span_groupings()
+            groupings.write_to_event(event.data)
+        except Exception:
+            sentry_sdk.capture_exception()
+
+
 @metrics.wraps("event_manager.save_transaction_events")
 def save_transaction_events(jobs, projects):
     with metrics.timer("event_manager.save_transactions.collect_organization_ids"):
@@ -1730,6 +1751,7 @@ def save_transaction_events(jobs, projects):
     _get_event_user_many(jobs, projects)
     _derive_plugin_tags_many(jobs, projects)
     _derive_interface_tags_many(jobs)
+    _calculate_span_grouping(jobs, projects)
     _materialize_metadata_many(jobs)
     _get_or_create_environment_many(jobs, projects)
     _get_or_create_release_associated_models(jobs, projects)

+ 5 - 0
src/sentry/eventstore/models.py

@@ -16,6 +16,7 @@ from sentry.grouping.result import CalculatedHashes
 from sentry.interfaces.base import get_interfaces
 from sentry.models import EventDict
 from sentry.snuba.events import Columns
+from sentry.spans.grouping.api import load_span_grouping_config
 from sentry.utils import json
 from sentry.utils.cache import memoize
 from sentry.utils.canonical import CanonicalKeyView
@@ -471,6 +472,10 @@ class Event:
 
         return None
 
+    def get_span_groupings(self, force_config=None):
+        """Return the span grouping results for this event.
+
+        `force_config` is handed to `load_span_grouping_config` to pick the
+        span grouping config, and the strategy of the loaded config is then
+        executed against `self.data`.
+        """
+        config = load_span_grouping_config(force_config)
+        return config.execute_strategy(self.data)
+
     @property
     def organization(self):
         return self.project.organization

+ 0 - 0
src/sentry/spans/__init__.py


+ 0 - 0
src/sentry/spans/grouping/__init__.py


+ 26 - 0
src/sentry/spans/grouping/api.py

@@ -0,0 +1,26 @@
+from typing import Any, Optional
+
+from sentry.spans.grouping.strategy.config import (
+    CONFIGURATIONS,
+    DEFAULT_CONFIG_ID,
+    SpanGroupingConfig,
+)
+
+
+class SpanGroupingConfigNotFound(LookupError):
+    """Raised when no span grouping config is registered under the requested id."""
+
+    pass
+
+
+def load_span_grouping_config(config: Optional[Any] = None) -> SpanGroupingConfig:
+    """Look up a registered span grouping config.
+
+    If `config` is `None`, the config registered under `DEFAULT_CONFIG_ID` is
+    returned. Otherwise `config` must contain an `"id"` key naming the config
+    to return.
+
+    Raises `ValueError` if `config` is given but has no `"id"` key, and
+    `SpanGroupingConfigNotFound` if no config is registered under the id.
+    """
+    if config is None:
+        config_id = DEFAULT_CONFIG_ID
+
+    else:
+        if "id" not in config:
+            raise ValueError("Malformed configuration: missing 'id'")
+        config_id = config["id"]
+
+    if config_id not in CONFIGURATIONS:
+        raise SpanGroupingConfigNotFound(config_id)
+
+    return CONFIGURATIONS[config_id]

+ 55 - 0
src/sentry/spans/grouping/result.py

@@ -0,0 +1,55 @@
+from dataclasses import dataclass
+from typing import Any, Dict, Optional
+
+
+@dataclass(frozen=True)
+class SpanGroupingResults:
+    """The span group hashes of one event together with the id of the
+    grouping config that produced them.
+    """
+
+    # The id of the span grouping config the hashes belong to.
+    id: str
+    # Maps a span id to the group hash of that span.
+    results: Dict[str, str]
+
+    @classmethod
+    def from_event(cls, event_data: Any) -> Optional["SpanGroupingResults"]:
+        """Rebuild the grouping results from hashes already stored in the event data.
+
+        Returns `None` to indicate that the grouping results could not be
+        constructed from the event. This is the case if the event has no
+        `span_grouping_config` id, has no trace context, or if any span
+        (including the transaction root span) is missing its span id or hash.
+        """
+        grouping_config = event_data.get("span_grouping_config")
+        if grouping_config is None or grouping_config.get("id") is None:
+            return None
+
+        # The transaction root span lives in the trace context. Without it
+        # the results are incomplete, so report that instead of raising.
+        trace_context = (event_data.get("contexts") or {}).get("trace")
+        if trace_context is None:
+            return None
+
+        results: Dict[str, str] = {}
+
+        # check the spans in the transaction followed by the transaction root
+        # span; `spans` may be missing or explicitly set to `None`
+        for span in [*(event_data.get("spans") or []), trace_context]:
+            span_id = span.get("span_id")
+            span_hash = span.get("hash")
+            if span_id is None or span_hash is None:
+                # Every span should have a span id and hash.
+                # If not, return None to indicate that the grouping
+                # results could not be constructed from the event.
+                return None
+            results[span_id] = span_hash
+
+        return cls(grouping_config["id"], results)
+
+    def write_to_event(self, event_data: Any) -> None:
+        """Store the hashes and the grouping config id in the event data.
+
+        The hash of a span is written to its `hash` key. Spans without an
+        entry in `results` are left untouched. The event data is modified
+        in place.
+        """
+        # write the hashes of the spans in the transaction;
+        # `spans` may be missing or explicitly set to `None`
+        for span in event_data.get("spans") or []:
+            span_hash = self.results.get(span["span_id"])
+            if span_hash is not None:
+                span["hash"] = span_hash
+
+        # write the hash of the transaction root span
+        trace_context = event_data["contexts"]["trace"]
+        span_hash = self.results.get(trace_context["span_id"])
+        if span_hash is not None:
+            trace_context["hash"] = span_hash
+
+        event_data["span_grouping_config"] = {"id": self.id}

+ 0 - 0
src/sentry/spans/grouping/strategy/__init__.py


+ 194 - 0
src/sentry/spans/grouping/strategy/base.py

@@ -0,0 +1,194 @@
+import re
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Sequence
+from urllib.parse import urlparse
+
+from sentry.spans.grouping.utils import Hash, parse_fingerprint_var
+
+# TODO(3.8): This is a hack so we can get TypedDicts before 3.8
+# Only the type checker sees the real `TypedDict`. At runtime the stand-in
+# below accepts any arguments and returns `None`, so a name assigned from a
+# `TypedDict(...)` call in this module is `None` when the code runs.
+if TYPE_CHECKING:
+    from mypy_extensions import TypedDict
+else:
+
+    def TypedDict(*args, **kwargs):
+        pass
+
+
+# Static type describing the keys of a span dict.
+Span = TypedDict(
+    "Span",
+    {
+        "trace_id": str,
+        "parent_span_id": str,
+        "span_id": str,
+        "start_timestamp": float,
+        "timestamp": float,
+        "same_process_as_parent": bool,
+        "op": str,
+        "description": Optional[str],
+        "fingerprint": Optional[Sequence[str]],
+        "tags": Optional[Any],
+        "data": Optional[Any],
+    },
+)
+
+
+# A callable strategy is a callable that, when given a span, tries to
+# return a fingerprint. If the strategy does not apply to the span, it
+# should return `None` to indicate that the strategy should not be used
+# and that a different strategy should be tried. If the strategy does apply,
+# it should return a list of strings that will serve as the span fingerprint.
+CallableStrategy = Callable[[Span], Optional[Sequence[str]]]
+
+
+@dataclass(frozen=True)
+class SpanGroupingStrategy:
+    """Computes the group hash of every span in an event."""
+
+    name: str
+    # The strategies to use with the default fingerprint
+    strategies: Sequence[CallableStrategy]
+
+    def execute(self, event_data: Any) -> Dict[str, str]:
+        """Return a mapping from span id to group hash.
+
+        The mapping covers the spans of the event as well as the transaction
+        root span, whose span id is read from the trace context.
+        """
+        # `spans` may be missing or explicitly set to `None`;
+        # both are treated as a transaction without child spans
+        spans = event_data.get("spans") or []
+        span_groups = {span["span_id"]: self.get_span_group(span) for span in spans}
+
+        # make sure to get the group id for the transaction root span
+        span_id = event_data["contexts"]["trace"]["span_id"]
+        span_groups[span_id] = self.get_transaction_span_group(event_data)
+
+        return span_groups
+
+    def get_transaction_span_group(self, event_data: Any) -> str:
+        """Return the group hash of the transaction root span, which is the
+        hash of the event's `transaction` value.
+        """
+        result = Hash()
+        result.update(event_data["transaction"])
+        return result.hexdigest()
+
+    def get_span_group(self, span: Span) -> str:
+        """Return the group hash of a single span.
+
+        The hash is built from the entries of the span's `fingerprint`. A span
+        without a fingerprint is treated as if its fingerprint was
+        `["{{ default }}"]`.
+        """
+        fingerprints = span.get("fingerprint") or ["{{ default }}"]
+
+        result = Hash()
+
+        for fingerprint in fingerprints:
+            # by default, a fingerprint entry contributes to the hash as is
+            values: Sequence[str] = [fingerprint]
+
+            # NOTE(review): presumably `parse_fingerprint_var` extracts the
+            # variable name from entries like `{{ default }}` - confirm in
+            # `sentry.spans.grouping.utils`
+            var = parse_fingerprint_var(fingerprint)
+            if var == "default":
+                values = self.handle_default_fingerprint(span)
+
+            result.update(values)
+
+        return result.hexdigest()
+
+    def handle_default_fingerprint(self, span: Span) -> Sequence[str]:
+        """Return the values that the default fingerprint stands for on this span."""
+        span_group = None
+
+        # Try using all of the strategies in order to generate
+        # the appropriate span group. The first strategy that
+        # successfully generates a span group will be chosen.
+        for strategy in self.strategies:
+            span_group = strategy(span)
+            if span_group is not None:
+                break
+
+        # If no strategies generated a valid span group,
+        # fall back to using the raw description strategy
+        if span_group is None:
+            span_group = raw_description_strategy(span)
+
+        return span_group
+
+
+def span_op(op_name: str) -> Callable[[CallableStrategy], CallableStrategy]:
+    """Return a decorator that restricts a strategy to spans with the given `op`.
+
+    The decorated strategy returns `None` for any span whose `op` is not
+    `op_name` and otherwise returns whatever the wrapped strategy returns.
+    """
+    # imported locally so that this block does not rely on the module imports
+    import functools
+
+    def wrapped(fn: CallableStrategy) -> CallableStrategy:
+        # keep the name and docstring of the wrapped strategy so that it
+        # stays identifiable when debugging or introspecting a configuration
+        @functools.wraps(fn)
+        def strategy(span: Span) -> Optional[Sequence[str]]:
+            return fn(span) if span.get("op") == op_name else None
+
+        return strategy
+
+    return wrapped
+
+
+def raw_description_strategy(span: Span) -> Sequence[str]:
+    """The catch-all strategy to use if all other strategies fail. This
+    strategy is only effective if the span description is a fixed string.
+    Otherwise, this strategy will produce a large number of span groups.
+
+    The fingerprint is the span description on its own. A missing or empty
+    description results in the empty string being used.
+    """
+    return [span.get("description") or ""]
+
+
+# Matches an `IN` condition whose right hand side consists only of `%s`
+# placeholders, e.g. ` IN (%s)` or ` IN (%s, %s, %s)`.
+IN_CONDITION_PATTERN = re.compile(r" IN \(%s(\s*,\s*%s)*\)")
+
+
+@span_op("db")
+def normalized_db_span_in_condition_strategy(span: Span) -> Optional[Sequence[str]]:
+    """For a `db` span, the `IN` condition contains the same number of elements
+    on the right hand side as the raw query. This results in identical queries that
+    have a different number of elements on the right hand side being seen as
+    different spans. We want these spans to be seen as similar spans, so we normalize
+    the right hand side of `IN` conditions to `(%s)` to use in the fingerprint.
+
+    Returns `None` if the description contains no such `IN` condition.
+    """
+    description = span.get("description") or ""
+    cleaned, count = IN_CONDITION_PATTERN.subn(" IN (%s)", description)
+    # nothing was normalized, so this strategy does not apply to the span
+    if count == 0:
+        return None
+    return [cleaned]
+
+
+# The HTTP methods that are accepted as the first word
+# of the description of an `http.client` span.
+HTTP_METHODS = {
+    "GET",
+    "HEAD",
+    "POST",
+    "PUT",
+    "DELETE",
+    "CONNECT",
+    "OPTIONS",
+    "TRACE",
+    "PATCH",
+}
+
+
+@span_op("http.client")
+def remove_http_client_query_string_strategy(span: Span) -> Optional[Sequence[str]]:
+    """For a `http.client` span, the fingerprint to use is
+
+    - The http method
+    - The url scheme
+    - The url domain
+    - The url path
+
+    This strategy means that different url path parameters are seen as different
+    spans but different url query parameters are seen as same spans.
+
+    For example,
+
+    `GET https://sentry.io/organizations/this-org/issues/` and
+    `GET https://sentry.io/organizations/that-org/issues/` differ in the url path.
+    Therefore, these are different spans.
+
+    `GET https://sentry.io/organizations/this-org/issues/?id=1` and
+    `GET https://sentry.io/organizations/this-org/issues/?id=2` differ in the query
+    string. Therefore, these are similar spans.
+
+    Returns `None` if the description is not of the form `<HTTP METHOD> <URL>`.
+    """
+
+    # Check the description is of the form `<HTTP METHOD> <URL>`
+    description = span.get("description") or ""
+    parts = description.split(" ", 1)
+    if len(parts) != 2:
+        return None
+
+    # Ensure that this is a valid http method
+    method, url_str = parts
+    # the method is compared and fingerprinted in upper case
+    method = method.upper()
+    if method not in HTTP_METHODS:
+        return None
+
+    # only the scheme, domain and path are kept, all
+    # other components of the parsed url are dropped
+    url = urlparse(url_str)
+    return [method, url.scheme, url.netloc, url.path]
+
+
+@span_op("redis")
+def remove_redis_command_arguments_strategy(span: Span) -> Optional[Sequence[str]]:
+    """For a `redis` span, the fingerprint to use is simply the redis command name.
+    The arguments to the redis command are highly variable and therefore not used as
+    a part of the fingerprint.
+
+    A missing or empty description results in the empty string being used
+    as the command name.
+    """
+    description = span.get("description") or ""
+    parts = description.split(" ", 1)
+
+    # the redis command name is the first word in the description
+    return [parts[0]]

+ 52 - 0
src/sentry/spans/grouping/strategy/config.py

@@ -0,0 +1,52 @@
+from dataclasses import dataclass
+from typing import Any, Dict, Sequence
+
+from sentry.spans.grouping.result import SpanGroupingResults
+from sentry.spans.grouping.strategy.base import (
+    CallableStrategy,
+    SpanGroupingStrategy,
+    normalized_db_span_in_condition_strategy,
+    remove_http_client_query_string_strategy,
+    remove_redis_command_arguments_strategy,
+)
+
+
+@dataclass(frozen=True)
+class SpanGroupingConfig:
+    """A span grouping strategy together with the id it is known under."""
+
+    # The id of this config. It is also the id of the
+    # results returned by `execute_strategy`.
+    id: str
+    # The strategy that is executed to compute the span group hashes.
+    strategy: SpanGroupingStrategy
+
+    def execute_strategy(self, event_data: Any) -> SpanGroupingResults:
+        """Return the span grouping results of the event data for this config."""
+        # If there are hashes using the same grouping config stored
+        # in the data, they should be reused. Otherwise, fall back to
+        # generating new hashes using the data.
+        grouping_results = SpanGroupingResults.from_event(event_data)
+        if grouping_results is not None and grouping_results.id == self.id:
+            return grouping_results
+
+        results = self.strategy.execute(event_data)
+        return SpanGroupingResults(self.id, results)
+
+
+# All registered span grouping configs, keyed by their config id.
+CONFIGURATIONS: Dict[str, SpanGroupingConfig] = {}
+
+
+def register_configuration(config_id: str, strategies: Sequence[CallableStrategy]) -> None:
+    """Create a span grouping config from the strategies and add it to
+    `CONFIGURATIONS` under `config_id`.
+
+    Raises `ValueError` if a config is already registered under `config_id`.
+    """
+    if config_id in CONFIGURATIONS:
+        raise ValueError(f"Duplicate configuration id: {config_id}")
+
+    # `None` is tolerated here and treated like an empty list of strategies
+    strategy = SpanGroupingStrategy(config_id, [] if strategies is None else strategies)
+    CONFIGURATIONS[config_id] = SpanGroupingConfig(config_id, strategy)
+
+
+# The id of the default span grouping config. It has to match
+# the id of one of the configurations registered below.
+DEFAULT_CONFIG_ID = "default:2021-08-25"
+
+
+# The strategies are listed in the order in which they are passed on to
+# the `SpanGroupingStrategy` of this configuration.
+register_configuration(
+    "default:2021-08-25",
+    strategies=[
+        normalized_db_span_in_condition_strategy,
+        remove_http_client_query_string_strategy,
+        remove_redis_command_arguments_strategy,
+    ],
+)

Some files were not shown because too many files changed in this diff