Browse Source

feat(spans): Add span tags endpoint (#70143)

To better support autocompletion in the trace explorer, we need to
return the span tags and autocomplete their values.
Tony Xiao 10 months ago
parent
commit
913b0bb319

+ 146 - 0
src/sentry/api/endpoints/organization_spans_fields.py

@@ -0,0 +1,146 @@
+from typing import cast
+
+import sentry_sdk
+from rest_framework.request import Request
+from rest_framework.response import Response
+
+from sentry import features, options
+from sentry.api.api_owners import ApiOwner
+from sentry.api.api_publish_status import ApiPublishStatus
+from sentry.api.base import region_silo_endpoint
+from sentry.api.bases import NoProjects, OrganizationEventsV2EndpointBase
+from sentry.api.paginator import SequencePaginator
+from sentry.api.serializers import serialize
+from sentry.api.utils import handle_query_errors
+from sentry.search.events.builder import SpansIndexedQueryBuilder
+from sentry.search.events.types import ParamsType, QueryBuilderConfig
+from sentry.snuba.dataset import Dataset
+from sentry.snuba.referrer import Referrer
+from sentry.tagstore.types import TagKey, TagValue
+
+
+@region_silo_endpoint
+class OrganizationSpansFieldsEndpoint(OrganizationEventsV2EndpointBase):
+    publish_status = {
+        "GET": ApiPublishStatus.PRIVATE,
+    }
+    owner = ApiOwner.PERFORMANCE
+
+    def get(self, request: Request, organization) -> Response:
+        if not features.has(
+            "organizations:performance-trace-explorer", organization, actor=request.user
+        ):
+            return Response(status=404)
+
+        try:
+            snuba_params, params = self.get_snuba_dataclass(request, organization)
+        except NoProjects:
+            return self.paginate(
+                request=request,
+                paginator=SequencePaginator([]),
+            )
+
+        max_span_tags = options.get("performance.spans-tags-key.max")
+
+        with handle_query_errors():
+            # This has the limitations that we cannot paginate and
+            # we do not provide any guarantees around which tag keys
+            # are returned if the total exceeds the limit.
+            builder = SpansIndexedQueryBuilder(
+                Dataset.SpansIndexed,
+                params=cast(ParamsType, params),
+                snuba_params=snuba_params,
+                query=None,
+                selected_columns=["array_join(tags.key)"],
+                orderby=None,
+                limitby=("array_join(tags.key)", 1),
+                limit=max_span_tags,
+                sample_rate=options.get("performance.spans-tags-key.sample-rate"),
+                config=QueryBuilderConfig(
+                    transform_alias_to_input_format=True,
+                    functions_acl=["array_join"],
+                ),
+            )
+
+            results = builder.process_results(builder.run_query(Referrer.API_SPANS_TAG_KEYS.value))
+
+        paginator = SequencePaginator(
+            [
+                # TODO: prepend the list of sentry defined fields here
+                (row["array_join(tags.key)"], TagKey(row["array_join(tags.key)"]))
+                for row in results["data"]
+            ]
+        )
+
+        return self.paginate(
+            request=request,
+            paginator=paginator,
+            on_results=lambda results: serialize(results, request.user),
+            default_per_page=max_span_tags,
+            max_per_page=max_span_tags,
+        )
+
+
+@region_silo_endpoint
+class OrganizationSpansFieldValuesEndpoint(OrganizationEventsV2EndpointBase):
+    publish_status = {
+        "GET": ApiPublishStatus.PRIVATE,
+    }
+    owner = ApiOwner.PERFORMANCE
+
+    def get(self, request: Request, organization, key: str) -> Response:
+        if not features.has(
+            "organizations:performance-trace-explorer", organization, actor=request.user
+        ):
+            return Response(status=404)
+
+        try:
+            snuba_params, params = self.get_snuba_dataclass(request, organization)
+        except NoProjects:
+            return self.paginate(
+                request=request,
+                paginator=SequencePaginator([]),
+            )
+
+        sentry_sdk.set_tag("query.tag_key", key)
+
+        max_span_tags = options.get("performance.spans-tags-values.max")
+
+        with handle_query_errors():
+            builder = SpansIndexedQueryBuilder(
+                Dataset.SpansIndexed,
+                params=cast(ParamsType, params),
+                snuba_params=snuba_params,
+                query=None,
+                selected_columns=[key, "count()", "min(timestamp)", "max(timestamp)"],
+                orderby="-count()",
+                limit=max_span_tags,
+                sample_rate=options.get("performance.spans-tags-key.sample-rate"),
+                config=QueryBuilderConfig(
+                    transform_alias_to_input_format=True,
+                ),
+            )
+
+            results = builder.process_results(builder.run_query(Referrer.API_SPANS_TAG_KEYS.value))
+
+        paginator = SequencePaginator(
+            [
+                (
+                    row[key],
+                    TagValue(
+                        key=key,
+                        value=row[key],
+                        times_seen=row["count()"],
+                        first_seen=row["min(timestamp)"],
+                        last_seen=row["max(timestamp)"],
+                    ),
+                )
+                for row in results["data"]
+            ]
+        )
+
+        return self.paginate(
+            request=request,
+            paginator=paginator,
+            on_results=lambda results: serialize(results, request.user),
+        )

+ 14 - 0
src/sentry/api/urls.py

@@ -467,6 +467,10 @@ from .endpoints.organization_sentry_function_details import (
 from .endpoints.organization_sessions import OrganizationSessionsEndpoint
 from .endpoints.organization_shortid import ShortIdLookupEndpoint
 from .endpoints.organization_slugs import SlugsUpdateEndpoint
+from .endpoints.organization_spans_fields import (
+    OrganizationSpansFieldsEndpoint,
+    OrganizationSpansFieldValuesEndpoint,
+)
 from .endpoints.organization_stats import OrganizationStatsEndpoint
 from .endpoints.organization_stats_v2 import OrganizationStatsEndpointV2
 from .endpoints.organization_tagkey_values import OrganizationTagKeyValuesEndpoint
@@ -1398,6 +1402,16 @@ ORGANIZATION_URLS = [
         OrganizationTracesEndpoint.as_view(),
         name="sentry-api-0-organization-traces",
     ),
+    re_path(
+        r"^(?P<organization_slug>[^\/]+)/spans/fields/$",
+        OrganizationSpansFieldsEndpoint.as_view(),
+        name="sentry-api-0-organization-spans-fields",
+    ),
+    re_path(
+        r"^(?P<organization_slug>[^\/]+)/spans/fields/(?P<key>[^/]+)/values/$",
+        OrganizationSpansFieldValuesEndpoint.as_view(),
+        name="sentry-api-0-organization-spans-fields-values",
+    ),
     re_path(
         r"^(?P<organization_slug>[^\/]+)/metrics-estimation-stats/$",
         OrganizationMetricsEstimationStatsEndpoint.as_view(),

+ 24 - 0
src/sentry/options/defaults.py

@@ -1710,6 +1710,30 @@ register(
     default=10000,
     flags=FLAG_AUTOMATOR_MODIFIABLE,
 )
+register(  # Passed as `sample_rate` to the span tag query builders.
+    "performance.spans-tags-key.sample-rate",
+    type=Float,
+    default=1.0,
+    flags=FLAG_AUTOMATOR_MODIFIABLE,
+)
+register(  # Query limit and page size for the span tag keys endpoint.
+    "performance.spans-tags-key.max",
+    type=Int,
+    default=1000,
+    flags=FLAG_AUTOMATOR_MODIFIABLE,
+)
+register(  # NOTE(review): presumably for the tag values query; singular "value", unlike ".max" below.
+    "performance.spans-tags-value.sample-rate",
+    type=Float,
+    default=1.0,
+    flags=FLAG_AUTOMATOR_MODIFIABLE,
+)
+register(  # Query limit for the span tag values endpoint.
+    "performance.spans-tags-values.max",
+    type=Int,
+    default=1000,
+    flags=FLAG_AUTOMATOR_MODIFIABLE,
+)
 
 # Dynamic Sampling system-wide options
 # Size of the sliding window used for dynamic sampling. It is defaulted to 24 hours.

+ 9 - 3
src/sentry/search/events/builder/discover.py

@@ -278,9 +278,6 @@ class BaseQueryBuilder:
             self.orderby_converter,
         ) = self.load_config()
 
-        self.limitby = self.resolve_limitby(limitby)
-        self.array_join = None if array_join is None else [self.resolve_column(array_join)]
-
         self.start: datetime | None = None
         self.end: datetime | None = None
         self.resolve_query(
@@ -292,6 +289,9 @@ class BaseQueryBuilder:
         )
         self.entity = entity
 
+        self.limitby = self.resolve_limitby(limitby)
+        self.array_join = None if array_join is None else [self.resolve_column(array_join)]
+
     def are_columns_resolved(self) -> bool:
         return self.columns and isinstance(self.columns[0], Function)
 
@@ -401,6 +401,12 @@ class BaseQueryBuilder:
         if isinstance(resolved, Column):
             return LimitBy([resolved], count)
 
+        # Special case to allow limit bys on array joined columns.
+        # Simply allowing any function to be used in a limit by
+        # results in hard-to-debug issues, so be careful.
+        if isinstance(resolved, Function) and resolved.function == "arrayJoin":
+            return LimitBy([Column(resolved.alias)], count)
+
         # TODO: Limit By can only operate on a `Column`. This has the implication
         # that non aggregate transforms are not allowed in the order by clause.
         raise InvalidSearchQuery(f"{column} used in a limit by but is not a column.")

+ 8 - 0
src/sentry/search/events/datasets/spans_indexed.py

@@ -10,6 +10,7 @@ from sentry.search.events import builder, constants
 from sentry.search.events.datasets import field_aliases, filter_aliases, function_aliases
 from sentry.search.events.datasets.base import DatasetConfig
 from sentry.search.events.fields import (
+    ColumnArg,
     ColumnTagArg,
     IntervalDefault,
     NullableNumberRange,
@@ -321,6 +322,13 @@ class SpansIndexedDatasetConfig(DatasetConfig):
                     default_result_type="duration",
                     private=True,
                 ),
+                SnQLFunction(
+                    "array_join",
+                    required_args=[ColumnArg("column", allowed_columns=["tags.key"])],
+                    snql_column=lambda args, alias: Function("arrayJoin", [args["column"]], alias),
+                    default_result_type="string",
+                    private=True,
+                ),
             ]
         }
 

+ 1 - 0
src/sentry/snuba/referrer.py

@@ -441,6 +441,7 @@ class Referrer(Enum):
     API_TRACE_EXPLORER_METRICS_SPANS_LIST = "api.trace-explorer.metrics-spans-list"
     API_TRACE_EXPLORER_SPANS_LIST = "api.trace-explorer.spans-list"
     API_TRACE_EXPLORER_TRACES_META = "api.trace-explorer.traces-meta"
+    API_SPANS_TAG_KEYS = "api.spans.tags-keys"
 
     # Performance Mobile UI Module
     API_PERFORMANCE_MOBILE_UI_BAR_CHART = "api.performance.mobile.ui.bar-chart"

+ 1 - 0
src/sentry/utils/snuba.py

@@ -157,6 +157,7 @@ SPAN_COLUMN_MAP = {
     "sdk.name": "sentry_tags[sdk.name]",
     "trace.status": "sentry_tags[trace.status]",
     "messaging.destination.name": "sentry_tags[messaging.destination.name]",
+    "tags.key": "tags.key",
 }
 
 METRICS_SUMMARIES_COLUMN_MAP = {

+ 135 - 0
tests/sentry/api/endpoints/test_organization_spans_fields.py

@@ -0,0 +1,135 @@
+from uuid import uuid4
+
+from django.urls import reverse
+
+from sentry.testutils.cases import APITestCase, BaseSpansTestCase
+from sentry.testutils.helpers.datetime import before_now
+
+
+class OrganizationSpansTagsEndpointTest(BaseSpansTestCase, APITestCase):
+    view = "sentry-api-0-organization-spans-fields"
+
+    def setUp(self):
+        super().setUp()
+        self.login_as(user=self.user)
+
+    def do_request(self, features=None, **kwargs):
+        if features is None:
+            features = ["organizations:performance-trace-explorer"]
+        with self.feature(features):
+            return self.client.get(
+                reverse(self.view, kwargs={"organization_slug": self.organization.slug}),
+                format="json",
+                **kwargs,
+            )
+
+    def test_no_feature(self):
+        response = self.do_request(features=[])
+        assert response.status_code == 404, response.data
+
+    def test_no_project(self):
+        response = self.do_request()
+        assert response.status_code == 200, response.data
+        assert response.data == []
+
+    def test_tags(self):
+        for tag in ["foo", "bar", "baz"]:
+            self.store_segment(
+                self.project.id,
+                uuid4().hex,
+                uuid4().hex,
+                span_id=uuid4().hex[:15],
+                parent_span_id=None,
+                timestamp=before_now(days=0, minutes=10).replace(microsecond=0),
+                transaction="foo",
+                duration=100,
+                exclusive_time=100,
+                tags={tag: tag},
+            )
+        query = {
+            "project": [self.project.id],
+        }
+        response = self.do_request(query=query)
+        assert response.status_code == 200, response.data
+        assert response.data == [
+            {"key": "bar", "name": "Bar"},
+            {"key": "baz", "name": "Baz"},
+            {"key": "foo", "name": "Foo"},
+        ]
+
+
+class OrganizationSpansTagKeyValuesEndpointTest(BaseSpansTestCase, APITestCase):
+    view = "sentry-api-0-organization-spans-fields-values"
+
+    def setUp(self):
+        super().setUp()
+        self.login_as(user=self.user)
+
+    def do_request(self, key: str, features=None, **kwargs):
+        if features is None:
+            features = ["organizations:performance-trace-explorer"]
+        with self.feature(features):
+            return self.client.get(
+                reverse(
+                    self.view, kwargs={"organization_slug": self.organization.slug, "key": key}
+                ),
+                format="json",
+                **kwargs,
+            )
+
+    def test_no_feature(self):
+        response = self.do_request("tag", features=[])
+        assert response.status_code == 404, response.data
+
+    def test_no_project(self):
+        response = self.do_request("tag")
+        assert response.status_code == 200, response.data
+        assert response.data == []
+
+    def test_tags_keys(self):
+        timestamp = before_now(days=0, minutes=10).replace(microsecond=0)
+        for tag in ["foo", "bar", "baz"]:
+            self.store_segment(
+                self.project.id,
+                uuid4().hex,
+                uuid4().hex,
+                span_id=uuid4().hex[:15],
+                parent_span_id=None,
+                timestamp=timestamp,
+                transaction="foo",
+                duration=100,
+                exclusive_time=100,
+                tags={"tag": tag},
+            )
+
+        query = {
+            "project": [self.project.id],
+        }
+        response = self.do_request("tag", query=query)
+        assert response.status_code == 200, response.data
+        assert response.data == [
+            {
+                "count": 1,
+                "key": "tag",
+                "value": "bar",
+                "name": "bar",
+                "firstSeen": timestamp.strftime("%Y-%m-%dT%H:%M:%S+00:00"),
+                "lastSeen": timestamp.strftime("%Y-%m-%dT%H:%M:%S+00:00"),
+            },
+            {
+                "count": 1,
+                "key": "tag",
+                "value": "baz",
+                "name": "baz",
+                "firstSeen": timestamp.strftime("%Y-%m-%dT%H:%M:%S+00:00"),
+                "lastSeen": timestamp.strftime("%Y-%m-%dT%H:%M:%S+00:00"),
+            },
+            {
+                "count": 1,
+                "key": "tag",
+                "value": "foo",
+                "name": "foo",
+                "firstSeen": timestamp.strftime("%Y-%m-%dT%H:%M:%S+00:00"),
+                "lastSeen": timestamp.strftime("%Y-%m-%dT%H:%M:%S+00:00"),
+            },
+        ]