Browse Source

feat(starfish): add span domain array (#56494)

- This adds a new field alias, `span.domain_array`, that will eventually
replace `span.domain`. It splits the domain string on `,` and returns an
array of span domains.
- This allows filtering on the `span.domain` field as if it were a real
array.
William Mak 1 year ago
parent
commit
64d5827030

+ 2 - 0
src/sentry/search/events/constants.py

@@ -43,6 +43,8 @@ HTTP_STATUS_CODE_ALIAS = "http.status_code"
 DEVICE_CLASS_ALIAS = "device.class"
 TOTAL_SPAN_DURATION_ALIAS = "total.span_duration"
 SPAN_MODULE_ALIAS = "span.module"
+# Field alias that exposes `span.domain` as an array of domains.
+SPAN_DOMAIN_ALIAS = "span.domain_array"
+# Character on which the raw `span.domain` string is split into array entries.
+SPAN_DOMAIN_SEPARATOR = ","
 
 
 class ThresholdDict(TypedDict):

+ 58 - 3
src/sentry/search/events/datasets/spans_metrics.py

@@ -3,7 +3,7 @@ from __future__ import annotations
 from typing import Callable, Mapping, Optional, Union
 
 import sentry_sdk
-from snuba_sdk import AliasedExpression, Column, Function, OrderBy
+from snuba_sdk import AliasedExpression, Column, Condition, Function, Identifier, Op, OrderBy
 
 from sentry.api.event_search import SearchFilter
 from sentry.exceptions import IncompatibleMetricsQuery
@@ -26,11 +26,16 @@ class SpansMetricsDatasetConfig(DatasetConfig):
     def search_filter_converter(
         self,
     ) -> Mapping[str, Callable[[SearchFilter], Optional[WhereType]]]:
-        return {}
+        return {
+            constants.SPAN_DOMAIN_ALIAS: self._span_domain_filter_converter,
+        }
 
     @property
     def field_alias_converter(self) -> Mapping[str, Callable[[str], SelectType]]:
         # Maps each supported field alias to the method that resolves it into a
         # select expression.
-        return {constants.SPAN_MODULE_ALIAS: self._resolve_span_module}
+        return {
+            constants.SPAN_MODULE_ALIAS: self._resolve_span_module,
+            constants.SPAN_DOMAIN_ALIAS: self._resolve_span_domain,
+        }
 
     def resolve_metric(self, value: str) -> int:
         metric_id = self.builder.resolve_metric_index(constants.SPAN_METRICS_MAP.get(value, value))
@@ -337,9 +342,59 @@ class SpansMetricsDatasetConfig(DatasetConfig):
 
         return function_converter
 
+    def _span_domain_filter_converter(self, search_filter: SearchFilter) -> Optional[WhereType]:
+        """Translate a `span.domain_array` search filter into a Snuba condition.
+
+        Three shapes are produced, depending on the filter value:
+
+        * wildcard value: `arrayExists` with a case-insensitive `match` over the
+          resolved domain array, compared to 1 using the filter's own operator;
+        * empty string: a `length` check on the array (`<= 0` when the operator
+          is "=", `> 0` otherwise);
+        * anything else: a `has` membership check (`!= 0` when the operator is in
+          `constants.EQUALITY_OPERATORS`, `== 0` otherwise).
+        """
+        filter_value = search_filter.value
+        raw_value = filter_value.value
+
+        if filter_value.is_wildcard():
+            # The first and last characters of the wildcard value are dropped
+            # before it is embedded in the pattern.
+            # NOTE(review): presumably these are the ^/$ anchors of the translated
+            # wildcard regex -- confirm against SearchValue.
+            value = filter_value.value[1:-1]
+            per_element_match = Function(
+                "lambda",
+                [
+                    Function("tuple", [Identifier("x")]),
+                    Function("match", [Identifier("x"), f"(?i){value}"]),
+                ],
+            )
+            any_element_matches = Function(
+                "arrayExists",
+                [per_element_match, self._resolve_span_domain()],
+            )
+            return Condition(any_element_matches, Op(search_filter.operator), 1)
+
+        if raw_value == "":
+            if search_filter.operator == "=":
+                length_op = Op.LTE
+            else:
+                length_op = Op.GT
+            domain_count = Function("length", [self._resolve_span_domain()])
+            return Condition(domain_count, length_op, 0)
+
+        contains_value = Function("has", [self._resolve_span_domain(), raw_value])
+        if search_filter.operator in constants.EQUALITY_OPERATORS:
+            has_op = Op.NEQ
+        else:
+            has_op = Op.EQ
+        return Condition(contains_value, has_op, 0)
+
     def _resolve_span_module(self, alias: str) -> SelectType:
         # Delegates to `field_aliases.resolve_span_module`, passing this
         # config's builder and the requested alias.
         return field_aliases.resolve_span_module(self.builder, alias)
 
+    def _resolve_span_domain(self, alias: Optional[str] = None) -> SelectType:
+        """Build the expression that exposes `span.domain` as an array.
+
+        The column is split on `constants.SPAN_DOMAIN_SEPARATOR` with
+        `splitByChar`, and the result is passed through `arrayFilter` with a
+        `notEmpty` lambda so that empty entries are dropped.
+
+        :param alias: optional alias attached to the outer `arrayFilter` function.
+        """
+        keep_non_empty = Function(
+            "lambda",
+            [Function("tuple", [Identifier("x")]), Function("notEmpty", [Identifier("x")])],
+        )
+        split_domains = Function(
+            "splitByChar",
+            [constants.SPAN_DOMAIN_SEPARATOR, self.builder.column("span.domain")],
+        )
+        return Function("arrayFilter", [keep_non_empty, split_domains], alias)
+
     # Query Functions
     def _resolve_count_if(
         self,

+ 148 - 0
tests/snuba/api/endpoints/test_organization_events_span_metrics.py

@@ -611,6 +611,138 @@ class OrganizationEventsMetricsEnhancedPerformanceEndpointTest(MetricsEnhancedPe
         )
         assert response.status_code == 400, response.content
 
+    def test_span_domain_array(self):
+        """Selecting `span.domain_array` returns the split domains as an array."""
+        for self_time, domain_tag in (
+            (321, ",sentry_table1,"),
+            (21, ",sentry_table1,sentry_table2,"),
+        ):
+            self.store_span_metric(
+                self_time,
+                internal_metric=constants.SELF_TIME_LIGHT,
+                timestamp=self.min_ago,
+                tags={"span.domain": domain_tag},
+            )
+
+        params = {
+            "field": ["span.domain_array", "p75(span.self_time)"],
+            "query": "",
+            "project": self.project.id,
+            "orderby": ["-p75(span.self_time)"],
+            "dataset": "spansMetrics",
+        }
+        response = self.do_request(params)
+        assert response.status_code == 200, response.content
+
+        rows = response.data["data"]
+        meta = response.data["meta"]
+        # Ordered by descending p75, so the single-domain row (321) comes first.
+        assert len(rows) == 2
+        assert rows[0]["span.domain_array"] == ["sentry_table1"]
+        assert rows[1]["span.domain_array"] == ["sentry_table1", "sentry_table2"]
+        assert meta["dataset"] == "spansMetrics"
+        assert meta["fields"]["span.domain_array"] == "array"
+
+    def test_span_domain_array_filter(self):
+        """An exact-value filter keeps only rows whose array contains that domain."""
+        for self_time, domain_tag in (
+            (321, ",sentry_table1,"),
+            (21, ",sentry_table1,sentry_table2,"),
+        ):
+            self.store_span_metric(
+                self_time,
+                internal_metric=constants.SELF_TIME_LIGHT,
+                timestamp=self.min_ago,
+                tags={"span.domain": domain_tag},
+            )
+
+        params = {
+            "field": ["span.domain_array", "p75(span.self_time)"],
+            "query": "span.domain_array:sentry_table2",
+            "project": self.project.id,
+            "dataset": "spansMetrics",
+        }
+        response = self.do_request(params)
+        assert response.status_code == 200, response.content
+
+        rows = response.data["data"]
+        meta = response.data["meta"]
+        assert len(rows) == 1
+        assert rows[0]["span.domain_array"] == ["sentry_table1", "sentry_table2"]
+        assert meta["dataset"] == "spansMetrics"
+        assert meta["fields"]["span.domain_array"] == "array"
+
+    def test_span_domain_array_filter_wildcard(self):
+        """Infix, prefix and suffix wildcard filters each match only the second row."""
+        for self_time, domain_tag in (
+            (321, ",sentry_table1,"),
+            (21, ",sentry_table1,sentry_table2,"),
+        ):
+            self.store_span_metric(
+                self_time,
+                internal_metric=constants.SELF_TIME_LIGHT,
+                timestamp=self.min_ago,
+                tags={"span.domain": domain_tag},
+            )
+
+        expected_domains = ["sentry_table1", "sentry_table2"]
+        for query in ("sentry*2", "*table2", "sentry_table2*"):
+            params = {
+                "field": ["span.domain_array", "p75(span.self_time)"],
+                "query": f"span.domain_array:{query}",
+                "project": self.project.id,
+                "dataset": "spansMetrics",
+            }
+            response = self.do_request(params)
+            assert response.status_code == 200, response.content
+
+            rows = response.data["data"]
+            meta = response.data["meta"]
+            # The wildcard under test is attached as the assertion message.
+            assert len(rows) == 1, query
+            assert rows[0]["span.domain_array"] == expected_domains, query
+            assert meta["dataset"] == "spansMetrics", query
+            assert meta["fields"]["span.domain_array"] == "array"
+
+    def test_span_domain_array_has_filter(self):
+        """`has:` and `!has:` each return exactly one of the two stored rows."""
+        for self_time, domain_tag in (
+            (321, ""),
+            (21, ",sentry_table1,sentry_table2,"),
+        ):
+            self.store_span_metric(
+                self_time,
+                internal_metric=constants.SELF_TIME_LIGHT,
+                timestamp=self.min_ago,
+                tags={"span.domain": domain_tag},
+            )
+
+        def run_query(query):
+            # Same request shape for both checks; only the query string differs.
+            return self.do_request(
+                {
+                    "field": ["span.domain_array", "p75(span.self_time)"],
+                    "query": query,
+                    "project": self.project.id,
+                    "dataset": "spansMetrics",
+                }
+            )
+
+        # has: only the row with a non-empty domain array is returned.
+        response = run_query("has:span.domain_array")
+        assert response.status_code == 200, response.content
+        rows = response.data["data"]
+        meta = response.data["meta"]
+        assert len(rows) == 1
+        assert rows[0]["span.domain_array"] == ["sentry_table1", "sentry_table2"]
+        assert meta["dataset"] == "spansMetrics"
+
+        # !has: exactly one row is returned as well.
+        response = run_query("!has:span.domain_array")
+        assert response.status_code == 200, response.content
+        rows = response.data["data"]
+        meta = response.data["meta"]
+        assert len(rows) == 1
+        assert meta["dataset"] == "spansMetrics"
+        assert meta["fields"]["span.domain_array"] == "array"
+
 
 @region_silo_test
 class OrganizationEventsMetricsEnhancedPerformanceEndpointTestWithMetricLayer(
@@ -643,3 +775,19 @@ class OrganizationEventsMetricsEnhancedPerformanceEndpointTestWithMetricLayer(
     @pytest.mark.xfail(reason="Not implemented")
     def test_avg_compare(self):
         super().test_avg_compare()
+
+    @pytest.mark.xfail(reason="Not implemented")
+    def test_span_domain_array(self):
+        # Re-runs the inherited test; marked xfail ("Not implemented") for this class.
+        super().test_span_domain_array()
+
+    @pytest.mark.xfail(reason="Not implemented")
+    def test_span_domain_array_filter(self):
+        # Re-runs the inherited test; marked xfail ("Not implemented") for this class.
+        super().test_span_domain_array_filter()
+
+    @pytest.mark.xfail(reason="Not implemented")
+    def test_span_domain_array_filter_wildcard(self):
+        # Re-runs the inherited test; marked xfail ("Not implemented") for this class.
+        super().test_span_domain_array_filter_wildcard()
+
+    @pytest.mark.xfail(reason="Not implemented")
+    def test_span_domain_array_has_filter(self):
+        # Re-runs the inherited test; marked xfail ("Not implemented") for this class.
+        super().test_span_domain_array_has_filter()