Browse Source

feat(starfish): Introduce trend based percent change (#51392)

- This changes the percentile_percent_change function to use
simpleLinearRegression, so that we take the trend over the whole time range
into account rather than just its first and second halves
- This uses the finalizeAggregation function, which turns all the percentile
buckets back into usable numbers
William Mak 1 year ago
parent
commit
229eda2231

+ 16 - 0
src/sentry/search/events/builder/discover.py

@@ -101,6 +101,22 @@ class BaseQueryBuilder:
             raise InvalidSearchQuery("Need both start & end to use percent_change")
         return self.start + (self.end - self.start) / 2
 
+    def get_regression_value(self, x: datetime, linearRegression: Function, alias: str):
+        return Function(  # builds: toUnixTimestamp(x) * element1 + element2, named `alias`
+            "plus",
+            [
+                Function(
+                    "multiply",
+                    [
+                        Function("toUnixTimestamp", [x]),  # x converted to a unix timestamp
+                        Function("tupleElement", [linearRegression, 1]),  # presumably the slope (k) of a simpleLinearRegression (k, b) tuple
+                    ],
+                ),
+                Function("tupleElement", [linearRegression, 2]),  # presumably the intercept (b) of the same tuple
+            ],
+            alias,  # alias for the resulting expression
+        )
+
     def first_half_condition(self):
         """Create the first half condition for percent_change functions"""
         return Function(

+ 1 - 0
src/sentry/search/events/constants.py

@@ -237,6 +237,7 @@ SPAN_FUNCTION_ALIASES = {
     "sps_percent_change": "eps_percent_change",
     "spm_percent_change": "epm_percent_change",
 }
+SPAN_PERCENTILE_INDEXES = [0.5, 0.75, 0.9, 0.95, 0.99]  # order matters: list position selects the percentiles array element
 
 # Mapping of public aliases back to the metrics identifier
 METRICS_MAP = {

+ 23 - 12
src/sentry/search/events/datasets/spans_metrics.py

@@ -5,7 +5,7 @@ from typing import Callable, Mapping, Optional, Union
 from snuba_sdk import Column, Function, OrderBy
 
 from sentry.api.event_search import SearchFilter
-from sentry.exceptions import IncompatibleMetricsQuery
+from sentry.exceptions import IncompatibleMetricsQuery, InvalidSearchQuery
 from sentry.search.events import builder, constants, fields
 from sentry.search.events.datasets import function_aliases
 from sentry.search.events.datasets.base import DatasetConfig
@@ -494,17 +494,28 @@ class SpansMetricsDatasetConfig(DatasetConfig):
         args: Mapping[str, Union[str, Column, SelectType, int, float]],
         alias: Optional[str] = None,
     ) -> SelectType:
-        first_half = function_aliases.resolve_metrics_percentile(
-            args=args,
-            alias=None,
-            fixed_percentile=args["percentile"],
-            extra_conditions=[self.builder.first_half_condition()],
+        percentile = args["percentile"]
+        if percentile not in constants.SPAN_PERCENTILE_INDEXES:
+            raise InvalidSearchQuery(f"percentile_percent_change doesn't support {percentile}")
+        linear_regression = Function(
+            "simpleLinearRegression",
+            [
+                Function("toUnixTimestamp", [self.builder.column("timestamp")]),
+                Function(
+                    "arrayElement",
+                    [
+                        Function("finalizeAggregation", [Column("percentiles")]),
+                        constants.SPAN_PERCENTILE_INDEXES.index(args["percentile"]) + 1,
+                    ],
+                ),
+            ],
+            f"{alias}_linear_regression",
         )
-        second_half = function_aliases.resolve_metrics_percentile(
-            args=args,
-            alias=None,
-            fixed_percentile=args["percentile"],
-            extra_conditions=[self.builder.second_half_condition()],
+        first_half = self.builder.get_regression_value(
+            self.builder.start, linear_regression, f"{alias}_first_half"
+        )
+        second_half = self.builder.get_regression_value(
+            self.builder.end, linear_regression, f"{alias}_second_half"
         )
         return self._resolve_percent_change_function(first_half, second_half, alias)
 
@@ -544,7 +555,7 @@ class SpansMetricsDatasetConfig(DatasetConfig):
                             "minus",
                             [second_half, first_half],
                         ),
-                        first_half,
+                        Function("abs", [first_half]),
                     ],
                 ),
                 None,

+ 22 - 6
tests/snuba/api/endpoints/test_organization_events_span_metrics.py

@@ -256,20 +256,33 @@ class OrganizationEventsMetricsEnhancedPerformanceEndpointTest(MetricsEnhancedPe
         assert meta["fields"]["http_error_count()"] == "integer"
         assert meta["fields"]["http_error_rate()"] == "percentage"
 
-    def percentile_percent_change(self):
+    def test_percentile_percent_change(self):
         self.store_span_metric(
             5,
+            tags={"description": "foo_description"},
             timestamp=self.six_min_ago,
         )
         self.store_span_metric(
             10,
+            tags={"description": "foo_description"},
+            timestamp=self.min_ago,
+        )
+
+        self.store_span_metric(
+            10,
+            tags={"description": "bar_description"},
+            timestamp=self.six_min_ago,
+        )
+        self.store_span_metric(
+            5,
+            tags={"description": "bar_description"},
             timestamp=self.min_ago,
         )
         response = self.do_request(
             {
-                "field": ["percentile_percent_change(span.duration)"],
+                "field": ["description", "percentile_percent_change(span.duration, 0.95)"],
                 "query": "",
-                "orderby": ["-percentile_percent_change()"],
+                "orderby": ["-percentile_percent_change(span.duration, 0.95)"],
                 "project": self.project.id,
                 "dataset": "spansMetrics",
                 "statsPeriod": "10m",
@@ -278,10 +291,13 @@ class OrganizationEventsMetricsEnhancedPerformanceEndpointTest(MetricsEnhancedPe
         assert response.status_code == 200, response.content
         data = response.data["data"]
         meta = response.data["meta"]
-        assert len(data) == 1
-        assert data[0]["percentile_percent_change()"] == 1
+        assert len(data) == 2
+        assert data[0]["description"] == "foo_description"
+        assert data[0]["percentile_percent_change(span.duration, 0.95)"] > 0
+        assert data[1]["description"] == "bar_description"
+        assert data[1]["percentile_percent_change(span.duration, 0.95)"] < 0
         assert meta["dataset"] == "spansMetrics"
-        assert meta["fields"]["percentile_percent_change()"] == "percentage"
+        assert meta["fields"]["percentile_percent_change(span.duration, 0.95)"] == "percentage"
 
     def test_http_error_count_percent_change(self):
         for _ in range(4):