Browse Source

feat(statistical-detectors): Add new functions with breakpoint for fu… (#57509)

…nctions

To support the statistical detector issues, this change is introducing a
few new percentile functions that compute the percentile before/after
the breakpoint, and to return the delta between them.
Tony Xiao 1 year ago
parent
commit
b5200182ca

+ 80 - 18
src/sentry/search/events/datasets/profile_functions.py

@@ -21,7 +21,7 @@ from sentry.search.events.fields import (
     NumberRange,
     NumericColumn,
     SnQLFunction,
-    SnQLStringArg,
+    TimestampArg,
     with_default,
 )
 from sentry.search.events.types import NormalizedArg, ParamsType, SelectType, WhereType
@@ -287,7 +287,7 @@ class ProfileFunctionsDatasetConfig(DatasetConfig):
                     default_result_type="string",  # TODO: support array type
                 ),
                 SnQLFunction(
-                    "percentiles",
+                    "percentile",
                     required_args=[
                         ProfileFunctionNumericColumn("column"),
                         NumberRange("percentile", 0, 1),
@@ -366,23 +366,44 @@ class ProfileFunctionsDatasetConfig(DatasetConfig):
                     redundant_grouping=True,
                 ),
                 SnQLFunction(
-                    "slope",
-                    required_args=[SnQLStringArg("column", allowed_strings=AGG_STATE_COLUMNS)],
-                    snql_aggregate=lambda args, alias: Function(
-                        "tupleElement",
-                        [
-                            Function(
-                                "simpleLinearRegression",
-                                [
-                                    Function("toUInt32", [SnQLColumn("timestamp")]),
-                                    Function("finalizeAggregation", [SnQLColumn(args["column"])]),
-                                ],
-                            ),
-                            1,
-                        ],
-                        alias,
+                    "percentile_before",
+                    required_args=[
+                        ProfileFunctionNumericColumn("column"),
+                        NumberRange("percentile", 0, 1),
+                        TimestampArg("timestamp"),
+                    ],
+                    snql_aggregate=lambda args, alias: self._resolve_percentile_cond(
+                        args, alias, "less"
                     ),
-                    default_result_type="number",
+                    result_type_fn=self.reflective_result_type(),
+                    default_result_type="duration",
+                    redundant_grouping=True,
+                ),
+                SnQLFunction(
+                    "percentile_after",
+                    required_args=[
+                        ProfileFunctionNumericColumn("column"),
+                        NumberRange("percentile", 0, 1),
+                        TimestampArg("timestamp"),
+                    ],
+                    snql_aggregate=lambda args, alias: self._resolve_percentile_cond(
+                        args, alias, "greater"
+                    ),
+                    result_type_fn=self.reflective_result_type(),
+                    default_result_type="duration",
+                    redundant_grouping=True,
+                ),
+                SnQLFunction(
+                    "percentile_delta",
+                    required_args=[
+                        ProfileFunctionNumericColumn("column"),
+                        NumberRange("percentile", 0, 1),
+                        TimestampArg("timestamp"),
+                    ],
+                    snql_aggregate=self._resolve_percentile_delta,
+                    result_type_fn=self.reflective_result_type(),
+                    default_result_type="duration",
+                    redundant_grouping=True,
                 ),
             ]
         }
@@ -451,3 +472,44 @@ class ProfileFunctionsDatasetConfig(DatasetConfig):
             ],
             alias,
         )
+
+    def _resolve_percentile_cond(
+        self,
+        args: Mapping[str, Union[str, Column, SelectType, int, float]],
+        alias: str | None,
+        cond: str,
+    ) -> SelectType:
+        return Function(
+            "arrayElement",
+            [
+                Function(
+                    f'quantilesMergeIf({args["percentile"]})',
+                    [
+                        args["column"],
+                        Function(
+                            cond,
+                            [
+                                self.builder.column("timestamp"),
+                                args["timestamp"],
+                            ],
+                        ),
+                    ],
+                ),
+                1,
+            ],
+            alias,
+        )
+
+    def _resolve_percentile_delta(
+        self,
+        args: Mapping[str, Union[str, Column, SelectType, int, float]],
+        alias: str,
+    ) -> SelectType:
+        return Function(
+            "minus",
+            [
+                self._resolve_percentile_cond(args, None, "greater"),
+                self._resolve_percentile_cond(args, None, "less"),
+            ],
+            alias,
+        )

+ 22 - 1
src/sentry/search/events/fields.py

@@ -1,7 +1,7 @@
 import re
 from collections import namedtuple
 from copy import copy, deepcopy
-from datetime import datetime
+from datetime import datetime, timezone
 from typing import Any, List, Mapping, Match, NamedTuple, Optional, Sequence, Set, Tuple, Union
 
 import sentry_sdk
@@ -944,6 +944,27 @@ class IntervalDefault(NumberRange):
         return int(interval)
 
 
+class TimestampArg(FunctionArg):
+    def __init__(self, name: str):
+        super().__init__(name)
+
+    def normalize(
+        self, value: str, params: ParamsType, combinator: Optional[Combinator]
+    ) -> Optional[float]:
+        if not params or not params.get("start") or not params.get("end"):
+            raise InvalidFunctionArgument("function called without date range")
+
+        try:
+            ts = datetime.fromtimestamp(int(value), tz=timezone.utc)
+        except (OverflowError, ValueError):
+            raise InvalidFunctionArgument(f"{value} is not a timestamp")
+
+        if ts < params["start"] or ts > params["end"]:
+            raise InvalidFunctionArgument("timestamp outside date range")
+
+        return ts
+
+
 class ColumnArg(FunctionArg):
     """Parent class to any function argument that should eventually resolve to a
     column

+ 0 - 64
tests/sentry/search/events/builder/test_profile_functions.py

@@ -6,7 +6,6 @@ from snuba_sdk.conditions import Condition, Op
 from snuba_sdk.function import Function
 
 from sentry.search.events.builder.profile_functions import ProfileFunctionsQueryBuilder
-from sentry.search.events.types import QueryBuilderConfig
 from sentry.snuba.dataset import Dataset
 from sentry.testutils.factories import Factories
 from sentry.testutils.pytest.fixtures import django_db_all
@@ -81,66 +80,3 @@ def test_where(params, search, condition):
         selected_columns=["count()"],
     )
     assert condition in builder.where
-
-
-@pytest.mark.parametrize(
-    "search,condition",
-    [
-        pytest.param(
-            "slope(avg):>0",
-            Condition(
-                Function(
-                    "tupleElement",
-                    [
-                        Function(
-                            "simpleLinearRegression",
-                            [
-                                Function("toUInt32", [Column("timestamp")]),
-                                Function("finalizeAggregation", [Column("avg")]),
-                            ],
-                        ),
-                        1,
-                    ],
-                    "sentry_slope_avg",
-                ),
-                Op(">"),
-                0,
-            ),
-            id="regression",
-        ),
-        pytest.param(
-            "slope(avg):<0",
-            Condition(
-                Function(
-                    "tupleElement",
-                    [
-                        Function(
-                            "simpleLinearRegression",
-                            [
-                                Function("toUInt32", [Column("timestamp")]),
-                                Function("finalizeAggregation", [Column("avg")]),
-                            ],
-                        ),
-                        1,
-                    ],
-                    "sentry_slope_avg",
-                ),
-                Op("<"),
-                0,
-            ),
-            id="improvement",
-        ),
-    ],
-)
-@django_db_all
-def test_having(params, search, condition):
-    builder = ProfileFunctionsQueryBuilder(
-        Dataset.Functions,
-        params,
-        query=search,
-        selected_columns=["count()"],
-        config=QueryBuilderConfig(
-            use_aggregate_conditions=True,
-        ),
-    )
-    assert condition in builder.having

+ 51 - 107
tests/snuba/api/endpoints/test_organization_events.py

@@ -19,7 +19,12 @@ from sentry.models.transaction_threshold import (
     TransactionMetric,
 )
 from sentry.search.events import constants
-from sentry.testutils.cases import APITestCase, PerformanceIssueTestCase, SnubaTestCase
+from sentry.testutils.cases import (
+    APITestCase,
+    PerformanceIssueTestCase,
+    ProfilesSnubaTestCase,
+    SnubaTestCase,
+)
 from sentry.testutils.helpers import parse_link_header
 from sentry.testutils.helpers.datetime import before_now, freeze_time, iso_format
 from sentry.testutils.silo import region_silo_test
@@ -5743,106 +5748,36 @@ class OrganizationEventsProfilesDatasetEndpointTest(OrganizationEventsEndpointTe
 
 
 @region_silo_test
-class OrganizationEventsProfileFunctionsDatasetEndpointTest(OrganizationEventsEndpointTestBase):
-    @mock.patch("sentry.search.events.builder.discover.raw_snql_query")
-    def test_functions_dataset_simple(self, mock_snql_query):
-        mock_snql_query.side_effect = [
-            {
-                "data": [
-                    {
-                        "function": "foo_fn",
-                        "transaction": "foo_tx",
-                        "is_application": 1,
-                        "project": "python",
-                        "release": "backend@1",
-                        "platform.name": "python",
-                        "package": "lib_foo",
-                        "environment": "development",
-                        "p50()": 34695708.0,
-                        "p75()": 45980969.0,
-                        "p95()": 92592143.6,
-                        "p99()": 103764495.12000002,
-                        "avg()": 51235123.0,
-                        "sum()": 951283182.0,
-                        "examples()": [
-                            "6919e4076b4a46bbaf1f3ef1f0666147",
-                            "b04bafc378ea421b83b8680fd1caea52",
-                            "ceac40e40a0c44adb7f7b1e07f02586f",
-                            "6919e4076b4a46bbaf1f3ef1f0666147",
-                            "68acd0a8bdea46a59ba23907152b1ce5",
-                            "8ad2946586694c1b83d817fab3bccbc1",
-                        ],
-                        "count()": 12,
-                    },
-                ],
-                "meta": [
-                    {
-                        "name": "function",
-                        "type": "String",
-                    },
-                    {
-                        "name": "transaction",
-                        "type": "String",
-                    },
-                    {
-                        "name": "is_application",
-                        "type": "UInt8",
-                    },
-                    {
-                        "name": "project",
-                        "type": "String",
-                    },
-                    {
-                        "name": "release",
-                        "type": "String",
-                    },
-                    {
-                        "name": "platform.name",
-                        "type": "String",
-                    },
-                    {
-                        "name": "package",
-                        "type": "String",
-                    },
-                    {
-                        "name": "environment",
-                        "type": "String",
-                    },
-                    {
-                        "name": "p50()",
-                        "type": "Float64",
-                    },
-                    {
-                        "name": "p75()",
-                        "type": "Float64",
-                    },
-                    {
-                        "name": "p95()",
-                        "type": "Float64",
-                    },
-                    {
-                        "name": "p99()",
-                        "type": "Float64",
-                    },
-                    {
-                        "name": "avg()",
-                        "type": "Float64",
-                    },
-                    {
-                        "name": "sum()",
-                        "type": "Float64",
-                    },
-                    {
-                        "name": "examples()",
-                        "type": "Array(String)",
-                    },
-                    {
-                        "name": "count()",
-                        "type": "UInt64",
-                    },
-                ],
-            }
-        ]
+class OrganizationEventsProfileFunctionsDatasetEndpointTest(
+    OrganizationEventsEndpointTestBase, ProfilesSnubaTestCase
+):
+    def test_functions_dataset_simple(self):
+        self.store_functions(
+            [
+                {
+                    "self_times_ns": [100 for _ in range(100)],
+                    "package": "foo",
+                    "function": "bar",
+                    "in_app": True,
+                },
+            ],
+            project=self.project,
+            timestamp=before_now(hours=3),
+        )
+        self.store_functions(
+            [
+                {
+                    "self_times_ns": [150 for _ in range(100)],
+                    "package": "foo",
+                    "function": "bar",
+                    "in_app": True,
+                },
+            ],
+            project=self.project,
+            timestamp=before_now(hours=1),
+        )
+
+        mid = before_now(hours=2)
 
         fields = [
             "transaction",
@@ -5861,14 +5796,20 @@ class OrganizationEventsProfileFunctionsDatasetEndpointTest(OrganizationEventsEn
             "p99()",
             "avg()",
             "sum()",
+            f"percentile_before(function.duration, 0.95, {int(mid.timestamp())})",
+            f"percentile_after(function.duration, 0.95, {int(mid.timestamp())})",
+            f"percentile_delta(function.duration, 0.95, {int(mid.timestamp())})",
         ]
 
-        query = {
-            "field": fields,
-            "project": [self.project.id],
-            "dataset": "profileFunctions",
-        }
-        response = self.do_request(query, features={"organizations:profiling": True})
+        response = self.do_request(
+            {
+                "field": fields,
+                "statsPeriod": "4h",
+                "project": [self.project.id],
+                "dataset": "profileFunctions",
+            },
+            features={"organizations:profiling": True},
+        )
         assert response.status_code == 200, response.content
 
         # making sure the response keys are in the form we expect and not aliased
@@ -5895,6 +5836,9 @@ class OrganizationEventsProfileFunctionsDatasetEndpointTest(OrganizationEventsEn
             "p99()": "nanosecond",
             "avg()": "nanosecond",
             "sum()": "nanosecond",
+            f"percentile_before(function.duration, 0.95, {int(mid.timestamp())})": "nanosecond",
+            f"percentile_after(function.duration, 0.95, {int(mid.timestamp())})": "nanosecond",
+            f"percentile_delta(function.duration, 0.95, {int(mid.timestamp())})": "nanosecond",
         }