Browse Source

feat(functions): Add cpm function for functions dataset (#59071)

Attempt 2 of #59033. Because we had to introduce the sentry alias prefix
to the timeseries query to avoid alias conflicts in snuba, we have to
handle it in the response as well. This change properly handles that and
adds a test for it.
Tony Xiao 1 year ago
parent
commit
53c0531140

+ 14 - 0
src/sentry/search/events/builder/profile_functions.py

@@ -7,6 +7,7 @@ from sentry.api.event_search import SearchFilter, SearchKey, SearchValue
 from sentry.discover.arithmetic import categorize_columns
 from sentry.search.events.builder import QueryBuilder, TimeseriesQueryBuilder
 from sentry.search.events.datasets.profile_functions import ProfileFunctionsDatasetConfig
+from sentry.search.events.fields import get_function_alias
 from sentry.search.events.types import (
     ParamsType,
     QueryBuilderConfig,
@@ -49,6 +50,19 @@ class ProfileFunctionsQueryBuilder(ProfileFunctionsQueryBuilderMixin, QueryBuild
 class ProfileFunctionsTimeseriesQueryBuilder(
     ProfileFunctionsQueryBuilderMixin, TimeseriesQueryBuilder
 ):
function_alias_prefix = "sentry_"

def strip_alias_prefix(self, result):
    """Rewrite row keys in ``result["data"]`` from the internally
    prefixed snuba column aliases back to the user-facing function
    aliases recorded in ``self.function_alias_map``.

    Keys with no entry in the map are kept as-is. Mutates *result*
    in place and also returns it for convenience.
    """
    # Build the prefixed-alias -> public-alias translation table once.
    rename = {}
    for column, details in self.function_alias_map.items():
        rename[column] = get_function_alias(details.field)

    stripped_rows = []
    for row in result.get("data", []):
        stripped_rows.append({rename.get(key, key): value for key, value in row.items()})
    result["data"] = stripped_rows
    return result
+
     @property
     def time_column(self) -> SelectType:
         return Function(

+ 21 - 0
src/sentry/search/events/datasets/profile_functions.py

@@ -240,6 +240,11 @@ class ProfileFunctionsDatasetConfig(DatasetConfig):
                     ),
                     default_result_type="integer",
                 ),
+                SnQLFunction(
+                    "cpm",  # calls per minute
+                    snql_aggregate=lambda args, alias: self._resolve_cpm(args, alias),
+                    default_result_type="integer",
+                ),
                 SnQLFunction(
                     "cpm_before",
                     required_args=[TimestampArg("timestamp")],
@@ -522,6 +527,22 @@ class ProfileFunctionsDatasetConfig(DatasetConfig):
             alias,
         )
 
def _resolve_cpm(
    self,
    args: Mapping[str, Union[str, Column, SelectType, int, float]],
    alias: str | None,
) -> SelectType:
    """Resolve ``cpm()`` (calls per minute) into a SnQL expression.

    The merged call count over the entire queried window is divided by
    the window length in minutes, yielding an average rate for the
    whole period.
    """
    # Length of the queried time window, in seconds.
    window_seconds = (self.builder.params.end - self.builder.params.start).total_seconds()

    total_calls = Function("countMerge", [SnQLColumn("count")])
    window_minutes = Function("divide", [window_seconds, 60])
    return Function("divide", [total_calls, window_minutes], alias)
+
     def _resolve_cpm_cond(
         self,
         args: Mapping[str, Union[str, Column, SelectType, int, float]],

+ 3 - 0
src/sentry/snuba/functions.py

@@ -94,6 +94,7 @@ def timeseries_query(
         ),
     )
     results = builder.run_query(referrer)
+    results = builder.strip_alias_prefix(results)
 
     return SnubaTSResult(
         {
@@ -214,6 +215,8 @@ def format_top_events_timeseries_results(
     with sentry_sdk.start_span(
         op="discover.discover", description="top_events.transform_results"
     ) as span:
+        result = query_builder.strip_alias_prefix(result)
+
         span.set_data("result_count", len(result.get("data", [])))
         processed_result = query_builder.process_results(result)
 

+ 113 - 1
tests/snuba/api/endpoints/test_organization_events_stats.py

@@ -14,7 +14,7 @@ from sentry.constants import MAX_TOP_EVENTS
 from sentry.issues.grouptype import ProfileFileIOGroupType
 from sentry.models.transaction_threshold import ProjectTransactionThreshold, TransactionMetric
 from sentry.snuba.discover import OTHER_KEY
-from sentry.testutils.cases import APITestCase, SnubaTestCase
+from sentry.testutils.cases import APITestCase, ProfilesSnubaTestCase, SnubaTestCase
 from sentry.testutils.helpers.datetime import before_now, iso_format
 from sentry.testutils.silo import region_silo_test
 from sentry.utils.samples import load_data
@@ -2569,3 +2569,115 @@ class OrganizationEventsStatsTopNEvents(APITestCase, SnubaTestCase):
             )
 
         assert response.status_code == 200
+
+
@region_silo_test
class OrganizationEventsStatsProfileFunctionDatasetEndpointTest(
    APITestCase, ProfilesSnubaTestCase, SearchIssueTestMixin
):
    """Exercises ``cpm()`` on the profile-functions dataset of the
    events-stats endpoint."""

    endpoint = "sentry-api-0-organization-events-stats"

    def setUp(self):
        super().setUp()
        self.login_as(user=self.user)

        # Fixed mid-morning reference points over the last three days so
        # the stored profile data falls inside the queried window.
        def midmorning(days):
            return before_now(days=days).replace(hour=10, minute=0, second=0, microsecond=0)

        self.one_day_ago = midmorning(1)
        self.two_days_ago = midmorning(2)
        self.three_days_ago = midmorning(3)

        self.project = self.create_project()

        self.url = reverse(
            "sentry-api-0-organization-events-stats",
            kwargs={"organization_slug": self.project.organization.slug},
        )

    def test_functions_dataset_simple(self):
        # One function with 100 calls, stored in the middle of the window.
        self.store_functions(
            [
                {
                    "self_times_ns": [100] * 100,
                    "package": "foo",
                    "function": "bar",
                    "in_app": True,
                },
            ],
            project=self.project,
            timestamp=self.two_days_ago,
        )

        response = self.client.get(
            self.url,
            data={
                "dataset": "profileFunctions",
                "start": iso_format(self.three_days_ago),
                "end": iso_format(self.one_day_ago),
                "interval": "1d",
                "yAxis": "cpm()",
            },
            format="json",
        )
        assert response.status_code == 200, response.content

        # The summed per-bucket rates must equal 100 calls spread over
        # the whole two-day window, expressed per minute.
        window_minutes = (self.one_day_ago - self.three_days_ago).total_seconds() / 60
        total = sum(bucket[1][0]["count"] for bucket in response.data["data"])
        assert total == pytest.approx(100 / window_minutes, rel=1e-3)
+
+
@region_silo_test
class OrganizationEventsStatsTopNEventsProfileFunctionDatasetEndpointTest(
    APITestCase, ProfilesSnubaTestCase, SearchIssueTestMixin
):
    """Exercises ``cpm()`` with top-events grouping on the
    profile-functions dataset of the events-stats endpoint."""

    endpoint = "sentry-api-0-organization-events-stats"

    def setUp(self):
        super().setUp()
        self.login_as(user=self.user)

        # Fixed mid-morning reference points over the last three days so
        # the stored profile data falls inside the queried window.
        def midmorning(days):
            return before_now(days=days).replace(hour=10, minute=0, second=0, microsecond=0)

        self.one_day_ago = midmorning(1)
        self.two_days_ago = midmorning(2)
        self.three_days_ago = midmorning(3)

        self.project = self.create_project()

        self.url = reverse(
            "sentry-api-0-organization-events-stats",
            kwargs={"organization_slug": self.project.organization.slug},
        )

    def test_functions_dataset_simple(self):
        # Two functions with different call counts so each top event
        # yields a distinct rate.
        self.store_functions(
            [
                {
                    "self_times_ns": [100] * 100,
                    "package": "pkg",
                    "function": "foo",
                    "in_app": True,
                },
                {
                    "self_times_ns": [100] * 10,
                    "package": "pkg",
                    "function": "bar",
                    "in_app": True,
                },
            ],
            project=self.project,
            timestamp=self.two_days_ago,
        )

        response = self.client.get(
            self.url,
            data={
                "dataset": "profileFunctions",
                "field": ["function", "count()"],
                "start": iso_format(self.three_days_ago),
                "end": iso_format(self.one_day_ago),
                "yAxis": "cpm()",
                "interval": "1d",
                "topEvents": 2,
                "excludeOther": 1,
            },
            format="json",
        )
        assert response.status_code == 200, response.content

        # Each function's summed rate equals its call count spread over
        # the whole two-day window, expressed per minute.
        window_minutes = (self.one_day_ago - self.three_days_ago).total_seconds() / 60

        foo_total = sum(bucket[1][0]["count"] for bucket in response.data["foo"]["data"])
        assert foo_total == pytest.approx(100 / window_minutes, rel=1e-3)

        bar_total = sum(bucket[1][0]["count"] for bucket in response.data["bar"]["data"])
        assert bar_total == pytest.approx(10 / window_minutes, rel=1e-3)