
ref(metrics): Automatically infer the granularity and use case ID (#58542)

The granularity and use case ID can be determined from other parameters passed in with the query, so infer them automatically instead of requiring callers to set them.

Note that the granularity inference is not smart. It simply picks the largest possible granularity based on the specified time range and interval. If the time range is not aligned to a whole day/hour/minute boundary, it will default to the smallest granularity.
Evan Hicks 1 year ago
Parent
Commit
2a9472143a
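
For context, here is a minimal standalone sketch of the granularity inference described above. The infer_granularity name and the example dates are illustrative only; the actual implementation is the _resolve_granularity helper added in src/sentry/snuba/metrics_layer/query.py below.

from __future__ import annotations

from datetime import datetime, timedelta

# Granularities supported by the metrics storage, in seconds (mirrors the patch below).
ALLOWED_GRANULARITIES = [10, 60, 3600, 86400]


def infer_granularity(start: datetime, end: datetime, interval: int | None) -> int:
    if interval is not None:
        # Pick the largest granularity that does not exceed the requested interval.
        for granularity in reversed(ALLOWED_GRANULARITIES):
            if granularity <= interval:
                return granularity
        return ALLOWED_GRANULARITIES[0]

    # Totals query: for each endpoint, find the largest granularity that evenly
    # divides its offset from midnight, then take the smaller of the two.
    found = []
    for t in (start, end):
        midnight = t.replace(hour=0, minute=0, second=0, microsecond=0)
        offset = int((t - midnight).total_seconds())
        found.append(
            next(
                (g for g in reversed(ALLOWED_GRANULARITIES) if offset % g == 0),
                ALLOWED_GRANULARITIES[0],
            )
        )
    return min(found)


midnight = datetime(2023, 10, 1)
# A 7-day totals query aligned to midnight can use the 1-day granularity.
assert infer_granularity(midnight - timedelta(days=7), midnight, None) == 86400
# Shift both endpoints by one second and it falls back to the 10s granularity.
assert (
    infer_granularity(
        midnight - timedelta(days=7, seconds=1), midnight - timedelta(seconds=1), None
    )
    == 10
)
# With an explicit 2-minute interval, the largest granularity <= 120s is 60s.
assert infer_granularity(midnight - timedelta(days=7), midnight, 120) == 60

The use case ID is resolved analogously from the metric's MRI namespace (e.g. d:transactions/duration@millisecond maps to the transactions use case), which is what the new _resolve_use_case_id_str helper does.
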

+ 1 - 1
requirements-base.txt

@@ -63,7 +63,7 @@ sentry-kafka-schemas>=0.1.32
 sentry-redis-tools>=0.1.7
 sentry-relay>=0.8.32
 sentry-sdk>=1.31.0
-snuba-sdk>=2.0.5
+snuba-sdk>=2.0.6
 simplejson>=3.17.6
 sqlparse>=0.4.4
 statsd>=3.3

+ 1 - 1
requirements-dev-frozen.txt

@@ -178,7 +178,7 @@ sentry-usage-accountant==0.0.10
 simplejson==3.17.6
 six==1.16.0
 sniffio==1.2.0
-snuba-sdk==2.0.5
+snuba-sdk==2.0.6
 sortedcontainers==2.4.0
 soupsieve==2.3.2.post1
 sqlparse==0.4.4

+ 1 - 1
requirements-frozen.txt

@@ -118,7 +118,7 @@ sentry-sdk==1.31.0
 sentry-usage-accountant==0.0.10
 simplejson==3.17.6
 six==1.16.0
-snuba-sdk==2.0.5
+snuba-sdk==2.0.6
 soupsieve==2.3.2.post1
 sqlparse==0.4.4
 statsd==3.3

+ 68 - 5
src/sentry/snuba/metrics_layer/query.py

@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 from dataclasses import replace
+from datetime import datetime
 from typing import Any, Mapping, Union
 
 from snuba_sdk import (
@@ -26,6 +27,10 @@ from sentry.utils.snuba import raw_snql_query
 FilterTypes = Union[Column, CurriedFunction, Condition, BooleanCondition]
 
 
+ALLOWED_GRANULARITIES = [10, 60, 3600, 86400]
+ALLOWED_GRANULARITIES = sorted(ALLOWED_GRANULARITIES)  # Ensure it's ordered
+
+
 def run_query(request: Request) -> Mapping[str, Any]:
     """
     Entrypoint for executing a metrics query in Snuba.
@@ -44,8 +49,6 @@ def run_query(request: Request) -> Mapping[str, Any]:
     assert len(metrics_query.scope.org_ids) == 1  # Initially only allow 1 org id
     organization_id = metrics_query.scope.org_ids[0]
     tenant_ids = request.tenant_ids or {"organization_id": organization_id}
-    if "use_case_id" not in tenant_ids and metrics_query.scope.use_case_id is not None:
-        tenant_ids["use_case_id"] = metrics_query.scope.use_case_id
     request.tenant_ids = tenant_ids
 
     # Process intervals
@@ -57,11 +60,19 @@ def run_query(request: Request) -> Mapping[str, Any]:
             metrics_query.start, metrics_query.end, metrics_query.rollup.interval
         )
         metrics_query = metrics_query.set_start(start).set_end(end)
+    if metrics_query.rollup.granularity is None:
+        granularity = _resolve_granularity(
+            metrics_query.start, metrics_query.end, metrics_query.rollup.interval
+        )
+        metrics_query = metrics_query.set_rollup(
+            replace(metrics_query.rollup, granularity=granularity)
+        )
 
     # Resolves MRI or public name in metrics_query
     try:
         resolved_metrics_query, mappings = _resolve_metrics_query(metrics_query)
         request.query = resolved_metrics_query
+        request.tenant_ids["use_case_id"] = resolved_metrics_query.scope.use_case_id
     except Exception as e:
         metrics.incr(
             "metrics_layer.query",
@@ -108,6 +119,18 @@ GENERIC_ENTITIES = {
 }
 
 
+def _resolve_use_case_id_str(metrics_query: MetricsQuery) -> str:
+    # Automatically resolve the use_case_id if it is not provided
+    # TODO: At the moment only a single Timeseries is allowed. In the future this will need to find
+    # all the Timeseries and ensure they all have the same use case.
+    mri = metrics_query.query.metric.mri
+    parsed_mri = parse_mri(mri)
+    if parsed_mri is None:
+        raise InvalidParams(f"'{mri}' is not a valid MRI")
+
+    return parsed_mri.namespace
+
+
 def _resolve_metrics_entity(mri: str) -> EntityKey:
     parsed_mri = parse_mri(mri)
     if parsed_mri is None:
@@ -119,6 +142,41 @@ def _resolve_metrics_entity(mri: str) -> EntityKey:
     return GENERIC_ENTITIES[parsed_mri.entity]
 
 
+def _resolve_granularity(start: datetime, end: datetime, interval: int | None) -> int:
+    """
+    Returns the granularity in seconds based on the start, end, and interval.
+    If the interval is set, then find the largest granularity that is smaller or equal to the interval.
+
+    If the interval is None, then it must be a totals query, which means this will use the biggest granularity
+    that matches the offset from the time range. This function does no automatic fitting of the time range to
+    a performant granularity.
+
+    E.g. if the time range is 7 days, but going from 3:01:53pm to 3:01:53pm, then it has to use the 10s
+    granularity, and the performance will suffer.
+    """
+    if interval is not None:
+        for granularity in ALLOWED_GRANULARITIES[::-1]:
+            if granularity <= interval:
+                return granularity
+
+        return ALLOWED_GRANULARITIES[0]  # Default to smallest granularity
+
+    found_granularities = []
+    for t in [start, end]:
+        rounded_to_day = t.replace(hour=0, minute=0, second=0, microsecond=0)
+        second_diff = int((t - rounded_to_day).total_seconds())
+
+        found = None
+        for granularity in ALLOWED_GRANULARITIES[::-1]:
+            if second_diff % granularity == 0:
+                found = granularity
+                break
+
+        found_granularities.append(found if found is not None else ALLOWED_GRANULARITIES[0])
+
+    return min(found_granularities)
+
+
 def _resolve_metrics_query(
     metrics_query: MetricsQuery,
 ) -> tuple[MetricsQuery, Mapping[str, str | int]]:
@@ -128,7 +186,6 @@ def _resolve_metrics_query(
     """
     assert metrics_query.query is not None
     metric = metrics_query.query.metric
-    scope = metrics_query.scope
     mappings: dict[str, str | int] = {}
     if not metric.public_name and metric.mri:
         public_name = get_public_name_from_mri(metric.mri)
@@ -143,8 +200,14 @@ def _resolve_metrics_query(
         )
         mappings[metric.public_name] = mri
 
-    org_id = scope.org_ids[0]
-    use_case_id = string_to_use_case_id(scope.use_case_id)
+    org_id = metrics_query.scope.org_ids[0]
+    use_case_id_str = _resolve_use_case_id_str(metrics_query)
+    if metrics_query.scope.use_case_id is None:
+        metrics_query = metrics_query.set_scope(
+            metrics_query.scope.set_use_case_id(use_case_id_str)
+        )
+
+    use_case_id = string_to_use_case_id(use_case_id_str)
     metric_id = resolve_weak(
         use_case_id, org_id, metrics_query.query.metric.mri
     )  # only support raw metrics for now

+ 30 - 1
tests/sentry/snuba/metrics/test_metrics_query_layer/test_metrics_query_layer.py

@@ -2,6 +2,8 @@
 Metrics Service Layer Tests for Performance
 """
 
+from datetime import datetime, timedelta
+
 import pytest
 from snuba_sdk import (
     AliasedExpression,
@@ -18,7 +20,7 @@ from snuba_sdk import (
 from sentry.sentry_metrics import indexer
 from sentry.sentry_metrics.use_case_id_registry import UseCaseID
 from sentry.snuba.metrics.naming_layer import TransactionMRI
-from sentry.snuba.metrics_layer.query import _resolve_metrics_query
+from sentry.snuba.metrics_layer.query import _resolve_granularity, _resolve_metrics_query
 from sentry.testutils.cases import BaseMetricsLayerTestCase, TestCase
 from sentry.testutils.helpers.datetime import freeze_time
 
@@ -154,3 +156,30 @@ class MetricsQueryLayerTest(BaseMetricsLayerTestCase, TestCase):
         assert mappings[TransactionMRI.DURATION.value] == expected_metric_id
         assert mappings["transaction"] == expected_transaction_id
         assert mappings["device"] == expected_device_id
+
+
+@pytest.mark.parametrize(
+    "day_range, sec_offset, interval, expected",
+    [
+        # Interval tests
+        (7, 0, timedelta(hours=1).total_seconds(), 3600),
+        (7, 0, timedelta(seconds=10).total_seconds(), 10),
+        (7, 0, timedelta(seconds=5).total_seconds(), 10),
+        (7, 0, timedelta(hours=2).total_seconds(), 3600),
+        (7, 0, timedelta(days=2).total_seconds(), 86400),
+        # Totals tests
+        (7, 0, None, 86400),
+        (7, timedelta(hours=1).total_seconds(), None, 3600),
+        (7, timedelta(hours=2).total_seconds(), None, 3600),
+        (7, timedelta(hours=2, minutes=1).total_seconds(), None, 60),
+        (7, timedelta(hours=2, minutes=2).total_seconds(), None, 60),
+        (7, timedelta(hours=2, minutes=2, seconds=30).total_seconds(), None, 10),
+        (7, timedelta(hours=2, minutes=2, seconds=10).total_seconds(), None, 10),
+        (7, timedelta(hours=2, minutes=2, seconds=5).total_seconds(), None, 10),
+    ],
+)
+def test_resolve_granularity(day_range: int, sec_offset: int, interval: int, expected: int) -> None:
+    now = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
+    start = now - timedelta(days=day_range) - timedelta(seconds=sec_offset)
+    end = now - timedelta(seconds=sec_offset)
+    assert _resolve_granularity(start, end, interval) == expected

+ 29 - 1
tests/snuba/test_metrics_layer.py

@@ -39,7 +39,6 @@ class SnQLTest(TestCase, BaseMetricsTestCase):
         self.now = datetime.now(tz=timezone.utc).replace(microsecond=0)
         self.hour_ago = self.now - timedelta(hours=1)
         self.org_id = self.project.organization_id
-        # Store a data point every 10 seconds for an hour
         for mri, metric_type in self.metrics.items():
             assert metric_type in {"counter", "distribution", "set"}
             for i in range(360):
@@ -346,3 +345,32 @@ class SnQLTest(TestCase, BaseMetricsTestCase):
         result = run_query(request)
         assert len(result["data"]) == 54
         assert result["totals"]["aggregate_value"] == 59
+
+    def test_automatic_granularity(self) -> None:
+        query = MetricsQuery(
+            query=Timeseries(
+                metric=Metric(
+                    "transaction.duration",
+                    TransactionMRI.DURATION.value,
+                ),
+                aggregate="max",
+            ),
+            start=self.hour_ago,
+            end=self.now,
+            rollup=Rollup(interval=120),
+            scope=MetricsScope(
+                org_ids=[self.org_id],
+                project_ids=[self.project.id],
+            ),
+        )
+
+        request = Request(
+            dataset="generic_metrics",
+            app_id="tests",
+            query=query,
+            tenant_ids={"referrer": "metrics.testing.test", "organization_id": self.org_id},
+        )
+        result = run_query(request)
+        # 30 since it's every 2 minutes.
+        # # TODO(evanh): There's a flaky off by one error that comes from the interval rounding I don't care to fix right now, hence the 31 option.
+        assert len(result["data"]) in [30, 31]