Просмотр исходного кода

fix(metrics): Zero-fill response when there's no data [INGEST-941] (#32157)

When there isn't any metrics data, the `groups` of the response is empty.
However, the absence of data must be represented with an appropriate value.
For example, `count_unique(user)` must return `0` when there aren't any
users, instead of returning no data. The value representing the absence of
data is `0` for sums and counts, and `None` for everything else
(such as `p50`).
Iker Barriocanal 3 года назад
Родитель
Commit
cfdb7fdc1f

+ 43 - 33
src/sentry/release_health/metrics_sessions_v2.py

@@ -23,6 +23,7 @@ from typing import (
     TypedDict,
     Union,
     cast,
+    get_args,
 )
 
 from snuba_sdk import (
@@ -709,39 +710,48 @@ def run_sessions_query(
         }
     )
 
-    for key in data_points.keys():
-        try:
-            output_field = metric_to_output_field[key.metric_key, key.column]
-        except KeyError:
-            continue  # secondary metric, like session.error
-
-        by: MutableMapping[GroupByFieldName, Union[str, int]] = {}
-        if key.release is not None:
-            # Every session has a release, so this should not throw
-            by["release"] = reverse_resolve(key.release)
-        if key.environment is not None:
-            # To match behavior of the old sessions backend, session data
-            # without environment is grouped under the empty string.
-            by["environment"] = reverse_resolve_weak(key.environment) or ""
-        if key.project_id is not None:
-            by["project"] = key.project_id
-
-        for status_value in output_field.get_values(data_points, key):
-            if status_value.session_status is not None:
-                by["session.status"] = status_value.session_status
-
-            group_key: GroupKey = tuple(sorted(by.items()))
-            group = groups[group_key]
-
-            value = status_value.value
-            if value is not None:
-                value = finite_or_none(value)
-
-            if key.bucketed_time is None:
-                group["totals"][output_field.get_name()] = value
-            else:
-                index = timestamp_index[key.bucketed_time]
-                group["series"][output_field.get_name()][index] = value
+    if len(data_points) == 0:
+        # We're only interested in `session.status` group-byes. The rest of the
+        # conditions require work (e.g. getting all environments) that we can't
+        # get without querying the DB.
+        if "session.status" in query_clone.raw_groupby:
+            for status in get_args(_SessionStatus):
+                gkey: GroupKey = (("session.status", status),)
+                groups[gkey]
+    else:
+        for key in data_points.keys():
+            try:
+                output_field = metric_to_output_field[key.metric_key, key.column]
+            except KeyError:
+                continue  # secondary metric, like session.error
+
+            by: MutableMapping[GroupByFieldName, Union[str, int]] = {}
+            if key.release is not None:
+                # Every session has a release, so this should not throw
+                by["release"] = reverse_resolve(key.release)
+            if key.environment is not None:
+                # To match behavior of the old sessions backend, session data
+                # without environment is grouped under the empty string.
+                by["environment"] = reverse_resolve_weak(key.environment) or ""
+            if key.project_id is not None:
+                by["project"] = key.project_id
+
+            for status_value in output_field.get_values(data_points, key):
+                if status_value.session_status is not None:
+                    by["session.status"] = status_value.session_status  # !
+
+                group_key: GroupKey = tuple(sorted(by.items()))
+                group: Group = groups[group_key]
+
+                value = status_value.value
+                if value is not None:
+                    value = finite_or_none(value)
+
+                if key.bucketed_time is None:
+                    group["totals"][output_field.get_name()] = value
+                else:
+                    index = timestamp_index[key.bucketed_time]
+                    group["series"][output_field.get_name()][index] = value
 
     groups_as_list: List[SessionsQueryGroup] = [
         {

+ 86 - 0
tests/sentry/release_health/test_metrics_sessions_v2.py

@@ -0,0 +1,86 @@
+from unittest.mock import patch
+
+from django.urls import reverse
+
+from sentry.release_health.duplex import compare_results
+from sentry.release_health.metrics import MetricsReleaseHealthBackend
+from sentry.release_health.sessions import SessionsReleaseHealthBackend
+from sentry.sentry_metrics.indexer.mock import MockIndexer
+from sentry.testutils.cases import APITestCase, SnubaTestCase
+from tests.snuba.api.endpoints.test_organization_sessions import result_sorted
+
+
class MetricsSessionsV2Test(APITestCase, SnubaTestCase):
    """Compare the metrics-backed sessions endpoint with the sessions-backed
    one when there is no ingested session data.

    The fixture creates orgs/projects/releases/environments but never sends
    sessions, so every query exercises the zero-fill path of the metrics
    implementation.
    """

    def setUp(self):
        super().setUp()
        self.setup_fixture()

    def setup_fixture(self):
        # Two projects under one organization, each with releases and
        # environments but no session data.
        self.organization1 = self.organization
        self.project1 = self.project
        self.project2 = self.create_project(
            name="teletubbies", slug="teletubbies", teams=[self.team], fire_project_created=True
        )

        self.release1 = self.create_release(project=self.project1, version="hello")
        self.release2 = self.create_release(project=self.project1, version="hola")
        self.release3 = self.create_release(project=self.project2, version="hallo")

        self.environment1 = self.create_environment(self.project1, name="development")
        self.environment2 = self.create_environment(self.project1, name="production")
        self.environment3 = self.create_environment(self.project2, name="testing")

    def do_request(self, query, user=None, org=None):
        """GET the organization sessions endpoint as *user* (defaults to the
        fixture user) against *org* (defaults to organization1)."""
        self.login_as(user=user or self.user)
        url = reverse(
            "sentry-api-0-organization-sessions",
            kwargs={"organization_slug": (org or self.organization1).slug},
        )
        return self.client.get(url, query, format="json")

    def get_sessions_data(self, groupby, interval):
        """Fetch `sum(session)` grouped by *groupby* at *interval* (e.g. "1d")
        and return the parsed response body; asserts a 200 status."""
        response = self.do_request(
            {
                "organization_slug": [self.organization1],
                "project": [self.project1.id],
                "field": ["sum(session)"],
                "groupBy": [groupby],
                "interval": interval,
            }
        )
        assert response.status_code == 200
        return response.data

    def test_sessions_metrics_equal_num_keys(self):
        """
        Tests whether the number of keys in the metrics implementation of
        sessions data is the same as in the sessions implementation.

        Runs twice. Firstly, against sessions implementation to populate the
        cache. Then, against the metrics implementation, and compares with
        cached results.
        """
        empty_groupbys = ["project", "release", "environment", "session.status"]
        # Keep the day count as an int so the rollup below is computed in
        # seconds; previously the "1d" interval string itself was multiplied,
        # which performed string repetition instead of arithmetic.
        interval_days = 1
        interval = f"{interval_days}d"

        for groupby in empty_groupbys:
            with patch(
                "sentry.api.endpoints.organization_sessions.release_health",
                SessionsReleaseHealthBackend(),
            ):
                sessions_data = result_sorted(self.get_sessions_data(groupby, interval))

            with patch(
                "sentry.release_health.metrics_sessions_v2.indexer.resolve", MockIndexer().resolve
            ), patch(
                "sentry.api.endpoints.organization_sessions.release_health",
                MetricsReleaseHealthBackend(),
            ):
                metrics_data = result_sorted(self.get_sessions_data(groupby, interval))

            errors = compare_results(
                sessions=sessions_data,
                metrics=metrics_data,
                rollup=interval_days * 24 * 60 * 60,  # days to seconds
            )
            assert len(errors) == 0