Browse Source

fix(sessions): Improve rounding of realtime queries (#24558)

There is a cache between snuba and sentry with a TTL of 5 minutes.
For "realtime" (data to NOW) queries, we round to the minute rather than to the end of the hour
to bust that cache more frequently and actually have meaningful per-minute data.

Additionally, this returns the actually used start/end dates of the query, improves the documentation
and cleans up a few things.
Arpad Borsos 4 years ago
parent
commit
96ebce9f5f

+ 14 - 3
api-docs/components/schemas/sessions.json

@@ -1,14 +1,25 @@
 {
   "Sessions": {
     "type": "object",
-    "required": ["intervals", "groups"],
+    "required": ["start", "end", "intervals", "groups"],
     "properties": {
+      "start": {
+        "type": "string",
+        "format": "date-time",
+        "description": "The start time of the data being returned."
+      },
+      "end": {
+        "type": "string",
+        "format": "date-time",
+        "description": "The exclusive end time of the data being returned."
+      },
       "intervals": {
         "type": "array",
         "items": {
           "type": "string",
           "format": "date-time"
-        }
+        },
+        "description": "The time slices of the timeseries data given in the `groups[].series` field."
       },
       "groups": {
         "type": "array",
@@ -33,7 +44,7 @@
       },
       "series": {
         "type": "object",
-        "description": "These are key/value pairs, the key boeing the requested `field`, and the value is an array of aggregated values. The array corresponds to the times given in the `intervals` array.",
+        "description": "These are key/value pairs, the key being the requested `field`, and the value is an array of aggregated values. The array corresponds to the times given in the `intervals` array.",
         "additionalProperties": {"type": "array"}
       }
     }

+ 3 - 1
api-docs/paths/releases/sessions.json

@@ -116,7 +116,7 @@
       {
         "name": "end",
         "in": "query",
-        "description": "This defines the end of the time series range as an explicit datetime.",
+        "description": "This defines the inclusive end of the time series range as an explicit datetime.",
         "required": false,
         "schema": {"type": "string", "format": "date-time"}
       }
@@ -130,6 +130,8 @@
               "$ref": "../../components/schemas/sessions.json#/Sessions"
             },
             "example": {
+              "start": "2021-02-01T00:00:00Z",
+              "end": "2021-02-04T00:00:00Z",
               "intervals": [
                 "2021-02-01T00:00:00Z",
                 "2021-02-02T00:00:00Z",

+ 17 - 2
src/sentry/snuba/sessions_v2.py

@@ -227,7 +227,6 @@ class QueryDefinition:
     `fields` and `groupby` definitions as [`ColumnDefinition`] objects.
     """
 
-    # XXX: Do *not* enable `allow_minute_resolution` yet outside of unit tests!
     def __init__(self, query, params, allow_minute_resolution=False):
         self.query = query.get("query", "")
         raw_fields = query.getlist("field", [])
@@ -309,6 +308,7 @@ def _get_constrained_date_range(params, allow_minute_resolution=False):
     using_minute_resolution = interval % ONE_HOUR != 0
 
     start, end = get_date_range_from_params(params)
+    now = datetime.now(tz=pytz.utc)
 
     # if `end` is explicitly given, we add a second to it, so it is treated as
     # inclusive. the rounding logic down below will take care of the rest.
@@ -319,6 +319,9 @@ def _get_constrained_date_range(params, allow_minute_resolution=False):
     # round the range up to a multiple of the interval.
     # the minimum is 1h so the "totals" will not go out of sync, as they will
     # use the materialized storage due to no grouping on the `started` column.
+    # NOTE: we can remove the difference between `interval` / `rounding_interval`
+    # as soon as snuba can provide us with grouped totals in the same query
+    # as the timeseries (using `WITH ROLLUP` in clickhouse)
     rounding_interval = int(math.ceil(interval / ONE_HOUR) * ONE_HOUR)
     date_range = timedelta(
         seconds=int(rounding_interval * math.ceil(date_range.total_seconds() / rounding_interval))
@@ -329,7 +332,7 @@ def _get_constrained_date_range(params, allow_minute_resolution=False):
             raise InvalidParams(
                 "The time-range when using one-minute resolution intervals is restricted to 6 hours."
             )
-        if (datetime.now(tz=pytz.utc) - start).total_seconds() > 30 * ONE_DAY:
+        if (now - start).total_seconds() > 30 * ONE_DAY:
             raise InvalidParams(
                 "The time-range when using one-minute resolution intervals is restricted to the last 30 days."
             )
@@ -351,6 +354,12 @@ def _get_constrained_date_range(params, allow_minute_resolution=False):
         date_range += timedelta(seconds=rounding_interval)
     start = end - date_range
 
+    # snuba <-> sentry has a 5 minute cache for *exact* queries, which these
+    # are because of the way we do our rounding. For that reason we round the end
+    # of "realtime" queries to one minute into the future to get a one-minute cache instead.
+    if end > now:
+        end = to_datetime(ONE_MINUTE * (math.floor(to_timestamp(now) / ONE_MINUTE) + 1))
+
     return start, end, interval
 
 
@@ -474,12 +483,18 @@ def massage_sessions_result(query, result_totals, result_timeseries):
         groups.append(group)
 
     return {
+        "start": _isoformat_z(query.start),
+        "end": _isoformat_z(query.end),
         "query": query.query,
         "intervals": timestamps,
         "groups": groups,
     }
 
 
+def _isoformat_z(date):
+    return datetime.utcfromtimestamp(int(to_timestamp(date))).isoformat() + "Z"
+
+
 def _get_timestamps(query):
     """
     Generates a list of timestamps according to `query`.

+ 11 - 5
tests/snuba/api/endpoints/test_organization_sessions.py

@@ -1,6 +1,5 @@
 import datetime
 import pytz
-import pytest
 
 from uuid import uuid4
 from freezegun import freeze_time
@@ -163,6 +162,8 @@ class OrganizationSessionsEndpointTest(APITestCase, SnubaTestCase):
 
         assert response.status_code == 200, response.content
         assert result_sorted(response.data) == {
+            "start": "2021-01-14T00:00:00Z",
+            "end": "2021-01-14T12:28:00Z",
             "query": "",
             "intervals": ["2021-01-14T00:00:00Z"],
             "groups": [{"by": {}, "series": {"sum(session)": [9]}, "totals": {"sum(session)": 9}}],
@@ -174,6 +175,8 @@ class OrganizationSessionsEndpointTest(APITestCase, SnubaTestCase):
 
         assert response.status_code == 200, response.content
         assert result_sorted(response.data) == {
+            "start": "2021-01-13T18:00:00Z",
+            "end": "2021-01-14T12:28:00Z",
             "query": "",
             "intervals": [
                 "2021-01-13T18:00:00Z",
@@ -195,6 +198,8 @@ class OrganizationSessionsEndpointTest(APITestCase, SnubaTestCase):
 
         assert response.status_code == 200, response.content
         assert result_sorted(response.data) == {
+            "start": "2021-01-14T00:00:00Z",
+            "end": "2021-01-14T12:28:00Z",
             "query": "",
             "intervals": ["2021-01-14T00:00:00Z"],
             "groups": [{"by": {}, "series": {"sum(session)": [9]}, "totals": {"sum(session)": 9}}],
@@ -225,6 +230,8 @@ class OrganizationSessionsEndpointTest(APITestCase, SnubaTestCase):
         )
         assert response.status_code == 200, response.content
         assert result_sorted(response.data) == {
+            "start": "2021-01-14T11:00:00Z",
+            "end": "2021-01-14T12:28:00Z",
             "query": "",
             "intervals": ["2021-01-14T11:00:00Z", "2021-01-14T12:00:00Z"],
             "groups": [
@@ -232,7 +239,6 @@ class OrganizationSessionsEndpointTest(APITestCase, SnubaTestCase):
             ],
         }
 
-    @pytest.mark.skip(reason="requires unflagging minute-resolution sessions in snuba")
     @freeze_time("2021-01-14T12:37:28.303Z")
     def test_minute_resolution(self):
         with self.feature("organizations:minute-resolution-sessions"):
@@ -246,19 +252,19 @@ class OrganizationSessionsEndpointTest(APITestCase, SnubaTestCase):
             )
             assert response.status_code == 200, response.content
             assert result_sorted(response.data) == {
+                "start": "2021-01-14T12:00:00Z",
+                "end": "2021-01-14T12:38:00Z",
                 "query": "",
                 "intervals": [
                     "2021-01-14T12:00:00Z",
                     "2021-01-14T12:10:00Z",
                     "2021-01-14T12:20:00Z",
                     "2021-01-14T12:30:00Z",
-                    "2021-01-14T12:40:00Z",
-                    "2021-01-14T12:50:00Z",
                 ],
                 "groups": [
                     {
                         "by": {},
-                        "series": {"sum(session)": [2, 1, 1, 0, 0, 0]},
+                        "series": {"sum(session)": [2, 1, 1, 0]},
                         "totals": {"sum(session)": 4},
                     }
                 ],

+ 23 - 5
tests/snuba/sessions/test_sessions_v2.py

@@ -30,15 +30,15 @@ def result_sorted(result):
     return result
 
 
-@freeze_time("2018-12-11 03:21:34")
+@freeze_time("2018-12-11 03:21:00")
 def test_round_range():
     start, end, interval = _get_constrained_date_range({"statsPeriod": "2d"})
     assert start == datetime(2018, 12, 9, 4, tzinfo=pytz.utc)
-    assert end == datetime(2018, 12, 11, 4, tzinfo=pytz.utc)
+    assert end == datetime(2018, 12, 11, 3, 22, tzinfo=pytz.utc)
 
     start, end, interval = _get_constrained_date_range({"statsPeriod": "2d", "interval": "1d"})
     assert start == datetime(2018, 12, 10, tzinfo=pytz.utc)
-    assert end == datetime(2018, 12, 12, tzinfo=pytz.utc)
+    assert end == datetime(2018, 12, 11, 3, 22, tzinfo=pytz.utc)
 
 
 def test_invalid_interval():
@@ -126,7 +126,8 @@ def test_hourly_rounded_start():
     actual_timestamps = _get_timestamps(query)
 
     assert actual_timestamps[0] == "2021-03-08T09:00:00Z"
-    assert len(actual_timestamps) == 60
+    assert actual_timestamps[-1] == "2021-03-08T09:34:00Z"
+    assert len(actual_timestamps) == 35
 
     # in this case "45m" means from 08:49:00-09:34:00, but since we round start/end
     # to hours, we extend the start time to 08:00:00.
@@ -135,7 +136,8 @@ def test_hourly_rounded_start():
     actual_timestamps = _get_timestamps(query)
 
     assert actual_timestamps[0] == "2021-03-08T08:00:00Z"
-    assert len(actual_timestamps) == 120
+    assert actual_timestamps[-1] == "2021-03-08T09:34:00Z"
+    assert len(actual_timestamps) == 95
 
 
 def test_rounded_end():
@@ -221,6 +223,8 @@ def test_massage_empty():
     result_timeseries = []
 
     expected_result = {
+        "start": "2020-12-18T00:00:00Z",
+        "end": "2020-12-18T11:15:00Z",
         "query": "",
         "intervals": ["2020-12-18T00:00:00Z"],
         "groups": [],
@@ -241,6 +245,8 @@ def test_massage_unbalanced_results():
     result_timeseries = []
 
     expected_result = {
+        "start": "2020-12-18T00:00:00Z",
+        "end": "2020-12-18T11:15:00Z",
         "query": "",
         "intervals": ["2020-12-18T00:00:00Z"],
         "groups": [
@@ -266,6 +272,8 @@ def test_massage_unbalanced_results():
     ]
 
     expected_result = {
+        "start": "2020-12-18T00:00:00Z",
+        "end": "2020-12-18T11:15:00Z",
         "query": "",
         "intervals": ["2020-12-18T00:00:00Z"],
         "groups": [
@@ -295,6 +303,8 @@ def test_massage_simple_timeseries():
     ]
 
     expected_result = {
+        "start": "2020-12-17T12:00:00Z",
+        "end": "2020-12-18T11:15:00Z",
         "query": "",
         "intervals": [
             "2020-12-17T12:00:00Z",
@@ -323,6 +333,8 @@ def test_massage_exact_timeseries():
     ]
 
     expected_result = {
+        "start": "2020-12-17T12:00:00Z",
+        "end": "2020-12-18T12:00:00Z",
         "query": "",
         "intervals": [
             "2020-12-17T12:00:00Z",
@@ -368,6 +380,8 @@ def test_massage_groupby_timeseries():
     ]
 
     expected_result = {
+        "start": "2020-12-17T12:00:00Z",
+        "end": "2020-12-18T11:15:00Z",
         "query": "",
         "intervals": [
             "2020-12-17T12:00:00Z",
@@ -438,6 +452,8 @@ def test_massage_virtual_groupby_timeseries():
     ]
 
     expected_result = {
+        "start": "2020-12-17T18:00:00Z",
+        "end": "2020-12-18T13:26:00Z",
         "query": "",
         "intervals": [
             "2020-12-17T18:00:00Z",
@@ -504,6 +520,8 @@ def test_nan_duration():
     ]
 
     expected_result = {
+        "start": "2020-12-17T12:00:00Z",
+        "end": "2020-12-18T11:15:00Z",
         "query": "",
         "intervals": [
             "2020-12-17T12:00:00Z",