
feat: Implement Session Stats API (#22770)

This is a new API for querying sessions. It has a query interface similar to
discover and returns timeseries data.
Arpad Borsos, 4 years ago
commit 3e70867d78
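
For orientation, here is a hedged sketch of how the new endpoint might be queried. The query parameters and response shape follow the code added in this commit; the host, token, organization slug, and API prefix are illustrative.

```python
# Hypothetical request against the new sessions endpoint (illustrative values).
import requests

response = requests.get(
    "https://sentry.example.com/api/0/organizations/my-org/sessions/",
    headers={"Authorization": "Bearer <auth-token>"},
    params={
        "field": ["sum(session)", "count_unique(user)"],  # metrics to aggregate
        "groupBy": ["release"],                           # tags to group the series by
        "statsPeriod": "1d",                              # date range to query
        "interval": "6h",                                 # timeseries bucket size
    },
)

# The response contains `intervals` plus one group per `groupBy` combination,
# each with `series` (one value per interval) and `totals`.
print(response.json())
```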

+ 50 - 0
src/sentry/api/endpoints/organization_sessions.py

@@ -0,0 +1,50 @@
+from __future__ import absolute_import
+
+from contextlib import contextmanager
+
+from rest_framework.response import Response
+from rest_framework.exceptions import ParseError
+
+import six
+import sentry_sdk
+
+from sentry.api.bases import OrganizationEventsEndpointBase
+from sentry.snuba.sessions_v2 import (
+    InvalidField,
+    QueryDefinition,
+    run_sessions_query,
+    massage_sessions_result,
+)
+
+
+# NOTE: this currently extends `OrganizationEventsEndpointBase` for `handle_query_errors` only, which should ideally be decoupled from the base class.
+class OrganizationSessionsEndpoint(OrganizationEventsEndpointBase):
+    def get(self, request, organization):
+        with self.handle_query_errors():
+            with sentry_sdk.start_span(op="sessions.endpoint", description="build_sessions_query"):
+                query = self.build_sessions_query(request, organization)
+
+            with sentry_sdk.start_span(op="sessions.endpoint", description="run_sessions_query"):
+                result_totals, result_timeseries = run_sessions_query(query)
+
+            with sentry_sdk.start_span(
+                op="sessions.endpoint", description="massage_sessions_result"
+            ):
+                result = massage_sessions_result(query, result_totals, result_timeseries)
+            return Response(result, status=200)
+
+    def build_sessions_query(self, request, organization):
+        # validate and default all `project` params.
+        projects = self.get_projects(request, organization)
+        project_ids = [p.id for p in projects]
+
+        return QueryDefinition(request.GET, project_ids)
+
+    @contextmanager
+    def handle_query_errors(self):
+        try:
+            # TODO: this context manager should be decoupled from `OrganizationEventsEndpointBase`?
+            with super(OrganizationSessionsEndpoint, self).handle_query_errors():
+                yield
+        except InvalidField as error:
+            raise ParseError(detail=six.text_type(error))

+ 6 - 0
src/sentry/api/urls.py

@@ -171,6 +171,7 @@ from .endpoints.organization_repository_details import OrganizationRepositoryDet
 from .endpoints.organization_join_request import OrganizationJoinRequestEndpoint
 from .endpoints.organization_search_details import OrganizationSearchDetailsEndpoint
 from .endpoints.organization_searches import OrganizationSearchesEndpoint
+from .endpoints.organization_sessions import OrganizationSessionsEndpoint
 from .endpoints.organization_sentry_apps import OrganizationSentryAppsEndpoint
 from .endpoints.organization_shortid import ShortIdLookupEndpoint
 from .endpoints.organization_slugs import SlugsUpdateEndpoint
@@ -963,6 +964,11 @@ urlpatterns = [
                     OrganizationSearchesEndpoint.as_view(),
                     name="sentry-api-0-organization-searches",
                 ),
+                url(
+                    r"^(?P<organization_slug>[^\/]+)/sessions/$",
+                    OrganizationSessionsEndpoint.as_view(),
+                    name="sentry-api-0-organization-sessions",
+                ),
                 url(
                     r"^(?P<organization_slug>[^\/]+)/users/issues/$",
                     OrganizationUserIssuesSearchEndpoint.as_view(),

+ 43 - 2
src/sentry/api/utils.py

@@ -2,12 +2,13 @@ from __future__ import absolute_import
 
 from datetime import timedelta
 
+import math
 import six
 from django.utils import timezone
 
 from sentry.search.utils import parse_datetime_string, InvalidQuery
-from sentry.utils.dates import parse_stats_period
-
+from sentry.utils.dates import parse_stats_period, to_timestamp, to_datetime
+from sentry.constants import MAX_ROLLUP_POINTS
 
 MAX_STATS_PERIOD = timedelta(days=90)
 
@@ -83,3 +84,43 @@ def get_date_range_from_params(params, optional=False):
         raise InvalidParams("start must be before end")
 
     return start, end
+
+
+def get_date_range_rollup_from_params(
+    params,
+    minimum_interval="1h",
+    default_interval="",
+    round_range=False,
+    max_points=MAX_ROLLUP_POINTS,
+):
+    """
+    Similar to `get_date_range_from_params`, but this also parses and validates
+    an `interval`, as `get_rollup_from_request` would do.
+
+    This also optionally rounds the returned range to the given `interval`.
+    The rounding uses integer arithmetic on unix timestamps, so it might yield
+    unexpected results when the interval is > 1d.
+    """
+    minimum_interval = parse_stats_period(minimum_interval).total_seconds()
+    interval = parse_stats_period(params.get("interval", default_interval))
+    interval = minimum_interval if interval is None else interval.total_seconds()
+    if interval <= 0:
+        raise InvalidParams("Interval cannot result in a zero duration.")
+
+    # round the interval up to the minimum
+    interval = int(minimum_interval * math.ceil(interval / minimum_interval))
+
+    start, end = get_date_range_from_params(params)
+    date_range = end - start
+    if date_range.total_seconds() / interval > max_points:
+        raise InvalidParams(
+            "Your interval and date range would create too many results. "
+            "Use a larger interval, or a smaller date range."
+        )
+
+    if round_range:
+        end_ts = int(interval * math.ceil(to_timestamp(end) / interval))
+        end = to_datetime(end_ts)
+        start = end - date_range
+
+    return start, end, interval

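To make the interval and range rounding in the new `get_date_range_rollup_from_params` helper concrete, here is a small standalone sketch of the same arithmetic. The numbers are illustrative and mirror the tests further down; this snippet is not part of the commit.

```python
import math

# The requested interval is rounded up to a multiple of the minimum interval,
# e.g. a requested "8m" with a "5m" minimum becomes 600 seconds.
minimum_interval = 300.0   # "5m" as total_seconds()
requested = 480.0          # "8m" as total_seconds()
interval = int(minimum_interval * math.ceil(requested / minimum_interval))
assert interval == 600

# With `round_range=True`, the end of the range snaps up to the next interval
# boundary (integer arithmetic on unix timestamps), and the start is shifted
# so that the overall duration stays the same.
one_hour = 3600.0
end_ts = 1544498494.0      # 2018-12-11 03:21:34 UTC
rounded_end = int(one_hour * math.ceil(end_ts / one_hour))
assert rounded_end == 1544500800  # 2018-12-11 04:00:00 UTC
```
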
+ 413 - 0
src/sentry/snuba/sessions_v2.py

@@ -0,0 +1,413 @@
+from __future__ import absolute_import
+
+from datetime import datetime
+
+import six
+import itertools
+
+from sentry.api.event_search import get_filter
+from sentry.api.utils import get_date_range_rollup_from_params
+from sentry.utils.dates import to_timestamp
+from sentry.utils.snuba import Dataset, raw_query
+
+"""
+The new Sessions API defines a "metrics"-like interface which can be used in
+a similar way to "discover".
+See https://www.notion.so/sentry/Session-Stats-API-0016d3713d1a4276be0396a338c7930a
+
+# "Metrics"
+
+We have basically 3 "metrics" that we can query:
+
+- `session` (counter): The number of sessions that occurred
+- `user` (set): The set of `distinct_id`s.
+- `session.duration` (histogram): The duration of individual sessions
+  (not available for pre-aggregated sessions)
+
+# "Operations" on metrics
+
+Depending on the metric *type*, we can query different things:
+
+- `counter`: Can only be accessed via the `sum` function.
+- `set`: Can only be accessed via the `count_unique` function.
+- `histogram`: Can have different quantiles / averages available via:
+  `avg`, `p50`...`p99`, `max`.
+
+# Tags / Grouping
+
+The Session data can be grouped by a set of tags, which can only appear in the
+`groupBy` of the query.
+
+- `project`
+- `environment`
+- `release`:
+  TODO: describe specific release filters such as `release.major`, etc
+
+## "Virtual" tags
+
+The `session.status` is considered a "virtual" tag, as it does not appear as
+such in the current session dataset. Instead, the status is represented as
+different columns in the dataset, and it is "exploded" into distinct groups
+purely in code, which is the tricky part.
+
+Essentially, a row like this:
+```
+{
+    sessions: 123,
+    sessions_abnormal: 4,
+    sessions_crashed: 3,
+    sessions_errored: 23,
+}
+```
+
+Is then "exploded" into something like:
+
+```
+[{
+    by: { "session.status": "healthy" },
+    series: {
+        "sum(session)": [100, ...] <- this is `sessions - sessions_errored`
+    }
+}, {
+    by: { "session.status": "errored" },
+    series: {
+        "sum(session)": [23, ...]
+    }
+},
+...
+]
+```
+"""
+
+
+class SessionsField(object):
+    def get_snuba_columns(self, raw_groupby):
+        if "session.status" in raw_groupby:
+            return ["sessions", "sessions_abnormal", "sessions_crashed", "sessions_errored"]
+        return ["sessions"]
+
+    def extract_from_row(self, row, group):
+        if row is None:
+            return 0
+        status = group.get("session.status")
+        if status is None:
+            return row["sessions"]
+        if status == "healthy":
+            return row["sessions"] - row["sessions_errored"]
+        if status == "abnormal":
+            return row["sessions_abnormal"]
+        if status == "crashed":
+            return row["sessions_crashed"]
+        if status == "errored":
+            return row["sessions_errored"]
+        return 0
+
+
+class UsersField(object):
+    def get_snuba_columns(self, raw_groupby):
+        if "session.status" in raw_groupby:
+            return ["users", "users_abnormal", "users_crashed", "users_errored"]
+        return ["users"]
+
+    def extract_from_row(self, row, group):
+        if row is None:
+            return 0
+        status = group.get("session.status")
+        if status is None:
+            return row["users"]
+        if status == "healthy":
+            return row["users"] - row["users_errored"]
+        if status == "abnormal":
+            return row["users_abnormal"]
+        if status == "crashed":
+            return row["users_crashed"]
+        if status == "errored":
+            return row["users_errored"]
+        return 0
+
+
+class DurationAverageField(object):
+    def get_snuba_columns(self, raw_groupby):
+        return ["duration_avg"]
+
+    def extract_from_row(self, row, group):
+        if row is None:
+            return None
+        status = group.get("session.status")
+        if status is None or status == "healthy":
+            return row["duration_avg"]
+        return None
+
+
+class DurationQuantileField(object):
+    def __init__(self, quantile_index):
+        self.quantile_index = quantile_index
+
+    def get_snuba_columns(self, raw_groupby):
+        return ["duration_quantiles"]
+
+    def extract_from_row(self, row, group):
+        if row is None:
+            return None
+        status = group.get("session.status")
+        if status is None or status == "healthy":
+            return row["duration_quantiles"][self.quantile_index]
+        return None
+
+
+COLUMN_MAP = {
+    "sum(session)": SessionsField(),
+    "count_unique(user)": UsersField(),
+    "avg(session.duration)": DurationAverageField(),
+    "p50(session.duration)": DurationQuantileField(0),
+    "p75(session.duration)": DurationQuantileField(1),
+    "p90(session.duration)": DurationQuantileField(2),
+    "p95(session.duration)": DurationQuantileField(3),
+    "p99(session.duration)": DurationQuantileField(4),
+    "max(session.duration)": DurationQuantileField(5),
+}
+
+
+class SimpleGroupBy(object):
+    def __init__(self, row_name, name=None):
+        self.row_name = row_name
+        self.name = name or row_name
+
+    def get_snuba_columns(self):
+        return [self.row_name]
+
+    def get_snuba_groupby(self):
+        return [self.row_name]
+
+    def get_keys_for_row(self, row):
+        return [(self.name, row[self.row_name])]
+
+
+class SessionStatusGroupBy(object):
+    def get_snuba_columns(self):
+        return []
+
+    def get_snuba_groupby(self):
+        return []
+
+    def get_keys_for_row(self, row):
+        return [("session.status", key) for key in ["healthy", "abnormal", "crashed", "errored"]]
+
+
+GROUPBY_MAP = {
+    "project": SimpleGroupBy("project_id", "project"),
+    "environment": SimpleGroupBy("environment"),
+    "release": SimpleGroupBy("release"),
+    "session.status": SessionStatusGroupBy(),
+}
+
+
+class InvalidField(Exception):
+    pass
+
+
+class QueryDefinition(object):
+    """
+    This is the definition of the query the user wants to execute.
+    This is constructed out of the request params, and also contains a list of
+    `fields` and `groupby` definitions as [`ColumnDefinition`] objects.
+    """
+
+    def __init__(self, query, project_ids=None):
+        self.query = query.get("query", "")
+        raw_fields = query.getlist("field", [])
+        raw_groupby = query.getlist("groupBy", [])
+
+        if len(raw_fields) == 0:
+            raise InvalidField(u'Request is missing a "field"')
+
+        self.fields = {}
+        for key in raw_fields:
+            if key not in COLUMN_MAP:
+                raise InvalidField(u'Invalid field: "{}"'.format(key))
+            self.fields[key] = COLUMN_MAP[key]
+
+        self.groupby = []
+        for key in raw_groupby:
+            if key not in GROUPBY_MAP:
+                raise InvalidField(u'Invalid groupBy: "{}"'.format(key))
+            self.groupby.append(GROUPBY_MAP[key])
+
+        start, end, rollup = get_date_range_rollup_from_params(query, "1h", round_range=True)
+        self.rollup = rollup
+        self.start = start
+        self.end = end
+
+        query_columns = set()
+        for field in self.fields.values():
+            query_columns.update(field.get_snuba_columns(raw_groupby))
+        for groupby in self.groupby:
+            query_columns.update(groupby.get_snuba_columns())
+        self.query_columns = list(query_columns)
+
+        query_groupby = set()
+        for groupby in self.groupby:
+            query_groupby.update(groupby.get_snuba_groupby())
+        self.query_groupby = list(query_groupby)
+
+        params = {"project_id": project_ids or []}
+        snuba_filter = get_filter(self.query, params)
+
+        self.aggregations = snuba_filter.aggregations
+        self.conditions = snuba_filter.conditions
+        self.filter_keys = snuba_filter.filter_keys
+
+
+TS_COL = "bucketed_started"
+
+
+def run_sessions_query(query):
+    """
+    Runs the `query` as defined by [`QueryDefinition`] two times, once for the
+    `totals` and again for the actual time-series data grouped by the requested
+    interval.
+    """
+    result_totals = raw_query(
+        dataset=Dataset.Sessions,
+        selected_columns=query.query_columns,
+        groupby=query.query_groupby,
+        aggregations=query.aggregations,
+        conditions=query.conditions,
+        filter_keys=query.filter_keys,
+        start=query.start,
+        end=query.end,
+        rollup=query.rollup,
+        referrer="sessions.totals",
+    )
+
+    result_timeseries = raw_query(
+        dataset=Dataset.Sessions,
+        selected_columns=[TS_COL] + query.query_columns,
+        groupby=[TS_COL] + query.query_groupby,
+        aggregations=query.aggregations,
+        conditions=query.conditions,
+        filter_keys=query.filter_keys,
+        start=query.start,
+        end=query.end,
+        rollup=query.rollup,
+        referrer="sessions.timeseries",
+    )
+
+    return result_totals["data"], result_timeseries["data"]
+
+
+def massage_sessions_result(query, result_totals, result_timeseries):
+    """
+    Post-processes the query result.
+
+    Given the `query` as defined by [`QueryDefinition`] and its totals and
+    timeseries results from snuba, groups and transforms the result into the
+    expected format.
+
+    For example:
+    ```json
+    {
+      "intervals": [
+        "2020-12-16T00:00:00Z",
+        "2020-12-16T12:00:00Z",
+        "2020-12-17T00:00:00Z"
+      ],
+      "groups": [
+        {
+          "by": { "release": "99b8edc5a3bb49d01d16426d7bb9c511ec41f81e" },
+          "series": { "sum(session)": [0, 1, 0] },
+          "totals": { "sum(session)": 1 }
+        },
+        {
+          "by": { "release": "test-example-release" },
+          "series": { "sum(session)": [0, 10, 20] },
+          "totals": { "sum(session)": 30 }
+        }
+      ]
+    }
+    ```
+    """
+    timestamps = _get_timestamps(query)
+
+    total_groups = _split_rows_groupby(result_totals, query.groupby)
+    timeseries_groups = _split_rows_groupby(result_timeseries, query.groupby)
+
+    def make_timeseries(rows, group):
+        for row in rows:
+            row[TS_COL] = row[TS_COL][:19] + "Z"
+
+        rows.sort(key=lambda row: row[TS_COL])
+        fields = [(name, field, list()) for name, field in query.fields.items()]
+        group_index = 0
+
+        while group_index < len(rows):
+            row = rows[group_index]
+            if row[TS_COL] < timestamps[0]:
+                group_index += 1
+            else:
+                break
+
+        for ts in timestamps:
+            row = rows[group_index] if group_index < len(rows) else None
+            if row is not None and row[TS_COL] == ts:
+                group_index += 1
+            else:
+                row = None
+
+            for (name, field, series) in fields:
+                series.append(field.extract_from_row(row, group))
+
+        return {name: series for (name, field, series) in fields}
+
+    def make_totals(totals, group):
+        return {
+            name: field.extract_from_row(totals[0], group) for name, field in query.fields.items()
+        }
+
+    groups = []
+    for key, totals in total_groups.items():
+        by = dict(key)
+
+        group = {
+            "by": by,
+            "totals": make_totals(totals, by),
+            "series": make_timeseries(timeseries_groups[key], by),
+        }
+
+        groups.append(group)
+
+    return {
+        "query": query.query,
+        "intervals": timestamps,
+        "groups": groups,
+    }
+
+
+def _get_timestamps(query):
+    """
+    Generates a list of timestamps according to `query`.
+    The timestamps are returned as ISO strings for now.
+    """
+    rollup = query.rollup
+    start = int(to_timestamp(query.start))
+    end = int(to_timestamp(query.end))
+    return [
+        datetime.utcfromtimestamp(ts).isoformat() + "Z"
+        for ts in six.moves.xrange(start, end, rollup)
+    ]
+
+
+def _split_rows_groupby(rows, groupby):
+    groups = {}
+    for row in rows:
+        key_parts = (group.get_keys_for_row(row) for group in groupby)
+        keys = itertools.product(*key_parts)
+
+        for key in keys:
+            key = frozenset(key)
+
+            if key not in groups:
+                groups[key] = []
+            groups[key].append(row)
+
+    return groups

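The "virtual" `session.status` group-by described in the module docstring of `sessions_v2.py` above is the least obvious part of this file, so here is a minimal standalone sketch of the explode step, using the same arithmetic as `SessionsField.extract_from_row` (the row values are made up).

```python
# One pre-aggregated row from snuba, exploded into one value per status.
row = {"sessions": 123, "sessions_abnormal": 4, "sessions_crashed": 3, "sessions_errored": 23}

exploded = {
    "healthy": row["sessions"] - row["sessions_errored"],  # 100
    "abnormal": row["sessions_abnormal"],                  # 4
    "crashed": row["sessions_crashed"],                    # 3
    "errored": row["sessions_errored"],                    # 23
}

# "healthy" is derived, so healthy + errored always adds back up to the total.
assert exploded["healthy"] + exploded["errored"] == row["sessions"]
```
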
+ 42 - 1
tests/sentry/api/test_utils.py

@@ -5,7 +5,12 @@ import datetime
 from django.utils import timezone
 from freezegun import freeze_time
 
-from sentry.api.utils import get_date_range_from_params, InvalidParams, MAX_STATS_PERIOD
+from sentry.api.utils import (
+    get_date_range_from_params,
+    get_date_range_rollup_from_params,
+    InvalidParams,
+    MAX_STATS_PERIOD,
+)
 from sentry.testutils import TestCase
 
 
@@ -54,3 +59,39 @@ class GetDateRangeFromParamsTest(TestCase):
 
         with self.assertRaises(InvalidParams):
             start, end = get_date_range_from_params({"statsPeriodStart": "14d"})
+
+
+class GetDateRangeRollupFromParamsTest(TestCase):
+    def test_intervals(self):
+        # defaults to 1h
+        start, end, interval = get_date_range_rollup_from_params({})
+        assert interval == 3600
+
+        # rounds up to a multiple of the minimum
+        start, end, interval = get_date_range_rollup_from_params(
+            {"statsPeriod": "14h", "interval": "8m"}, minimum_interval="5m"
+        )
+        assert interval == 600
+
+    @freeze_time("2018-12-11 03:21:34")
+    def test_round_range(self):
+        start, end, interval = get_date_range_rollup_from_params(
+            {"statsPeriod": "2d"}, round_range=True
+        )
+        assert start == datetime.datetime(2018, 12, 9, 4, tzinfo=timezone.utc)
+        assert end == datetime.datetime(2018, 12, 11, 4, tzinfo=timezone.utc)
+
+        start, end, interval = get_date_range_rollup_from_params(
+            {"statsPeriod": "2d", "interval": "1d"}, round_range=True
+        )
+        assert start == datetime.datetime(2018, 12, 10, tzinfo=timezone.utc)
+        assert end == datetime.datetime(2018, 12, 12, tzinfo=timezone.utc)
+
+    def test_invalid_interval(self):
+        with self.assertRaises(InvalidParams):
+            start, end, interval = get_date_range_rollup_from_params({"interval": "0d"})
+        with self.assertRaises(InvalidParams):
+            # default stats period is 90d
+            start, end, interval = get_date_range_rollup_from_params(
+                {"interval": "1d"}, max_points=80
+            )

+ 408 - 0
tests/snuba/api/endpoints/test_organization_sessions.py

@@ -0,0 +1,408 @@
+from __future__ import absolute_import
+
+import datetime
+import pytz
+
+from uuid import uuid4
+from freezegun import freeze_time
+
+from django.core.urlresolvers import reverse
+
+from sentry.testutils import APITestCase, SnubaTestCase
+from sentry.utils.dates import to_timestamp
+
+
+def result_sorted(result):
+    """sort the groups of the results array by the `by` object, ensuring a stable order"""
+
+    def stable_dict(d):
+        return tuple(sorted(d.items(), key=lambda t: t[0]))
+
+    result["groups"].sort(key=lambda group: stable_dict(group["by"]))
+    return result
+
+
+class OrganizationSessionsEndpointTest(APITestCase, SnubaTestCase):
+    def setUp(self):
+        super(OrganizationSessionsEndpointTest, self).setUp()
+        self.setup_fixture()
+
+    def setup_fixture(self):
+        self.timestamp = to_timestamp(datetime.datetime(2021, 1, 14, 12, 27, 28, tzinfo=pytz.utc))
+        self.received = self.timestamp
+        self.session_started = self.timestamp // 60 * 60
+
+        self.organization1 = self.organization
+        self.organization2 = self.create_organization()
+        self.project1 = self.project
+        self.project2 = self.create_project()
+        self.project3 = self.create_project()
+        self.project4 = self.create_project(organization=self.organization2)
+
+        template = {
+            "distinct_id": "00000000-0000-0000-0000-000000000000",
+            "status": "exited",
+            "seq": 0,
+            "release": "foo@1.0.0",
+            "environment": "production",
+            "retention_days": 90,
+            "duration": None,
+            "errors": 0,
+            "started": self.session_started,
+            "received": self.received,
+        }
+
+        def make_session(project, **kwargs):
+            return dict(
+                template,
+                session_id=uuid4().hex,
+                org_id=project.organization_id,
+                project_id=project.id,
+                **kwargs
+            )
+
+        self.store_session(make_session(self.project1))
+        self.store_session(make_session(self.project1, release="foo@1.1.0"))
+        self.store_session(make_session(self.project1, started=self.session_started - 60 * 60))
+        self.store_session(make_session(self.project1, started=self.session_started - 12 * 60 * 60))
+        self.store_session(make_session(self.project2, status="crashed"))
+        self.store_session(make_session(self.project2, environment="development"))
+        self.store_session(make_session(self.project3, errors=1))
+        self.store_session(
+            make_session(
+                self.project3,
+                distinct_id="39887d89-13b2-4c84-8c23-5d13d2102664",
+                started=self.session_started - 60 * 60,
+            )
+        )
+        self.store_session(
+            make_session(
+                self.project3, distinct_id="39887d89-13b2-4c84-8c23-5d13d2102664", errors=1
+            )
+        )
+        self.store_session(make_session(self.project4))
+
+    def do_request(self, query):
+        self.login_as(user=self.user)
+        url = reverse(
+            "sentry-api-0-organization-sessions",
+            kwargs={"organization_slug": self.organization.slug},
+        )
+        return self.client.get(url, query, format="json")
+
+    def test_empty_request(self):
+        response = self.do_request({})
+
+        assert response.status_code == 400, response.content
+        assert response.data == {"detail": 'Request is missing a "field"'}
+
+    def test_inaccessible_project(self):
+        response = self.do_request({"project": [self.project4.id]})
+
+        assert response.status_code == 403, response.content
+        assert response.data == {"detail": "You do not have permission to perform this action."}
+
+    def test_unknown_field(self):
+        response = self.do_request({"field": ["summ(sessin)"]})
+
+        assert response.status_code == 400, response.content
+        assert response.data == {"detail": 'Invalid field: "summ(sessin)"'}
+
+    def test_unknown_groupby(self):
+        response = self.do_request({"field": ["sum(session)"], "groupBy": ["envriomnent"]})
+
+        assert response.status_code == 400, response.content
+        assert response.data == {"detail": 'Invalid groupBy: "envriomnent"'}
+
+    def test_too_many_points(self):
+        # TODO: looks like this is well within the range of valid points
+        return
+        # default statsPeriod is 90d
+        response = self.do_request({"field": ["sum(session)"], "interval": "1h"})
+
+        assert response.status_code == 400, response.content
+        assert response.data == {}
+
+    @freeze_time("2021-01-14T12:27:28.303Z")
+    def test_timeseries_interval(self):
+        response = self.do_request(
+            {"statsPeriod": "1d", "interval": "1d", "field": ["sum(session)"]}
+        )
+
+        assert response.status_code == 200, response.content
+        assert result_sorted(response.data) == {
+            "query": "",
+            "intervals": ["2021-01-14T00:00:00Z"],
+            "groups": [{"by": {}, "series": {"sum(session)": [9]}, "totals": {"sum(session)": 9}}],
+        }
+
+        response = self.do_request(
+            {"statsPeriod": "1d", "interval": "6h", "field": ["sum(session)"]}
+        )
+
+        assert response.status_code == 200, response.content
+        assert result_sorted(response.data) == {
+            "query": "",
+            "intervals": [
+                "2021-01-13T18:00:00Z",
+                "2021-01-14T00:00:00Z",
+                "2021-01-14T06:00:00Z",
+                "2021-01-14T12:00:00Z",
+            ],
+            "groups": [
+                {"by": {}, "series": {"sum(session)": [0, 1, 2, 6]}, "totals": {"sum(session)": 9}}
+            ],
+        }
+
+    @freeze_time("2021-01-14T12:27:28.303Z")
+    def test_minimum_interval(self):
+        # smallest interval is 1h
+        response = self.do_request(
+            {"statsPeriod": "2h", "interval": "5m", "field": ["sum(session)"]}
+        )
+
+        assert response.status_code == 200, response.content
+        assert result_sorted(response.data) == {
+            "query": "",
+            "intervals": ["2021-01-14T11:00:00Z", "2021-01-14T12:00:00Z"],
+            "groups": [
+                {"by": {}, "series": {"sum(session)": [2, 6]}, "totals": {"sum(session)": 8}}
+            ],
+        }
+
+    @freeze_time("2021-01-14T12:27:28.303Z")
+    def test_filter_projects(self):
+        response = self.do_request(
+            {
+                "statsPeriod": "1d",
+                "interval": "1d",
+                "field": ["sum(session)"],
+                "project": [self.project2.id, self.project3.id],
+            }
+        )
+
+        assert response.status_code == 200, response.content
+        assert result_sorted(response.data)["groups"] == [
+            {"by": {}, "series": {"sum(session)": [5]}, "totals": {"sum(session)": 5}}
+        ]
+
+    @freeze_time("2021-01-14T12:27:28.303Z")
+    def test_filter_environment(self):
+        response = self.do_request(
+            {
+                "statsPeriod": "1d",
+                "interval": "1d",
+                "field": ["sum(session)"],
+                "query": "environment:development",
+            }
+        )
+
+        assert response.status_code == 200, response.content
+        assert result_sorted(response.data)["groups"] == [
+            {"by": {}, "series": {"sum(session)": [1]}, "totals": {"sum(session)": 1}}
+        ]
+
+    @freeze_time("2021-01-14T12:27:28.303Z")
+    def test_filter_release(self):
+        response = self.do_request(
+            {
+                "statsPeriod": "1d",
+                "interval": "1d",
+                "field": ["sum(session)"],
+                "query": "release:foo@1.1.0",
+            }
+        )
+
+        assert response.status_code == 200, response.content
+        assert result_sorted(response.data)["groups"] == [
+            {"by": {}, "series": {"sum(session)": [1]}, "totals": {"sum(session)": 1}}
+        ]
+
+    @freeze_time("2021-01-14T12:27:28.303Z")
+    def test_groupby_project(self):
+        response = self.do_request(
+            {
+                "statsPeriod": "1d",
+                "interval": "1d",
+                "field": ["sum(session)"],
+                "groupBy": ["project"],
+            }
+        )
+
+        assert response.status_code == 200, response.content
+        assert result_sorted(response.data)["groups"] == [
+            {
+                "by": {"project": self.project1.id},
+                "series": {"sum(session)": [4]},
+                "totals": {"sum(session)": 4},
+            },
+            {
+                "by": {"project": self.project2.id},
+                "series": {"sum(session)": [2]},
+                "totals": {"sum(session)": 2},
+            },
+            {
+                "by": {"project": self.project3.id},
+                "series": {"sum(session)": [3]},
+                "totals": {"sum(session)": 3},
+            },
+        ]
+
+    @freeze_time("2021-01-14T12:27:28.303Z")
+    def test_groupby_environment(self):
+        response = self.do_request(
+            {
+                "statsPeriod": "1d",
+                "interval": "1d",
+                "field": ["sum(session)"],
+                "groupBy": ["environment"],
+            }
+        )
+
+        assert response.status_code == 200, response.content
+        assert result_sorted(response.data)["groups"] == [
+            {
+                "by": {"environment": "development"},
+                "series": {"sum(session)": [1]},
+                "totals": {"sum(session)": 1},
+            },
+            {
+                "by": {"environment": "production"},
+                "series": {"sum(session)": [8]},
+                "totals": {"sum(session)": 8},
+            },
+        ]
+
+    @freeze_time("2021-01-14T12:27:28.303Z")
+    def test_groupby_release(self):
+        response = self.do_request(
+            {
+                "statsPeriod": "1d",
+                "interval": "1d",
+                "field": ["sum(session)"],
+                "groupBy": ["release"],
+            }
+        )
+
+        assert response.status_code == 200, response.content
+        assert result_sorted(response.data)["groups"] == [
+            {
+                "by": {"release": "foo@1.0.0"},
+                "series": {"sum(session)": [8]},
+                "totals": {"sum(session)": 8},
+            },
+            {
+                "by": {"release": "foo@1.1.0"},
+                "series": {"sum(session)": [1]},
+                "totals": {"sum(session)": 1},
+            },
+        ]
+
+    @freeze_time("2021-01-14T12:27:28.303Z")
+    def test_groupby_status(self):
+        response = self.do_request(
+            {
+                "statsPeriod": "1d",
+                "interval": "1d",
+                "field": ["sum(session)"],
+                "groupBy": ["session.status"],
+            }
+        )
+
+        assert response.status_code == 200, response.content
+        assert result_sorted(response.data)["groups"] == [
+            {
+                "by": {"session.status": "abnormal"},
+                "series": {"sum(session)": [0]},
+                "totals": {"sum(session)": 0},
+            },
+            {
+                "by": {"session.status": "crashed"},
+                "series": {"sum(session)": [1]},
+                "totals": {"sum(session)": 1},
+            },
+            {
+                "by": {"session.status": "errored"},
+                "series": {"sum(session)": [3]},
+                "totals": {"sum(session)": 3},
+            },
+            {
+                "by": {"session.status": "healthy"},
+                "series": {"sum(session)": [6]},
+                "totals": {"sum(session)": 6},
+            },
+        ]
+
+    @freeze_time("2021-01-14T12:27:28.303Z")
+    def test_groupby_cross(self):
+        response = self.do_request(
+            {
+                "statsPeriod": "1d",
+                "interval": "1d",
+                "field": ["sum(session)"],
+                "groupBy": ["release", "environment"],
+            }
+        )
+
+        assert response.status_code == 200, response.content
+        assert result_sorted(response.data)["groups"] == [
+            {
+                "by": {"environment": "development", "release": "foo@1.0.0"},
+                "series": {"sum(session)": [1]},
+                "totals": {"sum(session)": 1},
+            },
+            {
+                "by": {"environment": "production", "release": "foo@1.0.0"},
+                "series": {"sum(session)": [7]},
+                "totals": {"sum(session)": 7},
+            },
+            {
+                "by": {"environment": "production", "release": "foo@1.1.0"},
+                "series": {"sum(session)": [1]},
+                "totals": {"sum(session)": 1},
+            },
+        ]
+
+    @freeze_time("2021-01-14T12:27:28.303Z")
+    def test_users_groupby(self):
+        response = self.do_request(
+            {"statsPeriod": "1d", "interval": "1d", "field": ["count_unique(user)"]}
+        )
+
+        assert response.status_code == 200, response.content
+        assert result_sorted(response.data)["groups"] == [
+            {"by": {}, "series": {"count_unique(user)": [1]}, "totals": {"count_unique(user)": 1}}
+        ]
+
+        response = self.do_request(
+            {
+                "statsPeriod": "1d",
+                "interval": "1d",
+                "field": ["count_unique(user)"],
+                "groupBy": ["session.status"],
+            }
+        )
+
+        assert response.status_code == 200, response.content
+        assert result_sorted(response.data)["groups"] == [
+            {
+                "by": {"session.status": "abnormal"},
+                "series": {"count_unique(user)": [0]},
+                "totals": {"count_unique(user)": 0},
+            },
+            {
+                "by": {"session.status": "crashed"},
+                "series": {"count_unique(user)": [0]},
+                "totals": {"count_unique(user)": 0},
+            },
+            {
+                "by": {"session.status": "errored"},
+                "series": {"count_unique(user)": [1]},
+                "totals": {"count_unique(user)": 1},
+            },
+            {
+                "by": {"session.status": "healthy"},
+                "series": {"count_unique(user)": [0]},
+                "totals": {"count_unique(user)": 0},
+            },
+        ]

+ 240 - 0
tests/snuba/sessions/test_sessions_v2.py

@@ -0,0 +1,240 @@
+from __future__ import absolute_import
+
+from freezegun import freeze_time
+from django.http import QueryDict
+
+# from sentry.testutils import TestCase
+from sentry.snuba.sessions_v2 import (
+    QueryDefinition,
+    massage_sessions_result,
+    _get_timestamps,
+)
+
+
+def _make_query(qs):
+    return QueryDefinition(QueryDict(qs), {})
+
+
+def result_sorted(result):
+    """sort the groups of the results array by the `by` object, ensuring a stable order"""
+
+    def stable_dict(d):
+        return tuple(sorted(d.items(), key=lambda t: t[0]))
+
+    result["groups"].sort(key=lambda group: stable_dict(group["by"]))
+    return result
+
+
+@freeze_time("2020-12-18T11:14:17.105Z")
+def test_timestamps():
+    query = _make_query("statsPeriod=1d&interval=12h&field=sum(session)")
+
+    expected_timestamps = ["2020-12-17T12:00:00Z", "2020-12-18T00:00:00Z"]
+    actual_timestamps = _get_timestamps(query)
+
+    assert actual_timestamps == expected_timestamps
+
+
+def test_simple_query():
+    query = _make_query("statsPeriod=1d&interval=12h&field=sum(session)")
+
+    assert query.query_columns == ["sessions"]
+
+
+def test_groupby_query():
+    query = _make_query("statsPeriod=1d&interval=12h&field=sum(session)&groupBy=release")
+
+    assert sorted(query.query_columns) == ["release", "sessions"]
+    assert query.query_groupby == ["release"]
+
+
+def test_virtual_groupby_query():
+    query = _make_query("statsPeriod=1d&interval=12h&field=sum(session)&groupBy=session.status")
+
+    assert sorted(query.query_columns) == [
+        "sessions",
+        "sessions_abnormal",
+        "sessions_crashed",
+        "sessions_errored",
+    ]
+    assert query.query_groupby == []
+
+    query = _make_query(
+        "statsPeriod=1d&interval=12h&field=count_unique(user)&groupBy=session.status"
+    )
+
+    assert sorted(query.query_columns) == [
+        "users",
+        "users_abnormal",
+        "users_crashed",
+        "users_errored",
+    ]
+    assert query.query_groupby == []
+
+
+@freeze_time("2020-12-18T11:14:17.105Z")
+def test_massage_simple_timeseries():
+    """A timeseries is filled up when it only receives partial data"""
+
+    query = _make_query("statsPeriod=1d&interval=6h&field=sum(session)")
+    result_totals = [{"sessions": 4}]
+    # snuba returns the datetimes as strings for now
+    result_timeseries = [
+        {"sessions": 2, "bucketed_started": "2020-12-17T12:00:00+00:00"},
+        {"sessions": 2, "bucketed_started": "2020-12-18T06:00:00+00:00"},
+    ]
+
+    expected_result = {
+        "query": "",
+        "intervals": [
+            "2020-12-17T12:00:00Z",
+            "2020-12-17T18:00:00Z",
+            "2020-12-18T00:00:00Z",
+            "2020-12-18T06:00:00Z",
+        ],
+        "groups": [
+            {"by": {}, "series": {"sum(session)": [2, 0, 0, 2]}, "totals": {"sum(session)": 4}}
+        ],
+    }
+
+    actual_result = result_sorted(massage_sessions_result(query, result_totals, result_timeseries))
+
+    assert actual_result == expected_result
+
+
+@freeze_time("2020-12-18T11:14:17.105Z")
+def test_massage_groupby_timeseries():
+    query = _make_query("statsPeriod=1d&interval=6h&field=sum(session)&groupBy=release")
+
+    result_totals = [
+        {"release": "test-example-release", "sessions": 4},
+        {"release": "test-example-release-2", "sessions": 1},
+    ]
+    # snuba returns the datetimes as strings for now
+    result_timeseries = [
+        {
+            "release": "test-example-release",
+            "sessions": 2,
+            "bucketed_started": "2020-12-17T12:00:00+00:00",
+        },
+        {
+            "release": "test-example-release",
+            "sessions": 2,
+            "bucketed_started": "2020-12-18T06:00:00+00:00",
+        },
+        {
+            "release": "test-example-release-2",
+            "sessions": 1,
+            "bucketed_started": "2020-12-18T06:00:00+00:00",
+        },
+    ]
+
+    expected_result = {
+        "query": "",
+        "intervals": [
+            "2020-12-17T12:00:00Z",
+            "2020-12-17T18:00:00Z",
+            "2020-12-18T00:00:00Z",
+            "2020-12-18T06:00:00Z",
+        ],
+        "groups": [
+            {
+                "by": {"release": "test-example-release"},
+                "series": {"sum(session)": [2, 0, 0, 2]},
+                "totals": {"sum(session)": 4},
+            },
+            {
+                "by": {"release": "test-example-release-2"},
+                "series": {"sum(session)": [0, 0, 0, 1]},
+                "totals": {"sum(session)": 1},
+            },
+        ],
+    }
+
+    actual_result = result_sorted(massage_sessions_result(query, result_totals, result_timeseries))
+
+    assert actual_result == expected_result
+
+
+@freeze_time("2020-12-18T13:25:15.769Z")
+def test_massage_virtual_groupby_timeseries():
+    query = _make_query(
+        "statsPeriod=1d&interval=6h&field=sum(session)&field=count_unique(user)&groupBy=session.status"
+    )
+    result_totals = [
+        {
+            "users": 1,
+            "users_crashed": 1,
+            "sessions": 6,
+            "sessions_errored": 1,
+            "users_errored": 1,
+            "sessions_abnormal": 0,
+            "sessions_crashed": 1,
+            "users_abnormal": 0,
+        }
+    ]
+    # snuba returns the datetimes as strings for now
+    result_timeseries = [
+        {
+            "sessions_errored": 1,
+            "users": 1,
+            "users_crashed": 1,
+            "sessions_abnormal": 0,
+            "sessions": 3,
+            "users_errored": 1,
+            "users_abnormal": 0,
+            "sessions_crashed": 1,
+            "bucketed_started": "2020-12-18T12:00:00+00:00",
+        },
+        {
+            "sessions_errored": 0,
+            "users": 1,
+            "users_crashed": 0,
+            "sessions_abnormal": 0,
+            "sessions": 3,
+            "users_errored": 0,
+            "users_abnormal": 0,
+            "sessions_crashed": 0,
+            "bucketed_started": "2020-12-18T06:00:00+00:00",
+        },
+    ]
+
+    expected_result = {
+        "query": "",
+        "intervals": [
+            "2020-12-17T18:00:00Z",
+            "2020-12-18T00:00:00Z",
+            "2020-12-18T06:00:00Z",
+            "2020-12-18T12:00:00Z",
+        ],
+        "groups": [
+            {
+                "by": {"session.status": "abnormal"},
+                "series": {"count_unique(user)": [0, 0, 0, 0], "sum(session)": [0, 0, 0, 0]},
+                "totals": {"count_unique(user)": 0, "sum(session)": 0},
+            },
+            {
+                "by": {"session.status": "crashed"},
+                "series": {"count_unique(user)": [0, 0, 0, 1], "sum(session)": [0, 0, 0, 1]},
+                "totals": {"count_unique(user)": 1, "sum(session)": 1},
+            },
+            {
+                "by": {"session.status": "errored"},
+                "series": {"count_unique(user)": [0, 0, 0, 1], "sum(session)": [0, 0, 0, 1]},
+                "totals": {"count_unique(user)": 1, "sum(session)": 1},
+            },
+            {
+                "by": {"session.status": "healthy"},
+                "series": {"count_unique(user)": [0, 0, 1, 0], "sum(session)": [0, 0, 3, 2]},
+                # While one of the time slots has a healthy user, it is the
+                # *same* user as the one experiencing a crash later on, so over
+                # the *whole* time window that user is not counted as healthy.
+                # The `0` here is expected; that's how `count_unique` behaves.
+                "totals": {"count_unique(user)": 0, "sum(session)": 5},
+            },
+        ],
+    }
+
+    actual_result = result_sorted(massage_sessions_result(query, result_totals, result_timeseries))
+
+    assert actual_result == expected_result