Browse Source

fix(discover): Top 5 events with timestamp fields (#25003)

The top 5 stats query fails when combined with timestamp.to_hour or
timestamp.to_day. This change special cases these fields and handles them like
the timestamp field with an OR query.
Tony 3 years ago
parent
commit
548f6a2a7d

+ 6 - 4
src/sentry/snuba/discover.py

@@ -458,7 +458,7 @@ def top_events_timeseries(
     ) as span:
         span.set_data("query", user_query)
         snuba_filter, translated_columns = get_timeseries_snuba_filter(
-            list(set(timeseries_columns + selected_columns)),
+            list(sorted(set(timeseries_columns + selected_columns))),
             user_query,
             params,
             rollup,
@@ -481,9 +481,11 @@ def top_events_timeseries(
                 }
             )
             if values:
-                # timestamp needs special handling, creating a big OR instead
-                if field == "timestamp":
-                    snuba_filter.conditions.append([["timestamp", "=", value] for value in values])
+                # timestamp fields needs special handling, creating a big OR instead
+                if field == "timestamp" or field.startswith("timestamp.to_"):
+                    snuba_filter.conditions.append(
+                        [[field, "=", value] for value in sorted(values)]
+                    )
                 elif None in values:
                     non_none_values = [value for value in values if value is not None]
                     condition = [[["isNull", [resolve_discover_column(field)]], "=", 1]]

+ 77 - 0
tests/sentry/snuba/test_discover.py

@@ -2505,6 +2505,83 @@ class TopEventsTimeseriesQueryTest(TimeseriesBase):
             referrer=None,
         )
 
+    @patch("sentry.snuba.discover.raw_query")
+    def test_timestamp_fields(self, mock_query):
+        timestamp1 = before_now(days=2, minutes=5)
+        timestamp2 = before_now(minutes=2)
+        top_events = {
+            "data": [
+                {
+                    "timestamp": iso_format(timestamp1),
+                    "timestamp.to_hour": iso_format(timestamp1.replace(minute=0, second=0)),
+                    "timestamp.to_day": iso_format(timestamp1.replace(hour=0, minute=0, second=0)),
+                },
+                {
+                    "timestamp": iso_format(timestamp2),
+                    "timestamp.to_hour": iso_format(timestamp2.replace(minute=0, second=0)),
+                    "timestamp.to_day": iso_format(timestamp2.replace(hour=0, minute=0, second=0)),
+                },
+            ]
+        }
+        start = before_now(days=3, minutes=10)
+        end = before_now(minutes=1)
+        discover.top_events_timeseries(
+            selected_columns=["timestamp", "timestamp.to_day", "timestamp.to_hour", "count()"],
+            params={
+                "start": start,
+                "end": end,
+                "project_id": [self.project.id],
+            },
+            rollup=3600,
+            top_events=top_events,
+            timeseries_columns=["count()"],
+            user_query="",
+            orderby=["count()"],
+            limit=10000,
+            organization=self.organization,
+        )
+        mock_query.assert_called_with(
+            aggregations=[["count", None, "count"]],
+            conditions=[
+                # Each timestamp field should generated a nested condition.
+                # Within each, the conditions will be ORed together.
+                [
+                    ["timestamp", "=", iso_format(timestamp1)],
+                    ["timestamp", "=", iso_format(timestamp2)],
+                ],
+                [
+                    [
+                        "timestamp.to_day",
+                        "=",
+                        iso_format(timestamp1.replace(hour=0, minute=0, second=0)),
+                    ],
+                    [
+                        "timestamp.to_day",
+                        "=",
+                        iso_format(timestamp2.replace(hour=0, minute=0, second=0)),
+                    ],
+                ],
+                [
+                    ["timestamp.to_hour", "=", iso_format(timestamp1.replace(minute=0, second=0))],
+                    ["timestamp.to_hour", "=", iso_format(timestamp2.replace(minute=0, second=0))],
+                ],
+            ],
+            filter_keys={"project_id": [self.project.id]},
+            selected_columns=[
+                "timestamp",
+                ["toStartOfDay", ["timestamp"], "timestamp.to_day"],
+                ["toStartOfHour", ["timestamp"], "timestamp.to_hour"],
+            ],
+            start=start,
+            end=end,
+            rollup=3600,
+            orderby=["time", "timestamp", "timestamp.to_day", "timestamp.to_hour"],
+            groupby=["time", "timestamp", "timestamp.to_day", "timestamp.to_hour"],
+            dataset=Dataset.Discover,
+            limit=10000,
+            referrer=None,
+        )
+
 
 def format_project_event(project_slug, event_id):
     return f"{project_slug}:{event_id}"

+ 36 - 0
tests/snuba/api/endpoints/test_organization_events_stats.py

@@ -1505,3 +1505,39 @@ class OrganizationEventsStatsTopNEvents(APITestCase, SnubaTestCase):
             )
         assert response.status_code == 400
         assert "zero duration" in response.data["detail"]
+
+    def test_top_events_timestamp_fields(self):
+        with self.feature("organizations:discover-basic"):
+            response = self.client.get(
+                self.url,
+                format="json",
+                data={
+                    "start": iso_format(self.day_ago),
+                    "end": iso_format(self.day_ago + timedelta(hours=2)),
+                    "interval": "1h",
+                    "yAxis": "count()",
+                    "orderby": ["-count()"],
+                    "field": ["count()", "timestamp", "timestamp.to_hour", "timestamp.to_day"],
+                    "topEvents": 5,
+                },
+            )
+        assert response.status_code == 200
+        data = response.data
+        assert len(data) == 3
+
+        # these are the timestamps corresponding to the events stored
+        timestamps = [
+            self.day_ago + timedelta(minutes=2),
+            self.day_ago + timedelta(hours=1, minutes=2),
+            self.day_ago + timedelta(minutes=4),
+        ]
+        timestamp_hours = [timestamp.replace(minute=0, second=0) for timestamp in timestamps]
+        timestamp_days = [timestamp.replace(hour=0, minute=0, second=0) for timestamp in timestamps]
+
+        for ts, ts_hr, ts_day in zip(timestamps, timestamp_hours, timestamp_days):
+            key = f"{iso_format(ts)}+00:00,{iso_format(ts_day)}+00:00,{iso_format(ts_hr)}+00:00"
+            count = sum(
+                e["count"] for e in self.event_data if e["data"]["timestamp"] == iso_format(ts)
+            )
+            results = data[key]
+            assert [{"count": count}] in [attrs for time, attrs in results["data"]]