Browse Source

perf(metrics): Use groupArraySample to take sample (#65259)

`argMin` was computationally expensive, so switch to
`groupArraySample` to take a random example. This also enables a
pagination-like feature, where we can use the random seed to change the
samples returned.
Tony Xiao 1 year ago
parent
commit
87b1eb1da8

+ 9 - 3
src/sentry/search/events/datasets/spans_indexed.py

@@ -196,12 +196,18 @@ class SpansIndexedDatasetConfig(DatasetConfig):
                 SnQLFunction(
                     "example",
                     snql_aggregate=lambda args, alias: Function(
-                        "argMin",
+                        "arrayElement",
                         [
                             Function(
-                                "tuple", [Column("group"), Column("timestamp"), Column("span_id")]
+                                "groupArraySample(1, 1)",  # TODO: paginate via the seed
+                                [
+                                    Function(
+                                        "tuple",
+                                        [Column("group"), Column("timestamp"), Column("span_id")],
+                                    ),
+                                ],
                             ),
-                            Function("cityHash64", [Column("span_id")]),
+                            1,
                         ],
                         alias,
                     ),

+ 2 - 2
tests/sentry/api/endpoints/test_organization_metrics.py

@@ -202,6 +202,6 @@ class OrganizationMetricsSamplesEndpointTest(APITestCase, BaseSpansTestCase):
         }
         response = self.do_request(query)
         assert response.status_code == 200, response.data
-        expected = {span["span_id"] for span in spans}
-        actual = {row["id"] for row in response.data["data"]}
+        expected = {int(span["span_id"], 16) for span in spans}
+        actual = {int(row["id"], 16) for row in response.data["data"]}
         assert actual == expected