Browse Source

feat(perf): Improve experimental performance facets api (#24614)

* feat(perf): Improve experimental performance facets api

This brings back sorting tag keys by count, which also allows sampling to only happen at a minimum number of tag values. In addition it adds another aggregation to allow sorting by count and adds a new named tuple instead of mis-using the existing one.
k-fish 4 years ago
parent
commit
ce898db90c

+ 2 - 1
src/sentry/api/endpoints/organization_events_facets_performance.py

@@ -46,7 +46,8 @@ class OrganizationEventsFacetsPerformanceEndpoint(OrganizationEventsV2EndpointBa
                     {
                         "name": tagstore.get_tag_value_label(row.key, row.value),
                         "value": row.value,
-                        f"avg_{aggregate_column}": row.count,
+                        "count": row.count,
+                        "aggregate": row.performance,
                     }
                 )
         return Response(list(resp.values()))

+ 16 - 6
src/sentry/snuba/discover.py

@@ -49,6 +49,9 @@ logger = logging.getLogger(__name__)
 
 PaginationResult = namedtuple("PaginationResult", ["next", "previous", "oldest", "latest"])
 FacetResult = namedtuple("FacetResult", ["key", "value", "count"])
+PerformanceFacetResult = namedtuple(
+    "PerformanceFacetResult", ["key", "value", "performance", "count"]
+)
 
 resolve_discover_column = resolve_column(Dataset.Discover)
 
@@ -775,14 +778,14 @@ def get_performance_facets(
     sample = len(snuba_filter.filter_keys["project_id"]) > 2
 
     with sentry_sdk.start_span(op="discover.discover", description="facets.frequent_tags"):
-        # Get the tag keys with the highest deviation
+        # Get the most relevant tag keys
         key_names = raw_query(
-            aggregations=[["stddevSamp", aggregate_column, "stddev"]],
+            aggregations=[["count", None, "count"]],
             start=snuba_filter.start,
             end=snuba_filter.end,
             conditions=snuba_filter.conditions,
             filter_keys=snuba_filter.filter_keys,
-            orderby=["-stddev", "tags_key"],
+            orderby=["-count", "tags_key"],
             groupby="tags_key",
             # TODO(Kevan): Check using having vs where before mainlining
             having=[excluded_tags],
@@ -796,8 +799,10 @@ def get_performance_facets(
             return []
 
     results = []
+    snuba_filter.conditions.append([aggregate_column, "IS NOT NULL", None])
 
-    sampling_enabled = True
+    # Only enable sampling if over 10000 values
+    sampling_enabled = key_names["data"][0]["count"] > 10000
     options_sample_rate = options.get("discover2.tags_performance_facet_sample_rate") or 0.1
 
     sample_rate = options_sample_rate if sampling_enabled else None
@@ -816,7 +821,10 @@ def get_performance_facets(
             conditions = snuba_filter.conditions
             conditions.append(["tags_key", "IN", aggregate_tags])
             tag_values = raw_query(
-                aggregations=[[aggregate_function, aggregate_column, "aggregate"]],
+                aggregations=[
+                    [aggregate_function, aggregate_column, "aggregate"],
+                    ["count", None, "count"],
+                ],
                 conditions=conditions,
                 start=snuba_filter.start,
                 end=snuba_filter.end,
@@ -831,7 +839,9 @@ def get_performance_facets(
             )
             results.extend(
                 [
-                    FacetResult(r["tags_key"], r["tags_value"], int(r["aggregate"]))
+                    PerformanceFacetResult(
+                        r["tags_key"], r["tags_value"], float(r["aggregate"]), int(r["count"])
+                    )
                     for r in tag_values["data"]
                 ]
             )

+ 8 - 4
tests/sentry/snuba/test_discover.py

@@ -2598,6 +2598,7 @@ class GetPerformanceFacetsTest(SnubaTestCase, TestCase):
         if tags is None:
             tags = []
         event = load_data("transaction").copy()
+        event.data["tags"].extend(tags)
         event.update(
             {
                 "transaction": name,
@@ -2635,14 +2636,17 @@ class GetPerformanceFacetsTest(SnubaTestCase, TestCase):
             result = discover.get_performance_facets("", params)
             self.wait_for_event_count(self.project.id, 2)
 
-            assert len(result) == 10
+            assert len(result) == 12
             for r in result:
                 if r.key == "color" and r.value == "red":
-                    assert r.count == 1000
+                    assert r.count == 1
+                    assert r.performance == 1000
                 elif r.key == "color" and r.value == "blue":
-                    assert r.count == 2000
+                    assert r.count == 1
+                    assert r.performance == 2000
                 else:
-                    assert r.count == 1500
+                    assert r.count == 2
+                    assert r.performance == 1500
 
 
 def test_zerofill():