Browse Source

chore(stat-detectors): Convert RCA to use p95 span time (#57409)

We want to use p95 instead of the average because that is what the regression
issue uses. This change also adds types for the data structures.
Nar Saynorath 1 year ago
parent
commit
6662d9adf3

+ 2 - 2
src/sentry/api/endpoints/organization_events_root_cause_analysis.py

@@ -21,7 +21,7 @@ SNUBA_QUERY_LIMIT = 10000
 def query_spans(transaction, regression_breakpoint, params):
     selected_columns = [
         "count(span_id) as span_count",
-        "sumArray(spans_exclusive_time) as total_span_self_time",
+        "percentileArray(spans_exclusive_time, 0.95) as p95_self_time",
         "array_join(spans_op) as span_op",
         "array_join(spans_group) as span_group",
         # want a single event id to fetch from nodestore for the span description
@@ -34,7 +34,7 @@ def query_spans(transaction, regression_breakpoint, params):
         selected_columns=selected_columns,
         equations=[],
         query=f"transaction:{transaction}",
-        orderby=["span_op", "span_group", "total_span_self_time"],
+        orderby=["span_op", "span_group", "p95_self_time"],
         limit=SNUBA_QUERY_LIMIT,
         config=QueryBuilderConfig(
             auto_aggregations=True,

+ 44 - 32
src/sentry/api/helpers/span_analysis.py

@@ -1,14 +1,23 @@
-# example data = list of dictionaries
+from typing import Any, List, TypedDict
 
-# data = [{'period': 1, 'k': 'browser', 'txn_count': 2978, 'sum_v': 2088325.678, 'count_v': 24069, 'avg_v': 86.76412306},
-# {'period': 1, 'k': 'http.client', 'txn_count': 2978, 'sum_v': 13444332.97, 'count_v': 78114, 'avg_v': 172.1116953},
-# {'period': 1, 'k': 'mark', 'txn_count': 2978, 'sum_v': 0, 'count_v': 5390, 'avg_v': 0},
-# {'period': 1, 'k': 'pageload', 'txn_count': 2978, 'sum_v': 1853836.58, 'count_v': 2978, 'avg_v': 622.5106045},
-# {'period': 1, 'k': 'paint', 'txn_count': 2978, 'sum_v': 0, 'count_v': 5151, 'avg_v': 0},
-# {'period': 1, 'k': 'resource.css', 'txn_count': 2978, 'sum_v': 449854.7821, 'count_v': 8926, 'avg_v': 50.39825029}]
 
+class Row(TypedDict):
+    span_op: str
+    span_group: str
+    transaction_count: int
+    p95_self_time: float
+    sample_event_id: str
+    span_count: int
+    period: str
 
-def span_analysis(data):
+
+class AugmentedData(Row):
+    span_key: str
+    relative_freq: float
+    score: float
+
+
+def span_analysis(data: List[Row]):
 
     # create a unique identifier for each span
     span_keys = [row["span_op"] + "," + row["span_group"] for row in data]
@@ -16,33 +25,32 @@ def span_analysis(data):
     # number of occurrences of a span/transaction
     count_col = [row["span_count"] for row in data]
     txn_count = [row["transaction_count"] for row in data]
+    p95_self_time = [row["p95_self_time"] for row in data]
 
-    # total self time of a span
-    sum_col = [row["total_span_self_time"] for row in data]
-
-    # add in three new fields
+    # add in two new fields
+    # 1. relative freq - the avg number of times a span occurs per transaction
-    # 2. avg duration - average duration of a span (total self time / span count)
-    # 3. score - a nondescriptive metric to evaluate the span (relative freq * avg duration)
+    # 2. score - a nondescriptive metric to evaluate the span (relative freq * p95 duration)
     relative_freq = [count_col[x] / txn_count[x] for x in range(len(count_col))]
-    avg_col = [sum_col[x] / count_col[x] for x in range(len(sum_col))]
-    score_col = [relative_freq[x] * avg_col[x] for x in range(len(relative_freq))]
-
-    for i in range(len(data)):
-        row = data[i]
-        row["relative_freq"] = relative_freq[i]
-        row["score"] = score_col[i]
-        row["avg_duration"] = avg_col[i]
-        row["span_key"] = span_keys[i]
+    score_col = [relative_freq[x] * p95_self_time[x] for x in range(len(relative_freq))]
+
+    data_frames: List[AugmentedData] = [
+        {
+            **data[i],  # type: ignore[misc]
+            "relative_freq": relative_freq[i],
+            "score": score_col[i],
+            "span_key": span_keys[i],
+        }
+        for i in range(len(data))
+    ]
 
     # create two dataframes for period 0 and 1 and keep only the same spans in both periods
-    span_data_p0 = {row["span_key"]: row for row in data if row["period"] == "before"}
-    span_data_p1 = {row["span_key"]: row for row in data if row["period"] == "after"}
+    span_data_p0 = {row["span_key"]: row for row in data_frames if row["period"] == "before"}
+    span_data_p1 = {row["span_key"]: row for row in data_frames if row["period"] == "after"}
 
     all_keys = set(span_data_p0.keys()).union(span_data_p1.keys())
 
     # merge the dataframes to do span analysis
-    problem_spans = []
+    problem_spans: List[Any] = []
 
     # Perform the join operation
     for key in all_keys:
@@ -53,27 +61,31 @@ def span_analysis(data):
         if row1 and row2:
             score_delta = row2["score"] - row1["score"]
             freq_delta = row2["relative_freq"] - row1["relative_freq"]
-            duration_delta = row2["avg_duration"] - row1["avg_duration"]
+            duration_delta = row2["p95_self_time"] - row1["p95_self_time"]
         elif row2:
             score_delta = row2["score"]
             freq_delta = row2["relative_freq"]
-            duration_delta = row2["avg_duration"]
+            duration_delta = row2["p95_self_time"]
             new_span = True
 
         # We're only interested in span changes if they positively impacted duration
         if score_delta > 0:
+            sample_event_id = row1 and row1["sample_event_id"] or row2 and row2["sample_event_id"]
+            if not sample_event_id:
+                continue
+
             problem_spans.append(
                 {
                     "span_op": key.split(",")[0],
                     "span_group": key.split(",")[1],
-                    "sample_event_id": row1["sample_event_id"] if row1 else row2["sample_event_id"],
+                    "sample_event_id": sample_event_id,
                     "score_delta": score_delta,
                     "freq_before": row1["relative_freq"] if row1 else 0,
-                    "freq_after": row2["relative_freq"],
+                    "freq_after": row2["relative_freq"] if row2 else 0,
                     "freq_delta": freq_delta,
                     "duration_delta": duration_delta,
-                    "duration_before": row1["avg_duration"] if row1 else 0,
-                    "duration_after": row2["avg_duration"],
+                    "duration_before": row1["p95_self_time"] if row1 else 0,
+                    "duration_after": row2["p95_self_time"] if row2 else 0,
                     "is_new_span": new_span,
                 }
             )

+ 3 - 3
tests/sentry/api/endpoints/test_organization_root_cause_analysis.py

@@ -254,13 +254,13 @@ class OrganizationRootCauseAnalysisTest(MetricsAPIBaseTestCase):
                 "span_op": "django.middleware",
                 "span_group": "2b9cbb96dbf59baa",
                 "span_description": "middleware span",
-                "score_delta": 640.0,
+                "score_delta": 1578.0,
                 "freq_before": 1.0,
                 "freq_after": 3.0,
                 "freq_delta": 2.0,
-                "duration_delta": 173.33333333333334,
+                "duration_delta": 486.0,
                 "duration_before": 60.0,
-                "duration_after": 233.33333333333334,
+                "duration_after": 546.0,
                 "is_new_span": False,
             }
         ]