Browse Source

feat(tracing): Add trace_id to span aggregation response (#68471)

- This updates the span-aggregation response to include the trace_id
- Add a new span_samples attribute instead of the list of tuples so its
more obvious which value is which, keeping the existing samples
attribute so we can migrate over without breaking anything
William Mak 11 months ago
parent
commit
f1488551ff

+ 37 - 1
src/sentry/api/endpoints/organization_spans_aggregation.py

@@ -45,6 +45,13 @@ class EventSpans(TypedDict):
     spans: list[EventSpan]
 
 
+class SpanSample(TypedDict):
+    transaction: str | None
+    trace: str | None
+    timestamp: int
+    span: str
+
+
 AggregateSpanRow = TypedDict(
     "AggregateSpanRow",
     {
@@ -61,7 +68,9 @@ AggregateSpanRow = TypedDict(
         "avg(absolute_offset)": float,
         "avg(relative_offset)": float,
         "count()": int,
+        # samples is deprecated in favour of sample_spans
         "samples": set[tuple[Optional[str], str]],
+        "sample_spans": list[SpanSample],
     },
 )
 
@@ -72,6 +81,7 @@ class BaseAggregateSpans:
     def __init__(self) -> None:
         self.aggregated_tree: dict[str, AggregateSpanRow] = {}
         self.current_transaction: str | None = None
+        self.current_trace: str | None = None
 
     def fingerprint_nodes(
         self,
@@ -142,10 +152,31 @@ class BaseAggregateSpans:
                 else node["avg(relative_offset)"]
             )
             node["count()"] += 1
+            # TODO: will need a better way to check we don't add dupes once samples is gone
             if len(node["samples"]) < 5:
                 node["samples"].add((self.current_transaction, span_tree["span_id"]))
+                node["sample_spans"].append(
+                    SpanSample(
+                        {
+                            "transaction": self.current_transaction,
+                            "timestamp": start_timestamp / 1000,
+                            "span": span_tree["span_id"],
+                            "trace": self.current_trace,
+                        }
+                    )
+                )
         else:
             sample = {(self.current_transaction, span_tree["span_id"])}
+            span_sample = [
+                SpanSample(
+                    {
+                        "transaction": self.current_transaction,
+                        "timestamp": start_timestamp / 1000,
+                        "span": span_tree["span_id"],
+                        "trace": self.current_trace,
+                    }
+                )
+            ]
             self.aggregated_tree[node_fingerprint] = {
                 "node_fingerprint": node_fingerprint,
                 "parent_node_fingerprint": parent_node_fingerprint,
@@ -165,6 +196,7 @@ class BaseAggregateSpans:
                 ),
                 "count()": 1,
                 "samples": sample,
+                "sample_spans": span_sample,
             }
 
         # Handles sibling spans that have the same group
@@ -191,6 +223,8 @@ class AggregateIndexedSpans(BaseAggregateSpans):
             spans = event["spans"]
 
             self.current_transaction = event["transaction_id"]
+            self.current_trace = event["trace_id"]
+
             for span_ in spans:
                 span = EventSpan(*span_)
                 span_id = getattr(span, "span_id")
@@ -233,6 +267,8 @@ class AggregateNodestoreSpans(BaseAggregateSpans):
             span_tree = {}
 
             self.current_transaction = event["event_id"]
+            self.current_trace = event["contexts"]["trace"]["trace_id"]
+
             root_span_id = event["contexts"]["trace"]["span_id"]
             span_tree[root_span_id] = {
                 "span_id": root_span_id,
@@ -334,7 +370,7 @@ class OrganizationSpansAggregationEndpoint(OrganizationEventsEndpointBase):
                 builder = SpansIndexedQueryBuilder(
                     dataset=Dataset.SpansIndexed,
                     params=params,
-                    selected_columns=["transaction_id", "count()"],
+                    selected_columns=["transaction_id", "trace_id", "count()", "any(timestamp)"],
                     query=query,
                     limit=100,
                 )

+ 38 - 2
tests/snuba/api/endpoints/test_organization_spans_aggregation.py

@@ -14,6 +14,7 @@ from sentry.utils.samples import load_data
 MOCK_SNUBA_RESPONSE = {
     "data": [
         {
+            "trace_id": "a" * 32,
             "transaction_id": "80fe542aea4945ffbe612646987ee449",
             "count": 71,
             "spans": [
@@ -80,6 +81,7 @@ MOCK_SNUBA_RESPONSE = {
             ],
         },
         {
+            "trace_id": "b" * 32,
             "transaction_id": "86b21833d1854d9b811000b91e7fccfa",
             "count": 71,
             "spans": [
@@ -440,11 +442,45 @@ class OrganizationSpansAggregationTest(APITestCase, SnubaTestCase):
                     ("80fe542aea4945ffbe612646987ee449", "root_1"),
                     ("86b21833d1854d9b811000b91e7fccfa", "root_2"),
                 }
+                assert data[root_fingerprint]["sample_spans"] == [
+                    {
+                        "transaction": "80fe542aea4945ffbe612646987ee449",
+                        "timestamp": 1694625139.1,
+                        "span": "root_1",
+                        "trace": "a" * 32,
+                    },
+                    {
+                        "transaction": "86b21833d1854d9b811000b91e7fccfa",
+                        "timestamp": 1694625159.1,
+                        "span": "root_2",
+                        "trace": "b" * 32,
+                    },
+                ]
             else:
                 assert data[root_fingerprint]["samples"] == {
-                    (self.root_event_1.event_id, self.span_ids_event_1["A"]),
-                    (self.root_event_2.event_id, self.span_ids_event_2["A"]),
+                    (
+                        self.root_event_1.event_id,
+                        self.span_ids_event_1["A"],
+                    ),
+                    (
+                        self.root_event_2.event_id,
+                        self.span_ids_event_2["A"],
+                    ),
                 }
+                assert data[root_fingerprint]["sample_spans"] == [
+                    {
+                        "transaction": self.root_event_1.event_id,
+                        "timestamp": self.root_event_1.data["start_timestamp"],
+                        "span": self.span_ids_event_1["A"],
+                        "trace": self.root_event_1.data["contexts"]["trace"]["trace_id"],
+                    },
+                    {
+                        "transaction": self.root_event_2.event_id,
+                        "timestamp": self.root_event_2.data["start_timestamp"],
+                        "span": self.span_ids_event_2["A"],
+                        "trace": self.root_event_2.data["contexts"]["trace"]["trace_id"],
+                    },
+                ]
 
             fingerprint = hashlib.md5(b"e238e6c2e2466b07-B").hexdigest()[:16]
             assert data[fingerprint]["description"] == "connect"