Browse Source

feat(starfish): Flamegraph for a span op in a transaction (#53633)

This is temporary code to build out some PoCs for
mobile starfish. For example, we'd like to retrieve a flamegraph
covering the duration of a `span.op:ui.load.initial_display` span within a
transaction, to show the user the work being done during initial display.

Since there isn't test coverage for this code, a request like this

`http://sentry.localhost:8000/api/0/organizations/sentry/profiling/flamegraph/?project=1&statsPeriod=24h&spans.op=pageload&query=transaction:/performance/`

would make the following ClickHouse (CH) queries and then continue on to the
profiling service:
```
 SELECT (replaceAll(toString(event_id), '-', '') AS _snuba_event_id), (replaceAll(toString(profile_id), '-', '') AS _snuba_profile_id)
  FROM transactions_local PREWHERE equals((transaction_name AS _snuba_transaction), '/performance/')
 WHERE greaterOrEquals((finish_ts AS _snuba_finish_ts), toDateTime('2023-07-25T17:48:01', 'Universal'))
   AND less(_snuba_finish_ts, toDateTime('2023-07-26T17:48:01', 'Universal'))
   AND in((project_id AS _snuba_project_id), [1])
   AND equals(('transaction' AS _snuba_type), 'transaction')
   AND isNotNull(_snuba_profile_id)
   AND has((spans.op AS `_snuba_spans.op`), 'pageload')
 LIMIT 100
OFFSET 0
```

```
 SELECT (replaceAll(toString(transaction_id), '-', '') AS _snuba_transaction_id), (start_timestamp AS _snuba_start_timestamp), (start_ms AS _snuba_start_ms), (end_timestamp AS _snuba_end_timestamp), (end_ms AS _snuba_end_ms)
  FROM spans_local PREWHERE in(_snuba_transaction_id, ['d178cc3ca11b4057832378701ce85d5c', '39d4fbb7f5a14982aa1b85527cd1250f'])
 WHERE equals((project_id AS _snuba_project_id), 1)
   AND greaterOrEquals((end_timestamp AS _snuba_timestamp), toDateTime('2023-07-25T17:48:01', 'Universal'))
   AND less(_snuba_timestamp, toDateTime('2023-07-26T17:48:01', 'Universal'))
   AND equals((op AS _snuba_op), 'pageload')
 LIMIT 1000
OFFSET 0
```
Shruthi 1 year ago
parent
commit
6ce961496b

+ 18 - 1
src/sentry/api/endpoints/organization_profiling_profiles.py

@@ -12,7 +12,11 @@ from sentry.api.base import region_silo_endpoint
 from sentry.api.bases import NoProjects, OrganizationEventsV2EndpointBase
 from sentry.exceptions import InvalidSearchQuery
 from sentry.models import Organization
-from sentry.profiles.flamegraph import get_profile_ids, get_profile_ids_with_spans
+from sentry.profiles.flamegraph import (
+    get_profile_ids,
+    get_profile_ids_for_span_op,
+    get_profile_ids_with_spans,
+)
 from sentry.profiles.utils import parse_profile_filters, proxy_profiling_service
 
 
@@ -50,12 +54,15 @@ class OrganizationProfilingFlamegraphEndpoint(OrganizationProfilingBaseEndpoint)
         if not features.has("organizations:profiling", organization, actor=request.user):
             return Response(status=404)
 
+        has_starfish = features.has("organizations:starfish-view", organization, actor=request.user)
+
         params = self.get_snuba_params(request, organization, check_global_views=False)
         project_ids = params["project_id"]
         if len(project_ids) > 1:
             raise ParseError(detail="You cannot get a flamegraph from multiple projects.")
 
         span_group = request.query_params.get("spans.group", None)
+        span_op = request.query_params.get("spans.op", None)
         if span_group is not None:
             backend = request.query_params.get("backend", "indexed_spans")
             profile_ids = get_profile_ids_with_spans(
@@ -66,6 +73,16 @@ class OrganizationProfilingFlamegraphEndpoint(OrganizationProfilingBaseEndpoint)
                 backend,
                 request.query_params.get("query", None),
             )
+        elif span_op is not None and has_starfish:
+            backend = "indexed_spans"
+            profile_ids = get_profile_ids_for_span_op(
+                organization.id,
+                project_ids[0],
+                params,
+                span_op,
+                backend,
+                request.query_params.get("query", None),
+            )
         else:
             profile_ids = get_profile_ids(params, request.query_params.get("query", None))
 

+ 54 - 3
src/sentry/profiles/flamegraph.py

@@ -58,7 +58,7 @@ def get_profile_ids(
 
 def get_span_intervals(
     project_id: str,
-    span_group: str,
+    span_filter: Condition,
     transaction_ids: List[str],
     organization_id: str,
     params: ParamsType,
@@ -75,9 +75,9 @@ def get_span_intervals(
         where=[
             Condition(Column("project_id"), Op.EQ, project_id),
             Condition(Column("transaction_id"), Op.IN, transaction_ids),
-            Condition(Column("group_raw"), Op.EQ, span_group),
             Condition(Column("timestamp"), Op.GTE, params["start"]),
             Condition(Column("timestamp"), Op.LT, params["end"]),
+            span_filter,
         ],
     )
 
@@ -169,7 +169,7 @@ def get_profile_ids_with_spans(
     elif backend == "indexed_spans":
         data = get_span_intervals(
             project_id,
-            span_group,
+            Condition(Column("group_raw"), Op.EQ, span_group),
             list(transaction_to_prof.keys()),
             organization_id,
             params,
@@ -188,3 +188,54 @@ def get_profile_ids_with_spans(
     spans = [tup[1] for tup in transaction_to_prof.values()]
 
     return {"profile_ids": profile_ids, "spans": spans}
+
+
+def get_profile_ids_for_span_op(
+    organization_id: str,
+    project_id: str,
+    params: ParamsType,
+    span_op: str,
+    backend: str,
+    query: Optional[str] = None,
+):
+    data = query_profiles_data(
+        params,
+        Referrer.API_STARFISH_PROFILE_FLAMEGRAPH.value,
+        selected_columns=["id", "profile.id"],
+        query=query,
+        additional_conditions=[
+            # Check if the span op is in the indexed transaction's spans.op array
+            Condition(Function("has", [Column("spans.op"), span_op]), Op.EQ, 1)
+        ],
+    )
+
+    # map {transaction_id: (profile_id, [span intervals])}
+
+    transaction_to_prof: Dict[str, Tuple[str, List[Dict[str, str]]]] = {
+        row["id"]: (row["profile.id"], []) for row in data
+    }
+
+    if not transaction_to_prof:
+        return {"profile_ids": [], "spans": []}
+
+    # Note: "op" is not a part of the indexed spans orderby so this is
+    # probably not a very efficient filter. This is just to
+    # build a little PoC for now; if it needs to be used more extensively
+    # in production, we can optimize it.
+    data = get_span_intervals(
+        project_id,
+        Condition(Column("op"), Op.EQ, span_op),
+        list(transaction_to_prof.keys()),
+        organization_id,
+        params,
+    )
+
+    for row in data:
+        transaction_to_prof[row["transaction_id"]][1].append(
+            {"start": row["start_ns"], "end": row["end_ns"]}
+        )
+
+    profile_ids = [tup[0] for tup in transaction_to_prof.values()]
+    spans = [tup[1] for tup in transaction_to_prof.values()]
+
+    return {"profile_ids": profile_ids, "spans": spans}