Browse Source

feat(profiling): Pass thread id to flamegraph generation (#74846)

To generate a more accurate flamegraph, we want samples from the active
thread.

Depends on getsentry/snuba#6138
Tony Xiao 7 months ago
parent
commit
a705fd4d35

+ 22 - 5
src/sentry/profiles/flamegraph.py

@@ -274,6 +274,7 @@ class ContinuousProfileCandidate(TypedDict):
     project_id: int
     profiler_id: str
     chunk_id: str
+    thread_id: str
     start: str
     end: str
 
@@ -287,6 +288,7 @@ class ProfileCandidates(TypedDict):
 class ProfilerMeta:
     project_id: int
     profiler_id: str
+    thread_id: str
     start: float
     end: float
 
@@ -375,7 +377,6 @@ class FlamegraphExecutor:
         }
 
     def get_profile_candidates_from_transactions(self) -> ProfileCandidates:
-        # TODO: continuous profiles support
         max_profiles = options.get("profiling.flamegraph.profile-set.size")
 
         builder = DiscoverQueryBuilder(
@@ -388,6 +389,7 @@ class FlamegraphExecutor:
                 "precise.finish_ts",
                 "profile.id",
                 "profiler.id",
+                "thread.id",
             ],
             query=self.query,
             limit=max_profiles,
@@ -399,9 +401,18 @@ class FlamegraphExecutor:
         builder.add_conditions(
             [
                 Or(
-                    [
-                        Condition(Column("profile_id"), Op.IS_NOT_NULL),
-                        Condition(Column("profiler_id"), Op.IS_NOT_NULL),
+                    conditions=[
+                        Condition(builder.resolve_column("profile.id"), Op.IS_NOT_NULL),
+                        And(
+                            conditions=[
+                                Condition(builder.resolve_column("profiler.id"), Op.IS_NOT_NULL),
+                                Condition(
+                                    Function("has", [Column("contexts.key"), "trace.thread_id"]),
+                                    Op.EQ,
+                                    1,
+                                ),
+                            ],
+                        ),
                     ],
                 ),
             ],
@@ -419,11 +430,12 @@ class FlamegraphExecutor:
                 ProfilerMeta(
                     project_id=row["project.id"],
                     profiler_id=row["profiler.id"],
+                    thread_id=row["thread.id"],
                     start=row["precise.start_ts"],
                     end=row["precise.finish_ts"],
                 )
                 for row in results["data"]
-                if row["profiler.id"] is not None
+                if row["profiler.id"] is not None and row["thread.id"]
             ]
         )
 
@@ -513,11 +525,16 @@ class FlamegraphExecutor:
                     "project_id": profiler_meta.project_id,
                     "profiler_id": profiler_meta.profiler_id,
                     "chunk_id": row["chunk_id"],
+                    "thread_id": profiler_meta.thread_id,
                     "start": str(int(max(start, profiler_meta.start) * 1.0e9)),
                     "end": str(int(min(end, profiler_meta.end) * 1.0e9)),
                 }
             )
 
+        # TODO: There is the possibility that different transactions use the same
+        # profiler, chunk and thread ids. So make sure to merge overlapping candidates
+        # to avoid using the same sample multiple times.
+
         return continuous_profile_candidates
 
     def _query_chunks_for_profilers(self, query: Query) -> Mapping[str, Any]:

+ 8 - 0
src/sentry/snuba/events.py

@@ -765,6 +765,14 @@ class Columns(Enum):
         issue_platform_name=None,  # TODO: This doesn't exist yet
         alias="profiler.id",
     )
+    THREAD_ID = Column(
+        group_name=None,
+        event_name=None,
+        transaction_name="contexts[trace.thread_id]",
+        discover_name="contexts[trace.thread_id]",
+        issue_platform_name=None,
+        alias="thread.id",
+    )
 
     REPLAY_ID = Column(
         group_name=None,

+ 32 - 3
tests/sentry/api/endpoints/test_organization_profiling_profiles.py

@@ -5,7 +5,7 @@ from uuid import uuid4
 from django.http import HttpResponse
 from django.urls import reverse
 from rest_framework.exceptions import ErrorDetail
-from snuba_sdk import Column, Condition, Function, Op, Or
+from snuba_sdk import And, Column, Condition, Function, Op, Or
 
 from sentry.profiles.flamegraph import FlamegraphExecutor
 from sentry.profiles.utils import proxy_profiling_service
@@ -138,6 +138,7 @@ class OrganizationProfilingFlamegraphTest(ProfilesSnubaTestCase):
         transaction=None,
         profile_id=None,
         profiler_id=None,
+        thread_id=None,
         project=None,
     ):
         data = load_data("transaction", timestamp=self.ten_mins_ago)
@@ -151,6 +152,11 @@ class OrganizationProfilingFlamegraphTest(ProfilesSnubaTestCase):
         if profiler_id is not None:
             data.setdefault("contexts", {}).setdefault("profile", {})["profiler_id"] = profiler_id
 
+        if thread_id is not None:
+            data.setdefault("contexts", {}).setdefault("trace", {}).setdefault("data", {})[
+                "thread.id"
+            ] = thread_id
+
         self.store_event(data, project_id=project.id if project else self.project.id)
 
         return data
@@ -281,7 +287,18 @@ class OrganizationProfilingFlamegraphTest(ProfilesSnubaTestCase):
                     Or(
                         conditions=[
                             Condition(Column("profile_id"), Op.IS_NOT_NULL),
-                            Condition(Column("profiler_id"), Op.IS_NOT_NULL),
+                            And(
+                                conditions=[
+                                    Condition(Column("profiler_id"), Op.IS_NOT_NULL),
+                                    Condition(
+                                        Function(
+                                            "has", [Column("contexts.key"), "trace.thread_id"]
+                                        ),
+                                        Op.EQ,
+                                        1,
+                                    ),
+                                ],
+                            ),
                         ],
                     )
                     in snql_request.query.where
@@ -379,9 +396,11 @@ class OrganizationProfilingFlamegraphTest(ProfilesSnubaTestCase):
 
         # this transaction has continuous profile with a matching chunk (to be mocked below)
         profiler_id = uuid4().hex
+        thread_id = "12345"
         profiler_transaction = self.store_transaction(
             transaction="foo",
             profiler_id=profiler_id,
+            thread_id=thread_id,
             project=self.project,
         )
         start_timestamp = datetime.fromtimestamp(profiler_transaction["start_timestamp"])
@@ -430,7 +449,16 @@ class OrganizationProfilingFlamegraphTest(ProfilesSnubaTestCase):
             Or(
                 conditions=[
                     Condition(Column("profile_id"), Op.IS_NOT_NULL),
-                    Condition(Column("profiler_id"), Op.IS_NOT_NULL),
+                    And(
+                        conditions=[
+                            Condition(Column("profiler_id"), Op.IS_NOT_NULL),
+                            Condition(
+                                Function("has", [Column("contexts.key"), "trace.thread_id"]),
+                                Op.EQ,
+                                1,
+                            ),
+                        ],
+                    ),
                 ],
             )
             in snql_request.query.where
@@ -452,6 +480,7 @@ class OrganizationProfilingFlamegraphTest(ProfilesSnubaTestCase):
                         "project_id": self.project.id,
                         "profiler_id": profiler_id,
                         "chunk_id": chunk["chunk_id"],
+                        "thread_id": thread_id,
                         "start": str(int(profiler_transaction["start_timestamp"] * 1e9)),
                         "end": str(int(profiler_transaction["timestamp"] * 1e9)),
                     },