Browse Source

feat(spans): Try different json libraries for span deserialization (#70055)

Try different JSON libraries (orjson and rapidjson) for span deserialization.
Track a metric to see what kind of improvement we get.
Add an option to profile the transaction in case we'd like to see what's
happening.
Shruthi 10 months ago
parent
commit
47008a764f

+ 1 - 0
src/sentry/conf/types/sdk_config.py

@@ -21,6 +21,7 @@ class SdkConfig(TypedDict):
     before_send: NotRequired[Callable[[Event, Hint], Event | None]]
     before_send_transaction: NotRequired[Callable[[Event, Hint], Event | None]]
     profiles_sample_rate: NotRequired[float]
+    profiles_sampler: NotRequired[Callable[[dict[str, Any]], float]]
     profiler_mode: NotRequired[Literal["sleep", "thread", "gevent", "unknown"]]
     enable_db_query_source: NotRequired[bool]
     db_query_source_threshold_ms: NotRequired[int]

+ 16 - 0
src/sentry/options/defaults.py

@@ -2374,6 +2374,22 @@ register(
     default=False,
     flags=FLAG_PRIORITIZE_DISK | FLAG_AUTOMATOR_MODIFIABLE,
 )
+# Profile sampling rate that profiles_sampler returns for the
+# "spans.process.process_message" transaction. Defaults to 0.0.
+register(
+    "standalone-spans.profile-process-messages.rate",
+    type=Float,
+    default=0.0,
+    flags=FLAG_PRIORITIZE_DISK | FLAG_AUTOMATOR_MODIFIABLE,
+)
+# When enabled, the spans consumer deserializes span payloads with rapidjson.
+# The orjson option below takes precedence if both are enabled.
+register(
+    "standalone-spans.deserialize-spans-rapidjson.enable",
+    default=False,
+    flags=FLAG_PRIORITIZE_DISK | FLAG_AUTOMATOR_MODIFIABLE,
+)
+# When enabled, the spans consumer deserializes span payloads with orjson.
+register(
+    "standalone-spans.deserialize-spans-orjson.enable",
+    default=False,
+    flags=FLAG_PRIORITIZE_DISK | FLAG_AUTOMATOR_MODIFIABLE,
+)
 register(
     "indexed-spans.agg-span-waterfall.enable",
     default=False,

+ 67 - 51
src/sentry/spans/consumers/process/factory.py

@@ -4,6 +4,8 @@ from collections import defaultdict
 from collections.abc import Mapping
 from typing import Any
 
+import orjson
+import rapidjson
 import sentry_sdk
 from arroyo import Topic as ArroyoTopic
 from arroyo.backends.kafka import KafkaProducer, build_kafka_configuration
@@ -68,7 +70,15 @@ def prepare_buffered_segment_payload(segments) -> bytes:
     return b'{"spans": [' + segment_str + b"]}"
 
 
-def _deserialize_span(value: bytes) -> Mapping[str, Any]:
+@metrics.wraps("spans.consumers.process.deserialize_span")
+def _deserialize_span(value: bytes, use_orjson=False, use_rapidjson=False) -> Mapping[str, Any]:
+    """
+    Deserializes a raw span payload into a mapping.
+
+    `use_orjson` is checked first and wins when both flags are set;
+    `use_rapidjson` is checked second. When neither flag is set the payload
+    is decoded with `SPAN_SCHEMA.decode`.
+
+    The `json_lib` tag is only set on the orjson and rapidjson paths; the
+    default path sets no tag.
+    """
+    # NOTE(review): the orjson/rapidjson paths bypass SPAN_SCHEMA entirely, so
+    # presumably they skip whatever checks SPAN_SCHEMA.decode performs - confirm.
+    if use_orjson:
+        sentry_sdk.set_tag("json_lib", "orjson")
+        return orjson.loads(value)
+    if use_rapidjson:
+        sentry_sdk.set_tag("json_lib", "rapidjson")
+        return rapidjson.loads(value)
+
     return SPAN_SCHEMA.decode(value)
 
 
@@ -97,8 +107,13 @@ def _process_message(message: Message[KafkaPayload]) -> SpanMessageWithMetadata
         timestamp = int(message.value.timestamp.timestamp())
         partition = message.value.partition.index
 
+        use_orjson = options.get("standalone-spans.deserialize-spans-orjson.enable")
+        use_rapidjson = options.get("standalone-spans.deserialize-spans-rapidjson.enable")
+
         with txn.start_child(op="deserialize"):
-            span = _deserialize_span(payload_value)
+            span = _deserialize_span(
+                payload_value, use_orjson=use_orjson, use_rapidjson=use_rapidjson
+            )
 
         segment_id: str | None = span.get("segment_id", None)
         if segment_id is None:
@@ -121,49 +136,52 @@ def process_message(message: Message[KafkaPayload]) -> SpanMessageWithMetadata |
         return FILTERED_PAYLOAD
 
 
-def _batch_write_to_redis(
-    message: Message[ValuesBatch[SpanMessageWithMetadata]],
-):
+def _batch_write_to_redis(message: Message[ValuesBatch[SpanMessageWithMetadata]]):
     """
     Gets a batch of `SpanMessageWithMetadata` and creates a dictionary with
     segment_id as key and a list of spans belonging to that segment_id as value.
     Pushes the batch of spans to redis.
     """
-    batch = message.payload
-    latest_ts_by_partition: dict[int, int] = {}
-    spans_map: dict[SegmentKey, list[bytes]] = defaultdict(list)
-    segment_first_seen_ts: dict[SegmentKey, int] = {}
-
-    for item in batch:
-        payload = item.payload
-        partition = payload.partition
-        segment_id = payload.segment_id
-        project_id = payload.project_id
-        span = payload.span
-        timestamp = payload.timestamp
-
-        key = SegmentKey(segment_id, project_id, partition)
-
-        # Collects spans for each segment_id
-        spans_map[key].append(span)
-
-        # Collects "first_seen" timestamps for each segment in batch.
-        # Batch step doesn't guarantee order, so pick lowest ts.
-        if key not in segment_first_seen_ts or timestamp < segment_first_seen_ts[key]:
-            segment_first_seen_ts[key] = timestamp
-
-        # Collects latest timestamps processed in each partition. It is
-        # important to keep track of this per partition because message
-        # timestamps are guaranteed to be monotonic per partition only.
-        if partition not in latest_ts_by_partition or timestamp > latest_ts_by_partition[partition]:
-            latest_ts_by_partition[partition] = timestamp
-
-    client = RedisSpansBuffer()
-    return client.batch_write_and_check_processing(
-        spans_map=spans_map,
-        segment_first_seen_ts=segment_first_seen_ts,
-        latest_ts_by_partition=latest_ts_by_partition,
-    )
+    # Use a transaction name of its own: _expand_segments already reports as
+    # "spans.process.expand_segments", and sharing that name would mix the
+    # two steps together under one transaction name.
+    with sentry_sdk.start_transaction(op="process", name="spans.process.batch_write_to_redis"):
+        batch = message.payload
+        latest_ts_by_partition: dict[int, int] = {}
+        spans_map: dict[SegmentKey, list[bytes]] = defaultdict(list)
+        segment_first_seen_ts: dict[SegmentKey, int] = {}
+
+        for item in batch:
+            payload = item.payload
+            partition = payload.partition
+            segment_id = payload.segment_id
+            project_id = payload.project_id
+            span = payload.span
+            timestamp = payload.timestamp
+
+            key = SegmentKey(segment_id, project_id, partition)
+
+            # Collects spans for each segment_id
+            spans_map[key].append(span)
+
+            # Collects "first_seen" timestamps for each segment in batch.
+            # Batch step doesn't guarantee order, so pick lowest ts.
+            if key not in segment_first_seen_ts or timestamp < segment_first_seen_ts[key]:
+                segment_first_seen_ts[key] = timestamp
+
+            # Collects latest timestamps processed in each partition. It is
+            # important to keep track of this per partition because message
+            # timestamps are guaranteed to be monotonic per partition only.
+            if (
+                partition not in latest_ts_by_partition
+                or timestamp > latest_ts_by_partition[partition]
+            ):
+                latest_ts_by_partition[partition] = timestamp
+
+        client = RedisSpansBuffer()
+
+        return client.batch_write_and_check_processing(
+            spans_map=spans_map,
+            segment_first_seen_ts=segment_first_seen_ts,
+            latest_ts_by_partition=latest_ts_by_partition,
+        )
 
 
 def batch_write_to_redis(
@@ -177,19 +195,17 @@ def batch_write_to_redis(
 
 
 def _expand_segments(should_process_segments: list[ProcessSegmentsContext]):
-    buffered_segments: list[KafkaPayload | FilteredPayload] = []
+    with sentry_sdk.start_transaction(op="process", name="spans.process.expand_segments") as txn:
+        buffered_segments: list[KafkaPayload | FilteredPayload] = []
 
-    for result in should_process_segments:
-        timestamp = result.timestamp
-        partition = result.partition
-        should_process = result.should_process_segments
+        for result in should_process_segments:
+            timestamp = result.timestamp
+            partition = result.partition
+            should_process = result.should_process_segments
 
-        if not should_process:
-            continue
+            if not should_process:
+                continue
 
-        with sentry_sdk.start_transaction(
-            op="process", name="spans.process.expand_segments"
-        ) as txn:
             client = RedisSpansBuffer()
             payload_context = {}
 
@@ -202,7 +218,7 @@ def _expand_segments(should_process_segments: list[ProcessSegmentsContext]):
 
             # With pipelining, redis server is forced to queue replies using
             # up memory, so batching the keys we fetch.
-            with txn.start_child(op="process", description="produce_fetched_segments"):
+            with txn.start_child(op="process", description="read_and_expire_many_segments"):
                 for i in range(0, len(keys), BATCH_SIZE):
                     segments = client.read_and_expire_many_segments(keys[i : i + BATCH_SIZE])
 

+ 17 - 1
src/sentry/utils/sdk.py

@@ -201,6 +201,22 @@ def traces_sampler(sampling_context):
     return float(settings.SENTRY_BACKEND_APM_SAMPLING or 0)
 
 
+def profiles_sampler(sampling_context):
+    """
+    Returns the profile sampling rate for the transaction being sampled.
+
+    If `sampling_context` has a "transaction_context" entry whose "name" is
+    listed in the per-transaction table below, that transaction's rate is
+    returned. Otherwise the rate comes from
+    `settings.SENTRY_PROFILES_SAMPLE_RATE` (0.0 when that is unset or falsy).
+    """
+    # Built on every call, so the option value is re-read each time.
+    PROFILES_SAMPLING_RATE = {
+        "spans.process.process_message": options.get(
+            "standalone-spans.profile-process-messages.rate"
+        )
+    }
+    if "transaction_context" in sampling_context:
+        transaction_name = sampling_context["transaction_context"].get("name")
+
+        if transaction_name in PROFILES_SAMPLING_RATE:
+            return PROFILES_SAMPLING_RATE[transaction_name]
+
+    # Default to the sampling rate in settings
+    return float(settings.SENTRY_PROFILES_SAMPLE_RATE or 0)
+
+
 def before_send_transaction(event: Event, _: Hint) -> Event | None:
     # Discard generic redirects.
     # This condition can be removed once https://github.com/getsentry/team-sdks/issues/48 is fixed.
@@ -300,7 +316,7 @@ def configure_sdk():
         experimental_transport = None
 
     if settings.SENTRY_PROFILING_ENABLED:
-        sdk_options["profiles_sample_rate"] = settings.SENTRY_PROFILES_SAMPLE_RATE
+        sdk_options["profiles_sampler"] = profiles_sampler
         sdk_options["profiler_mode"] = settings.SENTRY_PROFILER_MODE
 
     class MultiplexingTransport(sentry_sdk.transport.Transport):