feat(replays): Separate I/O and compute portions of the runtime into different Arroyo tasks (#86360)

I/O and compute were previously separated into different stages of the
same task. With this PR we add a two-step recording consumer which
separates compute and I/O into two different Arroyo tasks. This allows
us to continue processing messages while we upload files to GCS.

This does not overwrite our current production consumer. We deprecate
the old recording_buffered consumer and replace it with this new
"two-step" consumer. We'll test it in S4S before transitioning our
production consumer.
Colton Allen committed 1 week ago · commit ee754813af
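
A minimal sketch of the two-step composition, mirroring
RecordingTwoStepStrategyFactory.create_with_partitions in the diff below. The
compute and upload functions and the TwoStepSketchFactory class are
illustrative stand-ins for the real process_message and commit_message, not
part of this PR:

    from collections.abc import Mapping

    from arroyo.backends.kafka.consumer import KafkaPayload
    from arroyo.processing.strategies import RunTask, RunTaskInThreads
    from arroyo.processing.strategies.abstract import ProcessingStrategy, ProcessingStrategyFactory
    from arroyo.processing.strategies.commit import CommitOffsets
    from arroyo.types import Commit, Message, Partition


    def compute(message: Message[KafkaPayload]) -> bytes:
        # Step 1 (CPU-bound): decode, decompress, and prepare the recording.
        # Runs on the main consumer thread.
        return message.payload.value


    def upload(message: Message[bytes]) -> None:
        # Step 2 (I/O-bound): write the prepared recording to blob storage.
        pass


    class TwoStepSketchFactory(ProcessingStrategyFactory[KafkaPayload]):
        def create_with_partitions(
            self, commit: Commit, partitions: Mapping[Partition, int]
        ) -> ProcessingStrategy[KafkaPayload]:
            # The upload step runs in a thread pool behind the compute step, so a
            # slow GCS write back-pressures through max_pending_futures instead of
            # stalling message processing outright.
            return RunTask(
                function=compute,
                next_step=RunTaskInThreads(
                    processing_function=upload,
                    concurrency=16,            # --num-threads default
                    max_pending_futures=256,   # --max-pending-futures default
                    next_step=CommitOffsets(commit),
                ),
            )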

+ 79 - 0
bin/mock-replay-recording

@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+"""Insert mock replay recording messages into the INGEST_REPLAYS_RECORDINGS topic.
+
+Helpful commands:
+
+    - Run the consumer.
+        - `sentry run consumer ingest-replay-recordings --consumer-group 0`
+        - `sentry run consumer ingest-replay-recordings-two-step --consumer-group 0`
+    - Check if offsets are committed correctly.
+        - `docker exec -it kafka-kafka-1 kafka-consumer-groups --bootstrap-server localhost:9092 --describe --group 0`
+"""
+
+import logging
+import os
+import time
+import uuid
+
+import click
+import django
+from arroyo import Topic as ArroyoTopic
+from arroyo.backends.kafka import KafkaPayload, KafkaProducer, build_kafka_configuration
+from sentry_kafka_schemas.codecs import Codec
+from sentry_kafka_schemas.schema_types.ingest_replay_recordings_v1 import ReplayRecording
+
+from sentry.conf.types.kafka_definition import Topic, get_topic_codec
+from sentry.runner import configure
+from sentry.utils.kafka_config import get_kafka_producer_cluster_options, get_topic_definition
+
+configure()
+
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "sentry.conf.server")
+
+
+django.setup()
+
+
+logger = logging.getLogger(__name__)
+
+
+def get_producer() -> KafkaProducer:
+    cluster_name = get_topic_definition(Topic.INGEST_REPLAYS_RECORDINGS)["cluster"]
+    producer_config = get_kafka_producer_cluster_options(cluster_name)
+    return KafkaProducer(build_kafka_configuration(default_config=producer_config))
+
+
+RECORDING_CODEC: Codec[ReplayRecording] = get_topic_codec(Topic.INGEST_REPLAYS_RECORDINGS)
+
+
+@click.command()
+@click.option("--organization-id", type=int, required=True, help="Organization ID")
+@click.option("--project-id", type=int, required=True, help="Project ID")
+def main(organization_id: int, project_id: int) -> None:
+    """Produce a mock uptime result message to the INGEST_REPLAYS_RECORDINGS topic."""
+    message: ReplayRecording = {
+        "key_id": None,
+        "org_id": organization_id,
+        "payload": b'{"segment_id":0}\n[]',
+        "project_id": project_id,
+        "received": int(time.time()),
+        "replay_event": None,
+        "replay_id": uuid.uuid4().hex,
+        "replay_video": None,
+        "retention_days": 30,
+        "type": "replay_recording_not_chunked",
+        "version": 1,
+    }
+
+    producer = get_producer()
+    topic = get_topic_definition(Topic.INGEST_REPLAYS_RECORDINGS)["real_topic_name"]
+    payload = KafkaPayload(None, RECORDING_CODEC.encode(message), [])
+
+    producer.produce(ArroyoTopic(topic), payload)
+    producer.close()
+
+    logger.info("Successfully produced message to %s", topic)
+
+
+if __name__ == "__main__":
+    main()

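The script can be exercised locally once one of the consumers listed in its
docstring is running; both click options are required, and the IDs below are
placeholders for a real organization and project:

    python bin/mock-replay-recording --organization-id 1 --project-id 1
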
+ 8 - 22
src/sentry/consumers/__init__.py

@@ -82,26 +82,12 @@ def ingest_replay_recordings_options() -> list[click.Option]:
     return options
 
 
-def ingest_replay_recordings_buffered_options() -> list[click.Option]:
-    """Return a list of ingest-replay-recordings-buffered options."""
-    options = [
-        click.Option(
-            ["--max-buffer-message-count", "max_buffer_message_count"],
-            type=int,
-            default=100,
-        ),
-        click.Option(
-            ["--max-buffer-size-in-bytes", "max_buffer_size_in_bytes"],
-            type=int,
-            default=2_500_000,
-        ),
-        click.Option(
-            ["--max-buffer-time-in-seconds", "max_buffer_time_in_seconds"],
-            type=int,
-            default=1,
-        ),
+def ingest_replay_recordings_two_step_options() -> list[click.Option]:
+    """Return a list of ingest-replay-recordings-two-step options."""
+    return [
+        click.Option(["--max-pending-futures", "max_pending_futures"], type=int, default=256),
+        click.Option(["--num-threads", "num_threads"], type=int, default=16),
     ]
-    return options
 
 
 def ingest_monitors_options() -> list[click.Option]:
@@ -272,10 +258,10 @@ KAFKA_CONSUMERS: Mapping[str, ConsumerDefinition] = {
         "strategy_factory": "sentry.replays.consumers.recording.ProcessReplayRecordingStrategyFactory",
         "click_options": ingest_replay_recordings_options(),
     },
-    "ingest-replay-recordings-buffered": {
+    "ingest-replay-recordings-two-step": {
         "topic": Topic.INGEST_REPLAYS_RECORDINGS,
-        "strategy_factory": "sentry.replays.consumers.recording_buffered.RecordingBufferedStrategyFactory",
-        "click_options": ingest_replay_recordings_buffered_options(),
+        "strategy_factory": "sentry.replays.consumers.recording_two_step.RecordingTwoStepStrategyFactory",
+        "click_options": ingest_replay_recordings_two_step_options(),
     },
     "ingest-monitors": {
         "topic": Topic.INGEST_MONITORS,

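With the definition above registered, the new consumer starts the same way as
the existing one (this is the command from the mock script's docstring; when
omitted, --max-pending-futures and --num-threads fall back to their defaults
of 256 and 16):

    sentry run consumer ingest-replay-recordings-two-step --consumer-group 0
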
+ 0 - 396
src/sentry/replays/consumers/recording_buffered.py

@@ -1,396 +0,0 @@
-"""Replay recording consumer implementation.
-
-# Configuration
-
-The consumer implementation offers three configuration parameters which control the size of the
-buffer.  Those options are:
-
-**max_buffer_message_count:**
-
-This option limits the number of processed messages held in memory at a given time.
-
-**max_buffer_size_in_bytes:**
-
-This option limits the amount of recording-bytes held in memory at a given time.
-
-**max_buffer_time_in_seconds:**
-
-This option limits the amount of time a message will wait in the buffer before being committed. If
-this value exceeds the Kafka commit interval then the Kafka offsets will not be committed until the
-buffer has been flushed and fully committed.
-
-# Errors
-
-All deterministic errors must be handled otherwise the consumer will deadlock and progress will
-be impossible. Unhandled errors will crash the process and force a restart from the last committed
-offset.
-
-An unhandled, intermittent error rate which exceeds one-per-second will deadlock any consumer
-implementation which commits at one second intervals. To fix this case either the message
-processing rate of the consumer must be slowed, the error must be handled, or the commit interval
-must be decreased.
-
-The cost of unwinding an error is determined by the throughput of the consumer implementation. If
-consumer throughput is high an error will force the reprocessing of a larger number of messages
-than if throughput is low. The number of messages being operated on in parallel is material only
-insofar as it impacts the throughput of the consumer.
-"""
-
-from __future__ import annotations
-
-import logging
-import time
-import zlib
-from collections.abc import Mapping
-from concurrent.futures import ThreadPoolExecutor
-from typing import Any, TypedDict, cast
-
-import sentry_sdk
-from arroyo.backends.kafka.consumer import KafkaPayload
-from arroyo.processing.strategies.abstract import ProcessingStrategy, ProcessingStrategyFactory
-from arroyo.processing.strategies.buffer import Buffer
-from arroyo.processing.strategies.commit import CommitOffsets
-from arroyo.processing.strategies.run_task import RunTask
-from arroyo.types import BaseValue, Commit, Message, Partition
-from sentry_kafka_schemas.codecs import Codec, ValidationError
-from sentry_kafka_schemas.schema_types.ingest_replay_recordings_v1 import ReplayRecording
-
-from sentry.conf.types.kafka_definition import Topic, get_topic_codec
-from sentry.logging.handlers import SamplingFilter
-from sentry.models.project import Project
-from sentry.replays.lib.storage import (
-    RecordingSegmentStorageMeta,
-    make_recording_filename,
-    storage_kv,
-)
-from sentry.replays.usecases.ingest import (
-    LOG_SAMPLE_RATE,
-    DropSilently,
-    _report_size_metrics,
-    decompress_segment,
-    parse_headers,
-    track_initial_segment_event,
-)
-from sentry.replays.usecases.ingest.dom_index import (
-    ReplayActionsEvent,
-    emit_replay_actions,
-    parse_replay_actions,
-)
-from sentry.replays.usecases.pack import pack
-from sentry.utils import json, metrics
-
-logger = logging.getLogger(__name__)
-logger.addFilter(SamplingFilter(LOG_SAMPLE_RATE))
-
-RECORDINGS_CODEC: Codec[ReplayRecording] = get_topic_codec(Topic.INGEST_REPLAYS_RECORDINGS)
-
-
-def cast_payload_bytes(x: Any) -> bytes:
-    """
-    Coerces a type from Any to bytes
-
-    sentry-kafka-schemas does not support the typing of bytes for replay's
-    payloads, and so sometimes we have to cast values around to work around the
-    schema.
-
-    Use this helper function to explicitly annotate that. At a later point when
-    sentry-kafka-schemas is fixed, we can replace all usages of this function
-    with the proper solution.
-    """
-    return x
-
-
-def cast_payload_from_bytes(x: bytes) -> Any:
-    """
-    Coerces a type from bytes to Any.
-
-    See cast_payload_bytes
-    """
-    return x
-
-
-class BufferCommitFailed(Exception):
-    pass
-
-
-class RecordingBufferedStrategyFactory(ProcessingStrategyFactory[KafkaPayload]):
-    """
-    This consumer processes replay recordings, which are compressed payloads split up into
-    chunks.
-    """
-
-    def __init__(
-        self,
-        max_buffer_message_count: int,
-        max_buffer_size_in_bytes: int,
-        max_buffer_time_in_seconds: int,
-    ) -> None:
-        self.max_buffer_message_count = max_buffer_message_count
-        self.max_buffer_size_in_bytes = max_buffer_size_in_bytes
-        self.max_buffer_time_in_seconds = max_buffer_time_in_seconds
-
-    def create_with_partitions(
-        self,
-        commit: Commit,
-        partitions: Mapping[Partition, int],
-    ) -> ProcessingStrategy[KafkaPayload]:
-        return Buffer(
-            buffer=RecordingBuffer(
-                self.max_buffer_message_count,
-                self.max_buffer_size_in_bytes,
-                self.max_buffer_time_in_seconds,
-            ),
-            next_step=RunTask(
-                function=process_commit,
-                next_step=CommitOffsets(commit),
-            ),
-        )
-
-
-class UploadEvent(TypedDict):
-    key: str
-    value: bytes
-
-
-class InitialSegmentEvent(TypedDict):
-    key_id: int | None
-    org_id: int
-    project_id: int
-    received: int
-    replay_id: str
-    is_replay_video: bool
-
-
-class RecordingBuffer:
-    def __init__(
-        self,
-        max_buffer_message_count: int,
-        max_buffer_size_in_bytes: int,
-        max_buffer_time_in_seconds: int,
-    ) -> None:
-        self.upload_events: list[UploadEvent] = []
-        self.initial_segment_events: list[InitialSegmentEvent] = []
-        self.replay_action_events: list[ReplayActionsEvent] = []
-
-        self.max_buffer_message_count = max_buffer_message_count
-        self.max_buffer_size_in_bytes = max_buffer_size_in_bytes
-        self.max_buffer_time_in_seconds = max_buffer_time_in_seconds
-
-        self._buffer_size_in_bytes: int = 0
-        self._buffer_next_commit_time: int = int(time.time()) + self.max_buffer_time_in_seconds
-
-    @property
-    def buffer(
-        self,
-    ) -> tuple[list[UploadEvent], list[InitialSegmentEvent], list[ReplayActionsEvent]]:
-        return (self.upload_events, self.initial_segment_events, self.replay_action_events)
-
-    @property
-    def is_empty(self) -> bool:
-        return len(self.upload_events) == 0
-
-    @property
-    def is_ready(self) -> bool:
-        """Return "True" if we're ready to commit the buffer."""
-        return (
-            self.has_exceeded_max_message_count
-            or self.has_exceeded_buffer_byte_size
-            or self.has_exceeded_last_buffer_commit_time
-        )
-
-    @property
-    def has_exceeded_max_message_count(self) -> bool:
-        """Return "True" if we have accumulated the configured number of messages."""
-        return len(self.upload_events) >= self.max_buffer_message_count
-
-    @property
-    def has_exceeded_buffer_byte_size(self) -> bool:
-        """Return "True" if we have accumulated the configured number of bytes."""
-        return self._buffer_size_in_bytes >= self.max_buffer_size_in_bytes
-
-    @property
-    def has_exceeded_last_buffer_commit_time(self) -> bool:
-        """Return "True" if we have waited to commit for the configured amount of time."""
-        return time.time() >= self._buffer_next_commit_time
-
-    def append(self, message: BaseValue[KafkaPayload]) -> None:
-        try:
-            process_message(self, message.payload.value)
-        except DropSilently:
-            pass
-
-    def new(self) -> RecordingBuffer:
-        return RecordingBuffer(
-            max_buffer_message_count=self.max_buffer_message_count,
-            max_buffer_size_in_bytes=self.max_buffer_size_in_bytes,
-            max_buffer_time_in_seconds=self.max_buffer_time_in_seconds,
-        )
-
-
-# Message processor.
-
-
-def process_message(buffer: RecordingBuffer, message: bytes) -> None:
-    with sentry_sdk.start_span(op="replays.consumer.recording.decode_kafka_message"):
-        try:
-            decoded_message: ReplayRecording = RECORDINGS_CODEC.decode(message)
-        except ValidationError:
-            # TODO: DLQ
-            logger.exception("Could not decode recording message.")
-            return None
-
-    headers, segment_data = parse_headers(
-        cast_payload_bytes(decoded_message["payload"]), decoded_message["replay_id"]
-    )
-
-    segment = decompress_segment(segment_data)
-    compressed_segment, recording_data = segment.compressed, segment.decompressed
-    _report_size_metrics(len(segment.compressed), len(segment.decompressed))
-
-    recording_segment = RecordingSegmentStorageMeta(
-        project_id=decoded_message["project_id"],
-        replay_id=decoded_message["replay_id"],
-        retention_days=decoded_message["retention_days"],
-        segment_id=headers["segment_id"],
-    )
-
-    if replay_video := decoded_message.get("replay_video"):
-        # Logging org info for bigquery
-        logger.info(
-            "sentry.replays.slow_click",
-            extra={
-                "event_type": "mobile_event",
-                "org_id": decoded_message["org_id"],
-                "project_id": decoded_message["project_id"],
-                "size": len(replay_video),  # type: ignore[arg-type]
-            },
-        )
-
-        # Record video size for COGS analysis.
-        metrics.incr("replays.recording_consumer.replay_video_count")
-        metrics.distribution(
-            "replays.recording_consumer.replay_video_size",
-            len(replay_video),  # type: ignore[arg-type]
-            unit="byte",
-        )
-
-        dat = zlib.compress(pack(rrweb=recording_data, video=cast(bytes, replay_video)))
-        buffer.upload_events.append(
-            {"key": make_recording_filename(recording_segment), "value": dat}
-        )
-
-        # Track combined payload size.
-        metrics.distribution(
-            "replays.recording_consumer.replay_video_event_size", len(dat), unit="byte"
-        )
-    else:
-        buffer.upload_events.append(
-            {"key": make_recording_filename(recording_segment), "value": compressed_segment}
-        )
-
-    # Initial segment events are recorded in the state machine.
-    if headers["segment_id"] == 0:
-        buffer.initial_segment_events.append(
-            {
-                "key_id": decoded_message["key_id"],
-                "org_id": decoded_message["org_id"],
-                "project_id": decoded_message["project_id"],
-                "received": decoded_message["received"],
-                "replay_id": decoded_message["replay_id"],
-                "is_replay_video": decoded_message.get("replay_video") is not None,
-            }
-        )
-
-    try:
-        with sentry_sdk.start_span(op="replays.consumer.recording.json_loads_segment"):
-            parsed_recording_data = json.loads(recording_data)
-            parsed_replay_event = (
-                json.loads(cast_payload_bytes(decoded_message["replay_event"]))
-                if decoded_message.get("replay_event")
-                else None
-            )
-
-        project = Project.objects.get_from_cache(id=decoded_message["project_id"])
-        replay_actions = parse_replay_actions(
-            project,
-            decoded_message["replay_id"],
-            decoded_message["retention_days"],
-            parsed_recording_data,
-            parsed_replay_event,
-            org_id=decoded_message["org_id"],
-        )
-
-        if replay_actions is not None:
-            buffer.replay_action_events.append(replay_actions)
-    except Exception:
-        logging.exception(
-            "Failed to parse recording org=%s, project=%s, replay=%s, segment=%s",
-            decoded_message["org_id"],
-            decoded_message["project_id"],
-            decoded_message["replay_id"],
-            headers["segment_id"],
-        )
-
-
-# Commit.
-
-
-def process_commit(
-    message: Message[tuple[list[UploadEvent], list[InitialSegmentEvent], list[ReplayActionsEvent]]]
-) -> None:
-    # High I/O section.
-    with sentry_sdk.start_span(op="replays.consumer.recording.commit_buffer"):
-        upload_events, initial_segment_events, replay_action_events = message.payload
-        commit_uploads(upload_events)
-        commit_initial_segments(initial_segment_events)
-        commit_replay_actions(replay_action_events)
-
-
-def commit_uploads(upload_events: list[UploadEvent]) -> None:
-    with sentry_sdk.start_span(op="replays.consumer.recording.upload_segments"):
-        # This will run to completion taking potentially an infinite amount of time. However,
-        # that outcome is unlikely. In the event of an indefinite backlog the process can be
-        # restarted.
-        with ThreadPoolExecutor(max_workers=len(upload_events)) as pool:
-            futures = [pool.submit(_do_upload, upload) for upload in upload_events]
-
-    has_errors = False
-
-    # These futures should never fail unless there is a service-provider issue.
-    for error in filter(lambda n: n is not None, (fut.exception() for fut in futures)):
-        has_errors = True
-        sentry_sdk.capture_exception(error)
-
-    # If errors were detected the batch is failed as a whole. This wastes computation and
-    # incurs some amount service-provider cost.  However, this strategy is an improvement
-    # over dropping messages or manually retrying indefinitely.
-    #
-    # Raising an exception crashes the process and forces a restart from the last committed
-    # offset. No rate-limiting is applied.
-    if has_errors:
-        raise BufferCommitFailed("Could not upload one or more recordings.")
-
-
-def commit_initial_segments(initial_segment_events: list[InitialSegmentEvent]) -> None:
-    for segment in initial_segment_events:
-        track_initial_segment_event(
-            segment["org_id"],
-            segment["project_id"],
-            segment["replay_id"],
-            segment["key_id"],
-            segment["received"],
-            segment["is_replay_video"],
-        )
-
-
-def commit_replay_actions(replay_action_events: list[ReplayActionsEvent]) -> None:
-    for actions in replay_action_events:
-        emit_replay_actions(actions)
-
-
-def _do_upload(upload_event: UploadEvent) -> None:
-    with sentry_sdk.start_span(op="replays.consumer.recording.upload_segment"):
-        # If an error occurs this will retry up to five times by default.
-        #
-        # Refer to `src.sentry.filestore.gcs.GCS_RETRIES`.
-        storage_kv.set(upload_event["key"], upload_event["value"])

+ 79 - 0
src/sentry/replays/consumers/recording_two_step.py

@@ -0,0 +1,79 @@
+import logging
+from collections.abc import Mapping
+
+import sentry_sdk
+from arroyo.backends.kafka.consumer import KafkaPayload
+from arroyo.processing.strategies import RunTask, RunTaskInThreads
+from arroyo.processing.strategies.abstract import ProcessingStrategy, ProcessingStrategyFactory
+from arroyo.processing.strategies.commit import CommitOffsets
+from arroyo.types import Commit, FilteredPayload, Message, Partition
+from django.conf import settings
+
+from sentry.filestore.gcs import GCS_RETRYABLE_ERRORS
+from sentry.replays.usecases.ingest import (
+    DropSilently,
+    ProcessedRecordingMessage,
+    commit_recording_message,
+    parse_recording_message,
+    process_recording_message,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class RecordingTwoStepStrategyFactory(ProcessingStrategyFactory[KafkaPayload]):
+    def __init__(self, max_pending_futures: int = 256, num_threads: int = 16) -> None:
+        self.max_pending_futures = max_pending_futures
+        self.num_threads = num_threads
+
+    def create_with_partitions(
+        self,
+        commit: Commit,
+        partitions: Mapping[Partition, int],
+    ) -> ProcessingStrategy[KafkaPayload]:
+        return RunTask(
+            function=process_message,
+            next_step=RunTaskInThreads(
+                processing_function=commit_message,
+                concurrency=self.num_threads,
+                max_pending_futures=self.max_pending_futures,
+                next_step=CommitOffsets(commit),
+            ),
+        )
+
+
+def process_message(message: Message[KafkaPayload]) -> ProcessedRecordingMessage | FilteredPayload:
+    with sentry_sdk.start_transaction(
+        name="replays.consumer.recording_buffered.process_message",
+        op="replays.consumer.recording_buffered",
+        custom_sampling_context={
+            "sample_rate": getattr(settings, "SENTRY_REPLAY_RECORDINGS_CONSUMER_APM_SAMPLING", 0)
+        },
+    ):
+        try:
+            return process_recording_message(parse_recording_message(message.payload.value))
+        except DropSilently:
+            return FilteredPayload()
+        except Exception:
+            logger.exception("Failed to process replay recording message.")
+            return FilteredPayload()
+
+
+def commit_message(message: Message[ProcessedRecordingMessage]) -> None:
+    with sentry_sdk.start_transaction(
+        name="replays.consumer.recording_buffered.commit_message",
+        op="replays.consumer.recording_buffered",
+        custom_sampling_context={
+            "sample_rate": getattr(settings, "SENTRY_REPLAY_RECORDINGS_CONSUMER_APM_SAMPLING", 0)
+        },
+    ):
+        try:
+            commit_recording_message(message.payload)
+            return None
+        except GCS_RETRYABLE_ERRORS:
+            raise
+        except DropSilently:
+            return None
+        except Exception:
+            logger.exception("Failed to commit replay recording message.")
+            return None

+ 1 - 1
tests/sentry/consumers/test_run.py

@@ -47,7 +47,7 @@ def test_dlq(consumer_def) -> None:
         "ingest-profiles",
         "ingest-occurrences",
         "ingest-replay-recordings",
-        "ingest-replay-recordings-buffered",
+        "ingest-replay-recordings-two-step",
     ]
 
     consumer_name, defn = consumer_def

+ 5 - 39
tests/sentry/replays/consumers/test_recording.py

@@ -3,7 +3,6 @@ from __future__ import annotations
 import time
 import uuid
 import zlib
-from collections.abc import Mapping
 from datetime import datetime
 from unittest.mock import ANY, patch
 
@@ -14,39 +13,13 @@ from sentry_kafka_schemas.schema_types.ingest_replay_recordings_v1 import Replay
 
 from sentry.models.organizationonboardingtask import OnboardingTask, OnboardingTaskStatus
 from sentry.replays.consumers.recording import ProcessReplayRecordingStrategyFactory
-from sentry.replays.consumers.recording_buffered import (
-    RecordingBufferedStrategyFactory,
-    cast_payload_from_bytes,
-)
+from sentry.replays.consumers.recording_two_step import RecordingTwoStepStrategyFactory
 from sentry.replays.lib.storage import _make_recording_filename, storage_kv
 from sentry.replays.models import ReplayRecordingSegment
 from sentry.replays.usecases.pack import unpack
 from sentry.testutils.cases import TransactionTestCase
 
 
-def test_multiprocessing_strategy():
-    # num_processes is the only argument that matters. Setting it to `>1` enables
-    # multi-processing.
-    factory = ProcessReplayRecordingStrategyFactory(
-        num_processes=2,
-        num_threads=1,
-        input_block_size=1,
-        max_batch_size=1,
-        max_batch_time=1,
-        output_block_size=1,
-    )
-
-    def _commit(offsets: Mapping[Partition, int], force: bool = False) -> None:
-        raise NotImplementedError
-
-    # Assert the multi-processing step does not fail to initialize.
-    task = factory.create_with_partitions(_commit, {})
-
-    # Clean up after ourselves by terminating the processing pool spawned by the above call.
-    task.terminate()
-    factory.shutdown()
-
-
 class RecordingTestCase(TransactionTestCase):
     replay_id = uuid.uuid4().hex
     replay_recording_id = uuid.uuid4().hex
@@ -139,9 +112,7 @@ class RecordingTestCase(TransactionTestCase):
                 "project_id": self.project.id,
                 "received": int(time.time()),
                 "retention_days": 30,
-                "payload": cast_payload_from_bytes(
-                    f'{{"segment_id":{segment_id}}}\n'.encode() + message
-                ),
+                "payload": f'{{"segment_id":{segment_id}}}\n'.encode() + message,  # type: ignore[typeddict-item]
                 "replay_event": replay_event,  # type: ignore[typeddict-item]
                 "replay_video": replay_video,  # type: ignore[typeddict-item]
             }
@@ -500,14 +471,9 @@ class ThreadedRecordingTestCase(RecordingTestCase):
     force_synchronous = False
 
 
-# Experimental Buffered Recording Consumer
+# Experimental Two Step Recording Consumer
 
 
-class RecordingBufferedTestCase(RecordingTestCase):
+class RecordingTwoStepTestCase(RecordingTestCase):
     def processing_factory(self):
-        # The options don't matter because we're calling join which commits regardless.
-        return RecordingBufferedStrategyFactory(
-            max_buffer_message_count=1000,
-            max_buffer_size_in_bytes=1000,
-            max_buffer_time_in_seconds=1000,
-        )
+        return RecordingTwoStepStrategyFactory()

+ 0 - 147
tests/sentry/replays/unit/test_recording_buffer.py

@@ -1,147 +0,0 @@
-import datetime
-from unittest.mock import patch
-
-import pytest
-import time_machine
-
-from sentry.replays.consumers.recording_buffered import (
-    BufferCommitFailed,
-    RecordingBuffer,
-    commit_uploads,
-)
-
-
-def test_recording_buffer_commit_default():
-    """Test RecordingBuffer commit readiness."""
-    # Assert all.
-    buffer = RecordingBuffer(0, 0, 0)
-    assert buffer.has_exceeded_max_message_count
-    assert buffer.has_exceeded_buffer_byte_size
-    assert buffer.has_exceeded_last_buffer_commit_time
-    assert buffer.is_ready
-
-    # Assert none.
-    buffer = RecordingBuffer(1, 1, 1)
-    assert not buffer.has_exceeded_max_message_count
-    assert not buffer.has_exceeded_buffer_byte_size
-    assert not buffer.has_exceeded_last_buffer_commit_time
-    assert not buffer.is_ready
-
-    # Assert deadline.
-    buffer = RecordingBuffer(1, 1, 0)
-    assert not buffer.has_exceeded_max_message_count
-    assert not buffer.has_exceeded_buffer_byte_size
-    assert buffer.has_exceeded_last_buffer_commit_time
-    assert buffer.is_ready
-
-    # Assert size.
-    buffer = RecordingBuffer(1, 0, 1)
-    assert not buffer.has_exceeded_max_message_count
-    assert buffer.has_exceeded_buffer_byte_size
-    assert not buffer.has_exceeded_last_buffer_commit_time
-    assert buffer.is_ready
-
-    # Assert max messages.
-    buffer = RecordingBuffer(0, 1, 1)
-    assert buffer.has_exceeded_max_message_count
-    assert not buffer.has_exceeded_buffer_byte_size
-    assert not buffer.has_exceeded_last_buffer_commit_time
-    assert buffer.is_ready
-
-
-def test_recording_buffer_commit_deadline():
-    buffer = RecordingBuffer(
-        max_buffer_message_count=1_000_000,  # Never triggers commit.
-        max_buffer_size_in_bytes=1_000_000,  # Never triggers commit.
-        max_buffer_time_in_seconds=5,
-    )
-
-    now = datetime.datetime.now()
-
-    # New buffer; never at expiration.
-    traveller = time_machine.travel(now)
-    traveller.start()
-    assert not buffer.has_exceeded_last_buffer_commit_time
-    assert not buffer.is_ready
-    traveller.stop()
-
-    # Almost at expiration.
-    traveller = time_machine.travel(now + datetime.timedelta(seconds=4))
-    traveller.start()
-    assert not buffer.has_exceeded_last_buffer_commit_time
-    assert not buffer.is_ready
-    traveller.stop()
-
-    # Exactly at expiration.
-    traveller = time_machine.travel(now + datetime.timedelta(seconds=5))
-    traveller.start()
-    assert buffer.has_exceeded_last_buffer_commit_time
-    assert buffer.is_ready  # type: ignore[unreachable]
-    traveller.stop()
-
-    # 55 seconds after expiration.
-    traveller = time_machine.travel(now + datetime.timedelta(seconds=60))
-    traveller.start()
-    assert buffer.has_exceeded_last_buffer_commit_time
-    assert buffer.is_ready
-    traveller.stop()
-
-
-def test_recording_buffer_commit_next_state():
-    now = datetime.datetime(year=2024, month=1, day=1)
-
-    # Create the initial state.
-    traveller = time_machine.travel(now)
-    traveller.start()
-    buffer = RecordingBuffer(
-        max_buffer_message_count=1_000_000,  # Never triggers commit.
-        max_buffer_size_in_bytes=1_000_000,  # Never triggers commit.
-        max_buffer_time_in_seconds=5,
-    )
-    traveller.stop()
-
-    # Advance time by 10 seconds to trigger a commit.
-    traveller = time_machine.travel(now + datetime.timedelta(seconds=10))
-    traveller.start()
-
-    # Cache the first deadline for later use.
-    first_deadline = buffer._buffer_next_commit_time
-
-    # Functionally a no-op but we do reset the buffer to a new empty state.
-    buffer = buffer.new()
-
-    # A new deadline was generated by the call to commit.
-    second_deadline = buffer._buffer_next_commit_time
-
-    assert first_deadline < second_deadline
-    # Deadlines incremented at exactly the rate of time travelled.
-    assert first_deadline + 10 == second_deadline
-    # Deadline advanced by 15 seconds compared to previous buffer's start time.
-    assert second_deadline == int((now + datetime.timedelta(seconds=15)).timestamp())
-
-    traveller.stop()
-
-
-@patch("sentry.replays.consumers.recording_buffered._do_upload")
-def test_commit_uploads(_do_upload):
-    """Assert successful batch does not error."""
-
-    def mocked(u):
-        return None
-
-    _do_upload.side_effect = mocked
-
-    commit_uploads([{}])  # type: ignore[typeddict-item]
-
-
-@patch("sentry.replays.consumers.recording_buffered._do_upload")
-def test_commit_uploads_failure(_do_upload):
-    """Assert _do_upload failure rate limits the consumer process."""
-
-    def mocked(u):
-        raise ValueError("")
-
-    _do_upload.side_effect = mocked
-
-    with pytest.raises(BufferCommitFailed):
-        commit_uploads([{}])  # type: ignore[typeddict-item]