|
@@ -20,7 +20,7 @@ from django.conf import settings
|
|
|
from sentry.ingest.types import ConsumerType
|
|
|
from sentry.processing.backpressure.arroyo import HealthChecker, create_backpressure_step
|
|
|
from sentry.utils import kafka_config
|
|
|
-from sentry.utils.arroyo import RunTaskWithMultiprocessing
|
|
|
+from sentry.utils.arroyo import MultiprocessingPool, RunTaskWithMultiprocessing
|
|
|
|
|
|
from .attachment_event import decode_and_process_chunks, process_attachments_and_events
|
|
|
from .simple_event import process_simple_event_message
|
|
@@ -42,14 +42,16 @@ def maybe_multiprocess_step(
|
|
|
mp: MultiProcessConfig | None,
|
|
|
function: Callable[[Message[TInput]], TOutput],
|
|
|
next_step: ProcessingStrategy[FilteredPayload | TOutput],
|
|
|
+    pool: MultiprocessingPool | None,
|
|
|
) -> ProcessingStrategy[FilteredPayload | TInput]:
|
|
|
if mp is not None:
|
|
|
+ assert pool is not None
|
|
|
return RunTaskWithMultiprocessing(
|
|
|
function=function,
|
|
|
next_step=next_step,
|
|
|
- num_processes=mp.num_processes,
|
|
|
max_batch_size=mp.max_batch_size,
|
|
|
max_batch_time=mp.max_batch_time,
|
|
|
+ pool=pool,
|
|
|
input_block_size=mp.input_block_size,
|
|
|
output_block_size=mp.output_block_size,
|
|
|
)
|
|
@@ -74,6 +76,12 @@ class IngestStrategyFactory(ProcessingStrategyFactory[KafkaPayload]):
|
|
|
self.is_attachment_topic = consumer_type == ConsumerType.Attachments
|
|
|
|
|
|
self.multi_process = None
|
|
|
+ self._pool = MultiprocessingPool(num_processes)
|
|
|
+
|
|
|
+ # XXX: Attachment topic has two multiprocessing strategies chained together so we use
|
|
|
+ # two pools.
|
|
|
+ if self.is_attachment_topic:
|
|
|
+ self._attachments_pool = MultiprocessingPool(num_processes)
|
|
|
if num_processes > 1:
|
|
|
self.multi_process = MultiProcessConfig(
|
|
|
num_processes, max_batch_size, max_batch_time, input_block_size, output_block_size
|
|
@@ -91,7 +99,9 @@ class IngestStrategyFactory(ProcessingStrategyFactory[KafkaPayload]):
|
|
|
final_step = CommitOffsets(commit)
|
|
|
|
|
|
if not self.is_attachment_topic:
|
|
|
- next_step = maybe_multiprocess_step(mp, process_simple_event_message, final_step)
|
|
|
+ next_step = maybe_multiprocess_step(
|
|
|
+ mp, process_simple_event_message, final_step, self._pool
|
|
|
+ )
|
|
|
return create_backpressure_step(health_checker=self.health_checker, next_step=next_step)
|
|
|
|
|
|
# The `attachments` topic is a bit different, as it allows multiple event types:
|
|
@@ -104,7 +114,9 @@ class IngestStrategyFactory(ProcessingStrategyFactory[KafkaPayload]):
|
|
|
# are being handled in a step before the event depending on them is processed in a
|
|
|
# later step.
|
|
|
|
|
|
- step_2 = maybe_multiprocess_step(mp, process_attachments_and_events, final_step)
|
|
|
+ step_2 = maybe_multiprocess_step(
|
|
|
+ mp, process_attachments_and_events, final_step, self._attachments_pool
|
|
|
+ )
|
|
|
# This `FilterStep` will skip over processing `None` (aka already handled attachment chunks)
|
|
|
# in the second step. We filter this here explicitly,
|
|
|
# to avoid arroyo from needlessly dispatching `None` messages.
|
|
@@ -113,10 +125,16 @@ class IngestStrategyFactory(ProcessingStrategyFactory[KafkaPayload]):
|
|
|
# As the steps are defined (and types inferred) in reverse order, we would get a type error here,
|
|
|
# as `step_1` outputs an `| None`, but the `filter_step` does not mention that in its type,
|
|
|
# as it is inferred from the `step_2` input type which does not mention `| None`.
|
|
|
- step_1 = maybe_multiprocess_step(mp, decode_and_process_chunks, filter_step) # type:ignore
|
|
|
+ step_1 = maybe_multiprocess_step(
|
|
|
+ mp, decode_and_process_chunks, filter_step, self._pool # type:ignore
|
|
|
+ )
|
|
|
|
|
|
return create_backpressure_step(health_checker=self.health_checker, next_step=step_1)
|
|
|
|
|
|
+    def shutdown(self) -> None:
|
|
|
+        self._pool.close()
|
|
|
+        if self.is_attachment_topic:
|
|
|
+            self._attachments_pool.close()
|
|
|
|
|
|
def get_ingest_consumer(
|
|
|
consumer_type: str,
|