ref: Move code to new-style kafka topics - take 2 (#66381)

Brings back https://github.com/getsentry/sentry/pull/66283 with fixes to
the indexer strategy. The earlier change crashed on the prior deploy because
we were not producing to the correctly resolved topic.
Lyn Nagara · 1 year ago · parent commit 6fa756ee84
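
The "new-style" topics referred to here replace raw topic-name strings from Django settings with the Topic enum in sentry.conf.types.kafka_definition, resolved through get_topic_definition. A minimal sketch of the call pattern this commit moves producers toward, assembled from the hunks below (a sketch, not the exact helper implementations):

from arroyo import Topic as ArroyoTopic

from sentry.conf.types.kafka_definition import Topic
from sentry.utils.kafka_config import (
    get_kafka_producer_cluster_options,
    get_topic_definition,
)

# Old style (removed below): look up cluster config by a settings string,
# e.g. get_topic_definition(settings.KAFKA_INGEST_MONITORS)["cluster"].

# New style: look up by enum member, then produce to the resolved physical name.
topic_defn = get_topic_definition(Topic.INGEST_MONITORS)
producer_config = get_kafka_producer_cluster_options(topic_defn["cluster"])
destination = ArroyoTopic(topic_defn["real_topic_name"])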

+ 8 - 2
src/sentry/conf/server.py

@@ -21,7 +21,6 @@ from sentry.conf.types.kafka_definition import ConsumerDefinition
 from sentry.conf.types.logging_config import LoggingConfig
 from sentry.conf.types.role_dict import RoleDict
 from sentry.conf.types.sdk_config import ServerSdkConfig
-from sentry.conf.types.topic_definition import TopicDefinition
 from sentry.utils import json  # NOQA (used in getsentry config)
 from sentry.utils.celery import crontab_with_minute_jitter
 from sentry.utils.types import Type, type_from_value
@@ -3529,9 +3528,16 @@ KAFKA_TOPIC_TO_CLUSTER: Mapping[str, str] = {
     "shared-resources-usage": "default",
 }
 
+from typing import TypedDict
+
+
+class LegacyTopicDefinition(TypedDict):
+    cluster: str
+
+
 # Cluster configuration for each Kafka topic by name.
 # DEPRECATED
-KAFKA_TOPICS: Mapping[str, TopicDefinition] = {
+KAFKA_TOPICS: Mapping[str, LegacyTopicDefinition] = {
     KAFKA_EVENTS: {"cluster": "default"},
     KAFKA_EVENTS_COMMIT_LOG: {"cluster": "default"},
     KAFKA_TRANSACTIONS: {"cluster": "default"},

+ 1 - 1
src/sentry/conf/types/kafka_definition.py

@@ -70,7 +70,7 @@ class ConsumerDefinition(TypedDict, total=False):
     synchronize_commit_group_default: str
     synchronize_commit_log_topic_default: str
 
-    dlq_topic: str
+    dlq_topic: Topic
     dlq_max_invalid_ratio: float | None
     dlq_max_consecutive_count: int | None
 

+ 2 - 0
src/sentry/conf/types/topic_definition.py

@@ -5,3 +5,5 @@ from typing import TypedDict
 
 class TopicDefinition(TypedDict):
     cluster: str
+    # The topic name may be overridden from the default via KAFKA_TOPIC_OVERRIDES
+    real_topic_name: str
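
The resolver that fills real_topic_name is not part of this diff; the following is a hypothetical sketch of what it plausibly does, assuming the KAFKA_TOPIC_TO_CLUSTER mapping above and a KAFKA_TOPIC_OVERRIDES setting keyed by the default topic name (both names appear in this commit, but the shapes here are assumptions):

from django.conf import settings

from sentry.conf.types.kafka_definition import Topic
from sentry.conf.types.topic_definition import TopicDefinition


def get_topic_definition_sketch(topic: Topic) -> TopicDefinition:
    # Cluster routing is keyed by the logical (default) topic name; the
    # physical name may be overridden per environment, falling back to the default.
    return {
        "cluster": settings.KAFKA_TOPIC_TO_CLUSTER[topic.value],
        "real_topic_name": settings.KAFKA_TOPIC_OVERRIDES.get(topic.value, topic.value),
    }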

+ 5 - 4
src/sentry/consumers/__init__.py

@@ -298,7 +298,7 @@ KAFKA_CONSUMERS: Mapping[str, ConsumerDefinition] = {
         "static_args": {
             "ingest_profile": "release-health",
         },
-        "dlq_topic": settings.KAFKA_INGEST_METRICS_DLQ,
+        "dlq_topic": Topic.INGEST_METRICS_DLQ,
         "dlq_max_invalid_ratio": 0.01,
         "dlq_max_consecutive_count": 1000,
     },
@@ -309,7 +309,7 @@ KAFKA_CONSUMERS: Mapping[str, ConsumerDefinition] = {
         "static_args": {
             "ingest_profile": "performance",
         },
-        "dlq_topic": settings.KAFKA_INGEST_GENERIC_METRICS_DLQ,
+        "dlq_topic": Topic.INGEST_GENERIC_METRICS_DLQ,
         "dlq_max_invalid_ratio": 0.01,
         "dlq_max_consecutive_count": 1000,
     },
@@ -517,7 +517,8 @@ def get_stream_processor(
                 f"Cannot enable DLQ for consumer: {consumer_name}, no DLQ topic has been defined for it"
             ) from e
         try:
-            cluster_setting = get_topic_definition(dlq_topic)["cluster"]
+            dlq_topic_defn = get_topic_definition(dlq_topic)
+            cluster_setting = dlq_topic_defn["cluster"]
         except ValueError as e:
             raise click.BadParameter(
                 f"Cannot enable DLQ for consumer: {consumer_name}, DLQ topic {dlq_topic} is not configured in this environment"
@@ -527,7 +528,7 @@ def get_stream_processor(
         dlq_producer = KafkaProducer(producer_config)
 
         dlq_policy = DlqPolicy(
-            KafkaDlqProducer(dlq_producer, ArroyoTopic(dlq_topic)),
+            KafkaDlqProducer(dlq_producer, ArroyoTopic(dlq_topic_defn["real_topic_name"])),
             DlqLimit(
                 max_invalid_ratio=consumer_definition["dlq_max_invalid_ratio"],
                 max_consecutive_count=consumer_definition["dlq_max_consecutive_count"],
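
The commit message attributes the earlier crash to producing to an unresolved topic; the point of threading real_topic_name through here is that the DLQ producer targets whatever name the environment actually uses. An illustrative sketch of that distinction (not the indexer fix itself):

from arroyo import Topic as ArroyoTopic

from sentry.conf.types.kafka_definition import Topic
from sentry.utils.kafka_config import get_topic_definition

dlq_topic_defn = get_topic_definition(Topic.INGEST_METRICS_DLQ)

# Wrong: the enum value is only the logical default name, which an environment
# may override to something else entirely.
# dest = ArroyoTopic(Topic.INGEST_METRICS_DLQ.value)

# Right: produce to the name the environment actually resolved.
dest = ArroyoTopic(dlq_topic_defn["real_topic_name"])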

+ 10 - 8
src/sentry/eventstream/kafka/backend.py

@@ -7,9 +7,9 @@ from typing import TYPE_CHECKING, Any
 from confluent_kafka import KafkaError
 from confluent_kafka import Message as KafkaMessage
 from confluent_kafka import Producer
-from django.conf import settings
 
 from sentry import options
+from sentry.conf.types.kafka_definition import Topic
 from sentry.eventstream.base import EventStreamEventType, GroupStates
 from sentry.eventstream.snuba import KW_SKIP_SEMANTIC_PARTITIONING, SnubaProtocolEventStream
 from sentry.killswitches import killswitch_matches_context
@@ -24,15 +24,15 @@ if TYPE_CHECKING:
 
 class KafkaEventStream(SnubaProtocolEventStream):
     def __init__(self, **options: Any) -> None:
-        self.topic = settings.KAFKA_EVENTS
-        self.transactions_topic = settings.KAFKA_TRANSACTIONS
-        self.issue_platform_topic = settings.KAFKA_EVENTSTREAM_GENERIC
-        self.__producers: MutableMapping[str, Producer] = {}
+        self.topic = Topic.EVENTS
+        self.transactions_topic = Topic.TRANSACTIONS
+        self.issue_platform_topic = Topic.EVENTSTREAM_GENERIC
+        self.__producers: MutableMapping[Topic, Producer] = {}
 
-    def get_transactions_topic(self, project_id: int) -> str:
+    def get_transactions_topic(self, project_id: int) -> Topic:
         return self.transactions_topic
 
-    def get_producer(self, topic: str) -> Producer:
+    def get_producer(self, topic: Topic) -> Producer:
         if topic not in self.__producers:
             cluster_name = get_topic_definition(topic)["cluster"]
             cluster_options = get_kafka_producer_cluster_options(cluster_name)
@@ -202,9 +202,11 @@ class KafkaEventStream(SnubaProtocolEventStream):
 
         assert isinstance(extra_data, tuple)
 
+        real_topic = get_topic_definition(topic)["real_topic_name"]
+
         try:
             producer.produce(
-                topic=topic,
+                topic=real_topic,
                 key=str(project_id).encode("utf-8") if not skip_semantic_partitioning else None,
                 value=json.dumps((self.EVENT_PROTOCOL_VERSION, _type) + extra_data),
                 on_delivery=self.delivery_callback,
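
Taken together, the changes above cache one producer per logical Topic and only resolve the physical name at produce time. A self-contained sketch of that pattern, assuming standard Enum hashing (illustrative, not the full KafkaEventStream class):

from confluent_kafka import Producer

from sentry.conf.types.kafka_definition import Topic
from sentry.utils.kafka_config import (
    get_kafka_producer_cluster_options,
    get_topic_definition,
)

_producers: dict[Topic, Producer] = {}


def get_producer(topic: Topic) -> Producer:
    # One producer per logical topic; the cluster comes from the topic definition.
    if topic not in _producers:
        cluster_name = get_topic_definition(topic)["cluster"]
        _producers[topic] = Producer(get_kafka_producer_cluster_options(cluster_name))
    return _producers[topic]


def produce(topic: Topic, key: bytes | None, value: bytes) -> None:
    # Resolve the physical topic name only at the point of producing.
    real_topic = get_topic_definition(topic)["real_topic_name"]
    get_producer(topic).produce(topic=real_topic, key=key, value=value)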

+ 4 - 3
src/sentry/issues/attributes.py

@@ -6,7 +6,7 @@ from typing import cast
 
 import requests
 import urllib3
-from arroyo import Topic
+from arroyo import Topic as ArroyoTopic
 from arroyo.backends.kafka import KafkaPayload, KafkaProducer, build_kafka_configuration
 from django.conf import settings
 from django.db.models import F, Window
@@ -16,6 +16,7 @@ from django.dispatch import receiver
 from sentry_kafka_schemas.schema_types.group_attributes_v1 import GroupAttributesSnapshot
 
 from sentry import options
+from sentry.conf.types.kafka_definition import Topic
 from sentry.models.group import Group
 from sentry.models.groupassignee import GroupAssignee
 from sentry.models.groupowner import GroupOwner, GroupOwnerType
@@ -44,7 +45,7 @@ class GroupValues:
 
 
 def _get_attribute_snapshot_producer() -> KafkaProducer:
-    cluster_name = get_topic_definition(settings.KAFKA_GROUP_ATTRIBUTES)["cluster"]
+    cluster_name = get_topic_definition(Topic.GROUP_ATTRIBUTES)["cluster"]
     producer_config = get_kafka_producer_cluster_options(cluster_name)
     producer_config.pop("compression.type", None)
     producer_config.pop("message.max.bytes", None)
@@ -122,7 +123,7 @@ def produce_snapshot_to_kafka(snapshot: GroupAttributesSnapshot) -> None:
             raise snuba.SnubaError(err)
     else:
         payload = KafkaPayload(None, json.dumps(snapshot).encode("utf-8"), [])
-        _attribute_snapshot_producer.produce(Topic(settings.KAFKA_GROUP_ATTRIBUTES), payload)
+        _attribute_snapshot_producer.produce(ArroyoTopic(settings.KAFKA_GROUP_ATTRIBUTES), payload)
 
 
 def _retrieve_group_values(group_id: int) -> GroupValues:
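
Note that in this file (and in src/sentry/issues/producer.py below) only the cluster lookup moves to the enum; the produce destination still wraps the settings string. A minimal sketch of the two roles, assuming the enum value and the settings string name the same topic:

from arroyo import Topic as ArroyoTopic
from django.conf import settings

from sentry.conf.types.kafka_definition import Topic
from sentry.utils.kafka_config import get_topic_definition

# Logical key into Sentry's topic/cluster configuration.
cluster_name = get_topic_definition(Topic.GROUP_ATTRIBUTES)["cluster"]

# Physical destination handed to the Arroyo producer, still sourced from settings here.
destination = ArroyoTopic(settings.KAFKA_GROUP_ATTRIBUTES)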

+ 4 - 3
src/sentry/issues/producer.py

@@ -4,11 +4,12 @@ import logging
 from collections.abc import MutableMapping
 from typing import Any, cast
 
-from arroyo import Topic
+from arroyo import Topic as ArroyoTopic
 from arroyo.backends.kafka import KafkaPayload, KafkaProducer, build_kafka_configuration
 from arroyo.types import Message, Value
 from django.conf import settings
 
+from sentry.conf.types.kafka_definition import Topic
 from sentry.issues.issue_occurrence import IssueOccurrence
 from sentry.issues.run import process_message
 from sentry.issues.status_change_message import StatusChangeMessage
@@ -33,7 +34,7 @@ class PayloadType(ValueEqualityEnum):
 
 
 def _get_occurrence_producer() -> KafkaProducer:
-    cluster_name = get_topic_definition(settings.KAFKA_INGEST_OCCURRENCES)["cluster"]
+    cluster_name = get_topic_definition(Topic.INGEST_OCCURRENCES)["cluster"]
     producer_config = get_kafka_producer_cluster_options(cluster_name)
     producer_config.pop("compression.type", None)
     producer_config.pop("message.max.bytes", None)
@@ -68,7 +69,7 @@ def produce_occurrence_to_kafka(
         process_message(Message(Value(payload=payload, committable={})))
         return
 
-    _occurrence_producer.produce(Topic(settings.KAFKA_INGEST_OCCURRENCES), payload)
+    _occurrence_producer.produce(ArroyoTopic(settings.KAFKA_INGEST_OCCURRENCES), payload)
 
 
 def _prepare_occurrence_message(

+ 8 - 6
src/sentry/monitors/tasks.py

@@ -7,11 +7,13 @@ from functools import lru_cache
 
 import msgpack
 import sentry_sdk
-from arroyo import Partition, Topic
+from arroyo import Partition
+from arroyo import Topic as ArroyoTopic
 from arroyo.backends.kafka import KafkaPayload, KafkaProducer, build_kafka_configuration
 from confluent_kafka.admin import AdminClient, PartitionMetadata
 from django.conf import settings
 
+from sentry.conf.types.kafka_definition import Topic
 from sentry.constants import ObjectStatus
 from sentry.monitors.logic.mark_failed import mark_failed
 from sentry.monitors.schedule import get_prev_schedule
@@ -50,7 +52,7 @@ MONITOR_TASKS_PARTITION_CLOCKS = "sentry.monitors.partition_clocks"
 
 
 def _get_producer() -> KafkaProducer:
-    cluster_name = get_topic_definition(settings.KAFKA_INGEST_MONITORS)["cluster"]
+    cluster_name = get_topic_definition(Topic.INGEST_MONITORS)["cluster"]
     producer_config = get_kafka_producer_cluster_options(cluster_name)
     producer_config.pop("compression.type", None)
     producer_config.pop("message.max.bytes", None)
@@ -62,10 +64,10 @@ _checkin_producer = SingletonProducer(_get_producer)
 
 @lru_cache(maxsize=None)
 def _get_partitions() -> Mapping[int, PartitionMetadata]:
-    topic = settings.KAFKA_INGEST_MONITORS
-    cluster_name = get_topic_definition(topic)["cluster"]
+    topic_defn = get_topic_definition(Topic.INGEST_MONITORS)
+    topic = topic_defn["real_topic_name"]
 
-    conf = get_kafka_admin_cluster_options(cluster_name)
+    conf = get_kafka_admin_cluster_options(topic_defn["cluster"])
     admin_client = AdminClient(conf)
     result = admin_client.list_topics(topic)
     topic_metadata = result.topics.get(topic)
@@ -203,7 +205,7 @@ def clock_pulse(current_datetime=None):
     # topic. This is a requirement to ensure that none of the partitions stall,
     # since the global clock is tied to the slowest partition.
     for partition in _get_partitions().values():
-        dest = Partition(Topic(settings.KAFKA_INGEST_MONITORS), partition.id)
+        dest = Partition(ArroyoTopic(settings.KAFKA_INGEST_MONITORS), partition.id)
         _checkin_producer.produce(dest, payload)
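
The partition listing above now asks the Kafka admin API about the resolved name rather than the settings string. A condensed sketch of that lookup, mirroring _get_partitions (assumes the confluent_kafka AdminClient metadata shapes):

from confluent_kafka.admin import AdminClient

from sentry.conf.types.kafka_definition import Topic
from sentry.utils.kafka_config import get_kafka_admin_cluster_options, get_topic_definition

topic_defn = get_topic_definition(Topic.INGEST_MONITORS)
topic = topic_defn["real_topic_name"]

admin_client = AdminClient(get_kafka_admin_cluster_options(topic_defn["cluster"]))
topic_metadata = admin_client.list_topics(topic).topics.get(topic)
partitions = topic_metadata.partitions  # mapping of partition id -> PartitionMetadata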
 
 

+ 2 - 3
src/sentry/replays/lib/kafka.py

@@ -1,5 +1,4 @@
-from django.conf import settings
-
+from sentry.conf.types.kafka_definition import Topic
 from sentry.utils.kafka_config import get_kafka_producer_cluster_options, get_topic_definition
 from sentry.utils.pubsub import KafkaPublisher
 
@@ -10,7 +9,7 @@ def initialize_replays_publisher(is_async=False) -> KafkaPublisher:
     global replay_publisher
 
     if replay_publisher is None:
-        config = get_topic_definition(settings.KAFKA_INGEST_REPLAY_EVENTS)
+        config = get_topic_definition(Topic.INGEST_REPLAY_EVENTS)
         replay_publisher = KafkaPublisher(
             get_kafka_producer_cluster_options(config["cluster"]),
             asynchronous=is_async,

+ 2 - 3
src/sentry/replays/usecases/ingest/dom_index.py

@@ -8,9 +8,8 @@ from collections.abc import Generator
 from hashlib import md5
 from typing import Any, Literal, TypedDict, cast
 
-from django.conf import settings
-
 from sentry import features
+from sentry.conf.types.kafka_definition import Topic
 from sentry.models.project import Project
 from sentry.replays.usecases.ingest.events import SentryEvent
 from sentry.replays.usecases.ingest.issue_creation import (
@@ -219,7 +218,7 @@ def _initialize_publisher() -> KafkaPublisher:
     global replay_publisher
 
     if replay_publisher is None:
-        config = kafka_config.get_topic_definition(settings.KAFKA_INGEST_REPLAY_EVENTS)
+        config = kafka_config.get_topic_definition(Topic.INGEST_REPLAY_EVENTS)
         replay_publisher = KafkaPublisher(
             kafka_config.get_kafka_producer_cluster_options(config["cluster"])
         )

Some files were not shown because too many files changed in this diff