
Revert "ref: Move more code to the new way of defining kafka topics and overrides (#66283)"

This reverts commit f7ffe5a3d31a87ac334c191cc8eb9550d2e1ebce.

Co-authored-by: ayirr7 <47572810+ayirr7@users.noreply.github.com>
getsentry-bot · 1 year ago · commit 622827e763

+ 2 - 8
src/sentry/conf/server.py

@@ -21,6 +21,7 @@ from sentry.conf.types.kafka_definition import ConsumerDefinition
 from sentry.conf.types.logging_config import LoggingConfig
 from sentry.conf.types.role_dict import RoleDict
 from sentry.conf.types.sdk_config import ServerSdkConfig
+from sentry.conf.types.topic_definition import TopicDefinition
 from sentry.utils import json  # NOQA (used in getsentry config)
 from sentry.utils.celery import crontab_with_minute_jitter
 from sentry.utils.types import Type, type_from_value
@@ -3526,16 +3527,9 @@ KAFKA_TOPIC_TO_CLUSTER: Mapping[str, str] = {
     "shared-resources-usage": "default",
 }
 
-from typing import TypedDict
-
-
-class LegacyTopicDefinition(TypedDict):
-    cluster: str
-
-
 # Cluster configuration for each Kafka topic by name.
 # DEPRECATED
-KAFKA_TOPICS: Mapping[str, LegacyTopicDefinition] = {
+KAFKA_TOPICS: Mapping[str, TopicDefinition] = {
     KAFKA_EVENTS: {"cluster": "default"},
     KAFKA_EVENTS_COMMIT_LOG: {"cluster": "default"},
     KAFKA_TRANSACTIONS: {"cluster": "default"},

+ 1 - 1
src/sentry/conf/types/kafka_definition.py

@@ -70,7 +70,7 @@ class ConsumerDefinition(TypedDict, total=False):
     synchronize_commit_group_default: str
     synchronize_commit_log_topic_default: str
 
-    dlq_topic: Topic
+    dlq_topic: str
     dlq_max_invalid_ratio: float | None
     dlq_max_consecutive_count: int | None
 

+ 0 - 2
src/sentry/conf/types/topic_definition.py

@@ -5,5 +5,3 @@ from typing import TypedDict
 
 class TopicDefinition(TypedDict):
     cluster: str
-    # The topic name may be overridden from the default via KAFKA_TOPIC_OVERRIDES
-    real_topic_name: str

+ 4 - 5
src/sentry/consumers/__init__.py

@@ -290,7 +290,7 @@ KAFKA_CONSUMERS: Mapping[str, ConsumerDefinition] = {
         "static_args": {
             "ingest_profile": "release-health",
         },
-        "dlq_topic": Topic.INGEST_METRICS_DLQ,
+        "dlq_topic": settings.KAFKA_INGEST_METRICS_DLQ,
         "dlq_max_invalid_ratio": 0.01,
         "dlq_max_consecutive_count": 1000,
     },
@@ -301,7 +301,7 @@ KAFKA_CONSUMERS: Mapping[str, ConsumerDefinition] = {
         "static_args": {
             "ingest_profile": "performance",
         },
-        "dlq_topic": Topic.INGEST_GENERIC_METRICS_DLQ,
+        "dlq_topic": settings.KAFKA_INGEST_GENERIC_METRICS_DLQ,
         "dlq_max_invalid_ratio": 0.01,
         "dlq_max_consecutive_count": 1000,
     },
@@ -509,8 +509,7 @@ def get_stream_processor(
                 f"Cannot enable DLQ for consumer: {consumer_name}, no DLQ topic has been defined for it"
             ) from e
         try:
-            dlq_topic_defn = get_topic_definition(dlq_topic)
-            cluster_setting = dlq_topic_defn["cluster"]
+            cluster_setting = get_topic_definition(dlq_topic)["cluster"]
         except ValueError as e:
             raise click.BadParameter(
                 f"Cannot enable DLQ for consumer: {consumer_name}, DLQ topic {dlq_topic} is not configured in this environment"
@@ -520,7 +519,7 @@ def get_stream_processor(
         dlq_producer = KafkaProducer(producer_config)
 
         dlq_policy = DlqPolicy(
-            KafkaDlqProducer(dlq_producer, ArroyoTopic(dlq_topic_defn["real_topic_name"])),
+            KafkaDlqProducer(dlq_producer, ArroyoTopic(dlq_topic)),
             DlqLimit(
                 max_invalid_ratio=consumer_definition["dlq_max_invalid_ratio"],
                 max_consecutive_count=consumer_definition["dlq_max_consecutive_count"],
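With the revert, dlq_topic in a ConsumerDefinition is a plain settings string rather than a Topic enum member, and the Arroyo topic is built directly from it. A rough sketch of the DLQ wiring after this change, assuming the usual arroyo import paths for DlqPolicy, DlqLimit and KafkaDlqProducer (the exact DlqPolicy signature is not visible in this hunk):

    from arroyo import Topic as ArroyoTopic
    from arroyo.backends.kafka import KafkaProducer
    from arroyo.dlq import DlqLimit, DlqPolicy, KafkaDlqProducer
    from django.conf import settings

    from sentry.utils.kafka_config import (
        get_kafka_producer_cluster_options,
        get_topic_definition,
    )

    # The settings value is the topic name itself after the revert.
    dlq_topic = settings.KAFKA_INGEST_METRICS_DLQ
    cluster_setting = get_topic_definition(dlq_topic)["cluster"]
    producer_config = get_kafka_producer_cluster_options(cluster_setting)
    dlq_producer = KafkaProducer(producer_config)

    dlq_policy = DlqPolicy(
        KafkaDlqProducer(dlq_producer, ArroyoTopic(dlq_topic)),
        DlqLimit(max_invalid_ratio=0.01, max_consecutive_count=1000),
    )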

+ 8 - 10
src/sentry/eventstream/kafka/backend.py

@@ -7,9 +7,9 @@ from typing import TYPE_CHECKING, Any
 from confluent_kafka import KafkaError
 from confluent_kafka import Message as KafkaMessage
 from confluent_kafka import Producer
+from django.conf import settings
 
 from sentry import options
-from sentry.conf.types.kafka_definition import Topic
 from sentry.eventstream.base import EventStreamEventType, GroupStates
 from sentry.eventstream.snuba import KW_SKIP_SEMANTIC_PARTITIONING, SnubaProtocolEventStream
 from sentry.killswitches import killswitch_matches_context
@@ -24,15 +24,15 @@ if TYPE_CHECKING:
 
 class KafkaEventStream(SnubaProtocolEventStream):
     def __init__(self, **options: Any) -> None:
-        self.topic = Topic.EVENTS
-        self.transactions_topic = Topic.TRANSACTIONS
-        self.issue_platform_topic = Topic.EVENTSTREAM_GENERIC
-        self.__producers: MutableMapping[Topic, Producer] = {}
+        self.topic = settings.KAFKA_EVENTS
+        self.transactions_topic = settings.KAFKA_TRANSACTIONS
+        self.issue_platform_topic = settings.KAFKA_EVENTSTREAM_GENERIC
+        self.__producers: MutableMapping[str, Producer] = {}
 
-    def get_transactions_topic(self, project_id: int) -> Topic:
+    def get_transactions_topic(self, project_id: int) -> str:
         return self.transactions_topic
 
-    def get_producer(self, topic: Topic) -> Producer:
+    def get_producer(self, topic: str) -> Producer:
         if topic not in self.__producers:
             cluster_name = get_topic_definition(topic)["cluster"]
             cluster_options = get_kafka_producer_cluster_options(cluster_name)
@@ -202,11 +202,9 @@ class KafkaEventStream(SnubaProtocolEventStream):
 
         assert isinstance(extra_data, tuple)
 
-        real_topic = get_topic_definition(topic)["real_topic_name"]
-
         try:
             producer.produce(
-                topic=real_topic,
+                topic=topic,
                 key=str(project_id).encode("utf-8") if not skip_semantic_partitioning else None,
                 value=json.dumps((self.EVENT_PROTOCOL_VERSION, _type) + extra_data),
                 on_delivery=self.delivery_callback,
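The eventstream producer cache goes back to being keyed by the string topic name, and messages are produced to that name directly instead of a resolved real_topic_name. Roughly, under the assumption that the producer is a confluent_kafka.Producer built from the cluster options (the constructor call itself is not visible in this hunk):

    from collections.abc import MutableMapping

    from confluent_kafka import Producer

    from sentry.utils.kafka_config import (
        get_kafka_producer_cluster_options,
        get_topic_definition,
    )

    _producers: MutableMapping[str, Producer] = {}

    def get_producer(topic: str) -> Producer:
        # One producer per topic name; the cluster is looked up from that name.
        if topic not in _producers:
            cluster_name = get_topic_definition(topic)["cluster"]
            cluster_options = get_kafka_producer_cluster_options(cluster_name)
            _producers[topic] = Producer(cluster_options)  # assumed constructor usage
        return _producers[topic]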

+ 3 - 4
src/sentry/issues/attributes.py

@@ -6,7 +6,7 @@ from typing import cast
 
 import requests
 import urllib3
-from arroyo import Topic as ArroyoTopic
+from arroyo import Topic
 from arroyo.backends.kafka import KafkaPayload, KafkaProducer, build_kafka_configuration
 from django.conf import settings
 from django.db.models import F, Window
@@ -16,7 +16,6 @@ from django.dispatch import receiver
 from sentry_kafka_schemas.schema_types.group_attributes_v1 import GroupAttributesSnapshot
 
 from sentry import options
-from sentry.conf.types.kafka_definition import Topic
 from sentry.models.group import Group
 from sentry.models.groupassignee import GroupAssignee
 from sentry.models.groupowner import GroupOwner, GroupOwnerType
@@ -45,7 +44,7 @@ class GroupValues:
 
 
 def _get_attribute_snapshot_producer() -> KafkaProducer:
-    cluster_name = get_topic_definition(Topic.GROUP_ATTRIBUTES)["cluster"]
+    cluster_name = get_topic_definition(settings.KAFKA_GROUP_ATTRIBUTES)["cluster"]
     producer_config = get_kafka_producer_cluster_options(cluster_name)
     producer_config.pop("compression.type", None)
     producer_config.pop("message.max.bytes", None)
@@ -123,7 +122,7 @@ def produce_snapshot_to_kafka(snapshot: GroupAttributesSnapshot) -> None:
             raise snuba.SnubaError(err)
     else:
         payload = KafkaPayload(None, json.dumps(snapshot).encode("utf-8"), [])
-        _attribute_snapshot_producer.produce(ArroyoTopic(settings.KAFKA_GROUP_ATTRIBUTES), payload)
+        _attribute_snapshot_producer.produce(Topic(settings.KAFKA_GROUP_ATTRIBUTES), payload)
 
 
 def _retrieve_group_values(group_id: int) -> GroupValues:
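The snapshot producer is built from the settings topic string, and the payload is produced to arroyo's Topic wrapping that same string. A sketch of the pattern; the build_kafka_configuration(default_config=...) call and the sample payload are assumptions, the rest mirrors the reverted lines:

    from arroyo import Topic
    from arroyo.backends.kafka import KafkaPayload, KafkaProducer, build_kafka_configuration
    from django.conf import settings

    from sentry.utils import json
    from sentry.utils.kafka_config import (
        get_kafka_producer_cluster_options,
        get_topic_definition,
    )

    def make_snapshot_producer() -> KafkaProducer:
        # Resolve the cluster from the settings topic name, then build a producer.
        cluster_name = get_topic_definition(settings.KAFKA_GROUP_ATTRIBUTES)["cluster"]
        producer_config = get_kafka_producer_cluster_options(cluster_name)
        producer_config.pop("compression.type", None)
        producer_config.pop("message.max.bytes", None)
        return KafkaProducer(build_kafka_configuration(default_config=producer_config))

    producer = make_snapshot_producer()
    payload = KafkaPayload(None, json.dumps({"group_id": 1}).encode("utf-8"), [])
    producer.produce(Topic(settings.KAFKA_GROUP_ATTRIBUTES), payload)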

+ 3 - 4
src/sentry/issues/producer.py

@@ -4,12 +4,11 @@ import logging
 from collections.abc import MutableMapping
 from typing import Any, cast
 
-from arroyo import Topic as ArroyoTopic
+from arroyo import Topic
 from arroyo.backends.kafka import KafkaPayload, KafkaProducer, build_kafka_configuration
 from arroyo.types import Message, Value
 from django.conf import settings
 
-from sentry.conf.types.kafka_definition import Topic
 from sentry.issues.issue_occurrence import IssueOccurrence
 from sentry.issues.run import process_message
 from sentry.issues.status_change_message import StatusChangeMessage
@@ -34,7 +33,7 @@ class PayloadType(ValueEqualityEnum):
 
 
 def _get_occurrence_producer() -> KafkaProducer:
-    cluster_name = get_topic_definition(Topic.INGEST_OCCURRENCES)["cluster"]
+    cluster_name = get_topic_definition(settings.KAFKA_INGEST_OCCURRENCES)["cluster"]
     producer_config = get_kafka_producer_cluster_options(cluster_name)
     producer_config.pop("compression.type", None)
     producer_config.pop("message.max.bytes", None)
@@ -69,7 +68,7 @@ def produce_occurrence_to_kafka(
         process_message(Message(Value(payload=payload, committable={})))
         return
 
-    _occurrence_producer.produce(ArroyoTopic(settings.KAFKA_INGEST_OCCURRENCES), payload)
+    _occurrence_producer.produce(Topic(settings.KAFKA_INGEST_OCCURRENCES), payload)
 
 
 def _prepare_occurrence_message(

+ 6 - 8
src/sentry/monitors/tasks.py

@@ -7,13 +7,11 @@ from functools import lru_cache
 
 import msgpack
 import sentry_sdk
-from arroyo import Partition
-from arroyo import Topic as ArroyoTopic
+from arroyo import Partition, Topic
 from arroyo.backends.kafka import KafkaPayload, KafkaProducer, build_kafka_configuration
 from confluent_kafka.admin import AdminClient, PartitionMetadata
 from django.conf import settings
 
-from sentry.conf.types.kafka_definition import Topic
 from sentry.constants import ObjectStatus
 from sentry.monitors.logic.mark_failed import mark_failed
 from sentry.monitors.schedule import get_prev_schedule
@@ -52,7 +50,7 @@ MONITOR_TASKS_PARTITION_CLOCKS = "sentry.monitors.partition_clocks"
 
 
 def _get_producer() -> KafkaProducer:
-    cluster_name = get_topic_definition(Topic.INGEST_MONITORS)["cluster"]
+    cluster_name = get_topic_definition(settings.KAFKA_INGEST_MONITORS)["cluster"]
     producer_config = get_kafka_producer_cluster_options(cluster_name)
     producer_config.pop("compression.type", None)
     producer_config.pop("message.max.bytes", None)
@@ -64,10 +62,10 @@ _checkin_producer = SingletonProducer(_get_producer)
 
 @lru_cache(maxsize=None)
 def _get_partitions() -> Mapping[int, PartitionMetadata]:
-    topic_defn = get_topic_definition(Topic.INGEST_MONITORS)
-    topic = topic_defn["real_topic_name"]
+    topic = settings.KAFKA_INGEST_MONITORS
+    cluster_name = get_topic_definition(topic)["cluster"]
 
-    conf = get_kafka_admin_cluster_options(topic_defn["cluster"])
+    conf = get_kafka_admin_cluster_options(cluster_name)
     admin_client = AdminClient(conf)
     result = admin_client.list_topics(topic)
     topic_metadata = result.topics.get(topic)
@@ -205,7 +203,7 @@ def clock_pulse(current_datetime=None):
     # topic. This is a requirement to ensure that none of the partitions stall,
     # since the global clock is tied to the slowest partition.
     for partition in _get_partitions().values():
-        dest = Partition(ArroyoTopic(settings.KAFKA_INGEST_MONITORS), partition.id)
+        dest = Partition(Topic(settings.KAFKA_INGEST_MONITORS), partition.id)
         _checkin_producer.produce(dest, payload)
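Partition discovery for the monitors clock now passes the settings topic name straight to the Kafka admin client. A sketch of that lookup; returning topic_metadata.partitions at the end is an assumption, since the tail of the reverted function is not shown:

    from collections.abc import Mapping

    from confluent_kafka.admin import AdminClient, PartitionMetadata
    from django.conf import settings

    from sentry.utils.kafka_config import (
        get_kafka_admin_cluster_options,
        get_topic_definition,
    )

    def list_monitor_partitions() -> Mapping[int, PartitionMetadata]:
        topic = settings.KAFKA_INGEST_MONITORS  # literal topic name after the revert
        cluster_name = get_topic_definition(topic)["cluster"]
        conf = get_kafka_admin_cluster_options(cluster_name)
        admin_client = AdminClient(conf)
        topic_metadata = admin_client.list_topics(topic).topics.get(topic)
        assert topic_metadata is not None
        return topic_metadata.partitions  # assumed: partition id -> PartitionMetadata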
 
 

+ 3 - 2
src/sentry/replays/lib/kafka.py

@@ -1,4 +1,5 @@
-from sentry.conf.types.kafka_definition import Topic
+from django.conf import settings
+
 from sentry.utils.kafka_config import get_kafka_producer_cluster_options, get_topic_definition
 from sentry.utils.pubsub import KafkaPublisher
 
@@ -9,7 +10,7 @@ def initialize_replays_publisher(is_async=False) -> KafkaPublisher:
     global replay_publisher
 
     if replay_publisher is None:
-        config = get_topic_definition(Topic.INGEST_REPLAY_EVENTS)
+        config = get_topic_definition(settings.KAFKA_INGEST_REPLAY_EVENTS)
         replay_publisher = KafkaPublisher(
             get_kafka_producer_cluster_options(config["cluster"]),
             asynchronous=is_async,
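Both replay publishers resolve the cluster from the settings topic string and hand the producer options to KafkaPublisher. A condensed sketch of the initializer after the revert (the function name here is illustrative):

    from django.conf import settings

    from sentry.utils.kafka_config import (
        get_kafka_producer_cluster_options,
        get_topic_definition,
    )
    from sentry.utils.pubsub import KafkaPublisher

    def make_replays_publisher(is_async: bool = False) -> KafkaPublisher:
        # The topic definition lookup takes the settings string directly.
        config = get_topic_definition(settings.KAFKA_INGEST_REPLAY_EVENTS)
        return KafkaPublisher(
            get_kafka_producer_cluster_options(config["cluster"]),
            asynchronous=is_async,
        )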

+ 3 - 2
src/sentry/replays/usecases/ingest/dom_index.py

@@ -8,8 +8,9 @@ from collections.abc import Generator
 from hashlib import md5
 from typing import Any, Literal, TypedDict, cast
 
+from django.conf import settings
+
 from sentry import features
-from sentry.conf.types.kafka_definition import Topic
 from sentry.models.project import Project
 from sentry.replays.usecases.ingest.events import SentryEvent
 from sentry.replays.usecases.ingest.issue_creation import (
@@ -218,7 +219,7 @@ def _initialize_publisher() -> KafkaPublisher:
     global replay_publisher
 
     if replay_publisher is None:
-        config = kafka_config.get_topic_definition(Topic.INGEST_REPLAY_EVENTS)
+        config = kafka_config.get_topic_definition(settings.KAFKA_INGEST_REPLAY_EVENTS)
         replay_publisher = KafkaPublisher(
             kafka_config.get_kafka_producer_cluster_options(config["cluster"])
         )

Some files were not shown because too many files changed in this diff