
feat(metrics): Emit invalid messages encountered during batching into DLQ (#58840)

Part of my effort to add a DLQ to the Sentry metrics consumers; see the
final change here:
https://github.com/getsentry/sentry/pull/57998 

Previous PR in the stack:
https://github.com/getsentry/sentry/pull/58677

### Overview
- Add an `invalid_msg_meta` field to the `IndexerOutputMessageBatch`
- Maintain a sorted deque of the offsets of invalid messages seen in
batching
- During the `poll` step of the unbatcher, raise an `InvalidMessage`
exception so the invalid messages end up in the DLQ while the regular
messages are processed normally (see the sketch below)
- Update tests on batching with assertions that make sure the
`invalid_msg_meta` field of the `IndexerOutputMessageBatch` is empty
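
Condensed sketch of the flow the bullets above describe (illustrative only; the real `Unbatcher` in `parallel.py` below also handles `FilteredPayload` and carries full type annotations):

```python
from collections import deque
from typing import Deque, NamedTuple

from arroyo.dlq import InvalidMessage


class UnbatcherSketch:
    """Unbatches an IndexerOutputMessageBatch and surfaces invalid offsets."""

    def __init__(self, next_step) -> None:
        self.__next_step = next_step
        # (partition, offset) metadata of messages that failed parsing/validation
        self._invalid_msg_meta: Deque[NamedTuple] = deque()

    def submit(self, message) -> None:
        # Remember which offsets in this batch were invalid...
        self._invalid_msg_meta.extend(message.payload.invalid_msg_meta)
        # ...and pass the valid, transformed messages along unchanged.
        for transformed_message in message.payload.data:
            self.__next_step.submit(transformed_message)

    def poll(self) -> None:
        # Raising InvalidMessage here lets arroyo's DLQ machinery produce the raw
        # message at (partition, offset) to the dead letter queue, one per poll.
        if self._invalid_msg_meta:
            partition, offset = self._invalid_msg_meta.popleft()
            raise InvalidMessage(partition, offset)
        self.__next_step.poll()
```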
John 1 year ago
Parent
Commit
a412429aac

+ 12 - 2
bin/send_metrics.py

@@ -149,7 +149,13 @@ def produce_msgs(messages, is_generic, host, dryrun):
     show_default=True,
     help="Specify which org id(s) to send",
 )
-def main(use_cases, rand_str, host, dryrun, org_id):
+@click.option(
+    "--num-bad-msg",
+    default=0,
+    show_default=True,
+    help="Number of additional badly formatted metric messages to send",
+)
+def main(use_cases, rand_str, host, dryrun, org_id, num_bad_msg):
     if UseCaseID.SESSIONS.value in use_cases and len(use_cases) > 1:
         click.secho(
             "ERROR: UseCaseID.SESSIONS is in use_cases and there are more than 1 use cases",
@@ -158,9 +164,10 @@ def main(use_cases, rand_str, host, dryrun, org_id):
         )
         exit(1)
 
+    rand_str = rand_str or "".join(random.choices(string.ascii_uppercase + string.digits, k=8))
+
     is_generic = UseCaseID.SESSIONS.value not in use_cases
 
-    rand_str = rand_str or "".join(random.choices(string.ascii_uppercase + string.digits, k=8))
     messages = list(
         itertools.chain.from_iterable(
             (
@@ -172,6 +179,9 @@ def main(use_cases, rand_str, host, dryrun, org_id):
             for org in org_id
         )
     )
+
+    messages.extend([{"BAD_VALUE": rand_str, "idx": i} for i in range(num_bad_msg)])
+
     random.shuffle(messages)
 
     produce_msgs(messages, is_generic, host, dryrun)

+ 9 - 13
src/sentry/sentry_metrics/consumers/indexer/batch.py

@@ -1,6 +1,6 @@
 import logging
 import random
-from collections import defaultdict
+from collections import defaultdict, deque
 from typing import (
     Any,
     Callable,
@@ -126,8 +126,13 @@ class IndexerBatch:
                 parsed_payload = self._extract_message(msg)
                 self._validate_message(parsed_payload)
                 self.parsed_payloads_by_meta[broker_meta] = parsed_payload
-            except Exception:
+            except Exception as e:
                 self.invalid_msg_meta.add(broker_meta)
+                logger.error(
+                    e,
+                    extra={"payload_value": str(msg.payload.value)},
+                    exc_info=True,
+                )
 
         for namespace, cnt in skipped_msgs_cnt.items():
             metrics.incr(
@@ -163,17 +168,7 @@ class IndexerBatch:
                 extra={"payload_value": str(msg.payload.value)},
                 exc_info=True,
             )
-        try:
-            parsed_payload["use_case_id"] = use_case_id = extract_use_case_id(
-                parsed_payload["name"]
-            )
-        except ValidationError:
-            logger.error(
-                "process_messages.invalid_metric_resource_identifier",
-                extra={"payload_value": str(msg.payload.value)},
-                exc_info=True,
-            )
-            raise
+        parsed_payload["use_case_id"] = use_case_id = extract_use_case_id(parsed_payload["name"])
 
         self.__message_count[use_case_id] += 1
         self.__message_size_max[use_case_id] = max(
@@ -501,5 +496,6 @@ class IndexerBatch:
             )
         return IndexerOutputMessageBatch(
             new_messages,
+            deque(sorted(self.invalid_msg_meta)),
             cogs_usage,
         )

+ 12 - 1
src/sentry/sentry_metrics/consumers/indexer/common.py

@@ -1,7 +1,17 @@
 import logging
 import time
 from dataclasses import dataclass
-from typing import Any, List, Mapping, MutableMapping, MutableSequence, NamedTuple, Optional, Union
+from typing import (
+    Any,
+    Deque,
+    List,
+    Mapping,
+    MutableMapping,
+    MutableSequence,
+    NamedTuple,
+    Optional,
+    Union,
+)
 
 from arroyo import Partition
 from arroyo.backends.kafka import KafkaPayload
@@ -32,6 +42,7 @@ DEFAULT_QUEUED_MIN_MESSAGES = 100000
 @dataclass(frozen=True)
 class IndexerOutputMessageBatch:
     data: MutableSequence[Message[Union[RoutingPayload, KafkaPayload]]]
+    invalid_msg_meta: Deque[BrokerMeta]
     cogs_data: Mapping[UseCaseID, int]

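A minimal sketch of what the new field carries, assuming `BrokerMeta` remains importable from this module (the updated tests below construct it the same way):

```python
from collections import deque

from arroyo.types import Partition, Topic

from sentry.sentry_metrics.consumers.indexer.common import (
    BrokerMeta,
    IndexerOutputMessageBatch,
)

# An otherwise empty batch that flags offset 1 of partition 0 as invalid.
batch = IndexerOutputMessageBatch(
    data=[],
    invalid_msg_meta=deque([BrokerMeta(Partition(Topic("topic"), 0), 1)]),
    cogs_data={},
)
assert batch.invalid_msg_meta  # the unbatcher will route this offset to the DLQ
```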
+ 10 - 3
src/sentry/sentry_metrics/consumers/indexer/multiprocess.py

@@ -1,12 +1,12 @@
 import logging
 import time
 from functools import partial
-from typing import Any, Mapping, MutableMapping, Optional
+from typing import Any, Mapping, MutableMapping, Optional, Union
 
 from arroyo.backends.abstract import Producer as AbstractProducer
 from arroyo.backends.kafka import KafkaPayload
 from arroyo.processing.strategies import ProcessingStrategy as ProcessingStep
-from arroyo.types import Commit, Message, Partition
+from arroyo.types import Commit, FilteredPayload, Message, Partition
 from confluent_kafka import Producer
 
 from sentry.utils import kafka_config, metrics
@@ -68,7 +68,14 @@ class SimpleProduceStep(ProcessingStep[KafkaPayload]):
             self.__commit_function(self.__produced_message_offsets)
             self.__produced_message_offsets = {}
 
-    def submit(self, message: Message[KafkaPayload]) -> None:
+    def submit(self, message: Message[Union[KafkaPayload, FilteredPayload]]) -> None:
+        if isinstance(message.payload, FilteredPayload):
+            # A FilteredPayload will not be committed. This may cause the indexer to consume
+            # and produce invalid messages to the DLQ twice if the last messages it consumes
+            # are invalid and it is then shut down, but it will never produce valid messages
+            # twice to snuba.
+            # TODO: Use the arroyo producer, which handles FilteredPayload elegantly
+            return
         self.__producer.produce(
             topic=self.__producer_topic,
             key=None,

+ 18 - 4
src/sentry/sentry_metrics/consumers/indexer/parallel.py

@@ -2,10 +2,12 @@ from __future__ import annotations
 
 
 import functools
 import logging
-from typing import Any, Mapping, Optional, Union, cast
+from collections import deque
+from typing import Any, Deque, Mapping, NamedTuple, Optional, Union, cast
 
 from arroyo.backends.kafka import KafkaConsumer, KafkaPayload
 from arroyo.commit import ONCE_PER_SECOND
+from arroyo.dlq import InvalidMessage
 from arroyo.processing import StreamProcessor
 from arroyo.processing.strategies import ProcessingStrategy
 from arroyo.processing.strategies import ProcessingStrategy as ProcessingStep
@@ -39,19 +41,31 @@ logger = logging.getLogger(__name__)
 class Unbatcher(ProcessingStep[Union[FilteredPayload, IndexerOutputMessageBatch]]):
     def __init__(
         self,
-        next_step: ProcessingStep[Union[KafkaPayload, RoutingPayload]],
+        next_step: ProcessingStep[Union[FilteredPayload, KafkaPayload, RoutingPayload]],
     ) -> None:
         self.__next_step = next_step
         self.__closed = False
+        self._invalid_msg_meta: Deque[NamedTuple] = deque()
 
     def poll(self) -> None:
+        if self._invalid_msg_meta:
+            partition, offset = self._invalid_msg_meta.popleft()
+            raise InvalidMessage(partition, offset)
+
         self.__next_step.poll()
 
     def submit(self, message: Message[Union[FilteredPayload, IndexerOutputMessageBatch]]) -> None:
         assert not self.__closed
 
-        # FilteredPayloads are not handled in the indexer
-        for transformed_message in cast(IndexerOutputMessageBatch, message.payload).data:
+        if isinstance(message.payload, FilteredPayload):
+            self.__next_step.submit(cast(Message[KafkaPayload], message))
+            return
+
+        self._invalid_msg_meta.extend(message.payload.invalid_msg_meta)
+
+        _ = message.payload.cogs_data
+
+        for transformed_message in message.payload.data:
             self.__next_step.submit(transformed_message)
 
     def close(self) -> None:

+ 14 - 0
tests/sentry/sentry_metrics/test_batch.py

@@ -250,6 +250,7 @@ def test_extract_strings_with_rollout(should_index_tag_values, expected):
     )
 
     assert batch.extract_strings() == expected
+    assert not batch.invalid_msg_meta
 
 
 @pytest.mark.django_db
@@ -413,6 +414,7 @@ def test_extract_strings_with_single_use_case_ids_blocked():
             }
         }
     }
+    assert not batch.invalid_msg_meta
 
 
 @override_options({"sentry-metrics.indexer.disabled-namespaces": ["spans", "escalating_issues"]})
@@ -485,6 +487,7 @@ def test_extract_strings_with_multiple_use_case_ids_blocked():
             }
         },
     }
+    assert not batch.invalid_msg_meta
 
 
 @pytest.mark.django_db
@@ -587,6 +590,7 @@ def test_extract_strings_with_invalid_mri():
             }
         },
     }
+    assert batch.invalid_msg_meta == {BrokerMeta(Partition(Topic("topic"), 0), 0)}
 
 
 @pytest.mark.django_db
@@ -677,6 +681,7 @@ def test_extract_strings_with_multiple_use_case_ids_and_org_ids():
             }
         },
     }
+    assert not batch.invalid_msg_meta
 
 
 @pytest.mark.django_db
@@ -729,6 +734,7 @@ def test_resolved_with_aggregation_options(caplog, settings):
             }
         }
     )
+    assert not batch.invalid_msg_meta
 
     caplog.set_level(logging.ERROR)
     snuba_payloads = batch.reconstruct_messages(
@@ -869,6 +875,7 @@ def test_all_resolved(caplog, settings):
             }
         }
     )
+    assert not batch.invalid_msg_meta
 
     caplog.set_level(logging.ERROR)
     snuba_payloads = batch.reconstruct_messages(
@@ -1179,6 +1186,7 @@ def test_all_resolved_retention_days_honored(caplog, settings):
             }
         }
     )
+    assert not batch.invalid_msg_meta
 
     caplog.set_level(logging.ERROR)
     snuba_payloads = batch.reconstruct_messages(
@@ -1331,6 +1339,7 @@ def test_batch_resolve_with_values_not_indexed(caplog, settings):
             }
         }
     )
+    assert not batch.invalid_msg_meta
 
     caplog.set_level(logging.ERROR)
     snuba_payloads = batch.reconstruct_messages(
@@ -1475,6 +1484,7 @@ def test_metric_id_rate_limited(caplog, settings):
             }
         }
     )
+    assert not batch.invalid_msg_meta
 
     caplog.set_level(logging.ERROR)
     snuba_payloads = batch.reconstruct_messages(
@@ -1589,6 +1599,7 @@ def test_tag_key_rate_limited(caplog, settings):
             }
         }
     )
+    assert not batch.invalid_msg_meta
 
     caplog.set_level(logging.ERROR)
     snuba_payloads = batch.reconstruct_messages(
@@ -1680,6 +1691,7 @@ def test_tag_value_rate_limited(caplog, settings):
             }
         }
    )
+    assert not batch.invalid_msg_meta
 
     caplog.set_level(logging.ERROR)
     snuba_payloads = batch.reconstruct_messages(
@@ -1822,6 +1834,7 @@ def test_one_org_limited(caplog, settings):
             }
         }
     )
+    assert not batch.invalid_msg_meta
 
     caplog.set_level(logging.ERROR)
     snuba_payloads = batch.reconstruct_messages(
@@ -1954,6 +1967,7 @@ def test_cardinality_limiter(caplog, settings):
             },
         }
     }
+    assert not batch.invalid_msg_meta
 
     snuba_payloads = batch.reconstruct_messages(
         {

+ 10 - 1
tests/sentry/sentry_metrics/test_gen_metrics_multiprocess_steps.py

@@ -4,6 +4,7 @@ import logging
 import pickle
 import re
 import time
+from collections import deque
 from copy import deepcopy
 from datetime import datetime, timezone
 from typing import Any, Dict, List, MutableMapping, Sequence, Union
@@ -20,6 +21,7 @@ from sentry.sentry_metrics.configuration import IndexerStorage, UseCaseKey, get_
 from sentry.sentry_metrics.consumers.indexer.batch import valid_metric_name
 from sentry.sentry_metrics.consumers.indexer.common import (
     BatchMessages,
+    BrokerMeta,
     IndexerOutputMessageBatch,
     MetricsBatchBuilder,
 )
@@ -365,6 +367,7 @@ def test_process_messages() -> None:
         )
 
     compare_message_batches_ignoring_metadata(new_batch, expected_new_batch)
+    assert not new_batch.invalid_msg_meta
 
 
 @pytest.mark.django_db
@@ -391,7 +394,9 @@ def test_process_messages_default_card_rollout(set_sentry_option) -> None:
         1.0,
     ):
         new_batch = MESSAGE_PROCESSOR.process_messages(outer_message=outer_message)
-        assert len(new_batch.data) == len(message_batch)
+
+    assert len(new_batch.data) == len(message_batch)
+    assert not new_batch.invalid_msg_meta
 
 
 invalid_payloads = [
@@ -501,6 +506,7 @@ def test_process_messages_invalid_messages(
     ]
     compare_message_batches_ignoring_metadata(new_batch, expected_new_batch)
     assert error_text in caplog.text
+    assert new_batch.invalid_msg_meta == deque([BrokerMeta(Partition(Topic("topic"), 0), 1)])
 
 
 @pytest.mark.django_db
@@ -572,6 +578,7 @@ def test_process_messages_rate_limited(caplog, settings) -> None:
     ]
     compare_message_batches_ignoring_metadata(new_batch, expected_new_batch)
     assert "dropped_message" in caplog.text
+    assert not new_batch.invalid_msg_meta
 
 
 @pytest.mark.django_db
@@ -624,6 +631,8 @@ def test_process_messages_cardinality_limited(
 
         compare_message_batches_ignoring_metadata(new_batch, [])
 
+    assert not new_batch.invalid_msg_meta
+
 
 
 def test_valid_metric_name() -> None:
     assert valid_metric_name("") is True

+ 6 - 0
tests/sentry/sentry_metrics/test_rh_metrics_multiprocess_steps.py

@@ -3,6 +3,7 @@ from __future__ import annotations
 import logging
 import pickle
 import time
+from collections import deque
 from copy import deepcopy
 from datetime import datetime, timezone
 from typing import Any, Dict, List, MutableMapping, Sequence, Union
@@ -18,6 +19,7 @@ from sentry.sentry_metrics.configuration import IndexerStorage, UseCaseKey, get_
 from sentry.sentry_metrics.consumers.indexer.batch import valid_metric_name
 from sentry.sentry_metrics.consumers.indexer.common import (
     BatchMessages,
+    BrokerMeta,
     IndexerOutputMessageBatch,
     MetricsBatchBuilder,
 )
@@ -340,6 +342,7 @@ def test_process_messages() -> None:
             )
         )
     compare_message_batches_ignoring_metadata(new_batch, expected_new_batch)
+    assert not new_batch.invalid_msg_meta
 
 
 invalid_payloads = [
@@ -449,6 +452,7 @@ def test_process_messages_invalid_messages(
     ]
     compare_message_batches_ignoring_metadata(new_batch, expected_new_batch)
     assert error_text in caplog.text
+    assert new_batch.invalid_msg_meta == deque([BrokerMeta(Partition(Topic("topic"), 0), 1)])
 
 
 @pytest.mark.django_db
@@ -516,6 +520,7 @@ def test_process_messages_rate_limited(caplog, settings) -> None:
     ]
     compare_message_batches_ignoring_metadata(new_batch, expected_new_batch)
     assert "dropped_message" in caplog.text
+    assert not new_batch.invalid_msg_meta
 
 
 @pytest.mark.django_db
@@ -567,6 +572,7 @@ def test_process_messages_cardinality_limited(
             new_batch = MESSAGE_PROCESSOR.process_messages(outer_message=outer_message)
 
         compare_message_batches_ignoring_metadata(new_batch, [])
+        assert not new_batch.invalid_msg_meta
 
 
 
 
 def test_valid_metric_name() -> None:
 def test_valid_metric_name() -> None: