@@ -2,12 +2,12 @@ import logging
 from collections.abc import MutableMapping
 from datetime import datetime, timezone
 from enum import Enum
+from typing import Any
 from unittest.mock import patch

 import pytest
 import sentry_kafka_schemas
 from arroyo.backends.kafka import KafkaPayload
-from arroyo.codecs.json import JsonCodec
 from arroyo.types import BrokerValue, Message, Partition, Topic, Value

 from sentry.sentry_metrics.consumers.indexer.batch import IndexerBatch, PartitionIdxOffset
@@ -84,7 +84,9 @@ extracted_string_output = {
     }
 }

-_INGEST_SCHEMA = JsonCodec(schema=sentry_kafka_schemas.get_schema("ingest-metrics")["schema"])
+_INGEST_CODEC: sentry_kafka_schemas.codecs.Codec[Any] = sentry_kafka_schemas.get_codec(
+    "ingest-metrics"
+)


 def _construct_messages(payloads):
@@ -129,11 +131,10 @@ def _deconstruct_messages(snuba_messages, kafka_logical_topic="snuba-metrics"):

     rv = []

-    codec = JsonCodec(schema=sentry_kafka_schemas.get_schema(kafka_logical_topic)["schema"])
+    codec = sentry_kafka_schemas.get_codec(kafka_logical_topic)

     for msg in snuba_messages:
-        decoded = json.loads(msg.payload.value.decode("utf-8"))
-        codec.validate(decoded)
+        decoded = codec.decode(msg.payload.value, validate=True)
         rv.append((decoded, msg.payload.headers))

     return rv
@@ -236,7 +237,7 @@ def test_extract_strings_with_rollout(should_index_tag_values, expected):
         outer_message,
         should_index_tag_values,
         False,
-        arroyo_input_codec=_INGEST_SCHEMA,
+        input_codec=_INGEST_CODEC,
     )

     assert batch.extract_strings() == expected
@@ -301,7 +302,7 @@ def test_extract_strings_with_multiple_use_case_ids():
         outer_message,
         True,
         False,
-        arroyo_input_codec=_INGEST_SCHEMA,
+        input_codec=_INGEST_CODEC,
     )
     assert batch.extract_strings() == {
         MockUseCaseID.USE_CASE_1: {
@@ -402,7 +403,7 @@ def test_extract_strings_with_invalid_mri():
         outer_message,
         True,
         False,
-        arroyo_input_codec=_INGEST_SCHEMA,
+        input_codec=_INGEST_CODEC,
     )
     assert batch.extract_strings() == {
         MockUseCaseID.USE_CASE_1: {
@@ -488,7 +489,7 @@ def test_extract_strings_with_multiple_use_case_ids_and_org_ids():
         outer_message,
         True,
         False,
-        arroyo_input_codec=_INGEST_SCHEMA,
+        input_codec=_INGEST_CODEC,
     )
     assert batch.extract_strings() == {
         MockUseCaseID.USE_CASE_1: {
@@ -534,7 +535,7 @@ def test_all_resolved(caplog, settings):
         outer_message,
         True,
         False,
-        arroyo_input_codec=_INGEST_SCHEMA,
+        input_codec=_INGEST_CODEC,
     )
     assert batch.extract_strings() == (
         {
@@ -677,7 +678,7 @@ def test_all_resolved_with_routing_information(caplog, settings):
         outer_message,
         True,
         True,
-        arroyo_input_codec=_INGEST_SCHEMA,
+        input_codec=_INGEST_CODEC,
     )
     assert batch.extract_strings() == (
         {
@@ -833,7 +834,7 @@ def test_all_resolved_retention_days_honored(caplog, settings):
         outer_message,
         True,
         False,
-        arroyo_input_codec=_INGEST_SCHEMA,
+        input_codec=_INGEST_CODEC,
     )
     assert batch.extract_strings() == (
         {
@@ -984,7 +985,7 @@ def test_batch_resolve_with_values_not_indexed(caplog, settings):
         outer_message,
         False,
         False,
-        arroyo_input_codec=_INGEST_SCHEMA,
+        input_codec=_INGEST_CODEC,
     )
     assert batch.extract_strings() == (
         {
@@ -1113,7 +1114,7 @@ def test_metric_id_rate_limited(caplog, settings):
         ]
     )

-    batch = IndexerBatch(outer_message, True, False, arroyo_input_codec=_INGEST_SCHEMA)
+    batch = IndexerBatch(outer_message, True, False, input_codec=_INGEST_CODEC)
     assert batch.extract_strings() == (
         {
             MockUseCaseID.SESSIONS: {
@@ -1216,7 +1217,7 @@ def test_tag_key_rate_limited(caplog, settings):
         ]
     )

-    batch = IndexerBatch(outer_message, True, False, arroyo_input_codec=_INGEST_SCHEMA)
+    batch = IndexerBatch(outer_message, True, False, input_codec=_INGEST_CODEC)
     assert batch.extract_strings() == (
         {
             MockUseCaseID.SESSIONS: {
@@ -1297,7 +1298,7 @@ def test_tag_value_rate_limited(caplog, settings):
         ]
     )

-    batch = IndexerBatch(outer_message, True, False, arroyo_input_codec=_INGEST_SCHEMA)
+    batch = IndexerBatch(outer_message, True, False, input_codec=_INGEST_CODEC)
     assert batch.extract_strings() == (
         {
             MockUseCaseID.SESSIONS: {
@@ -1428,7 +1429,7 @@ def test_one_org_limited(caplog, settings):
         outer_message,
         True,
         False,
-        arroyo_input_codec=_INGEST_SCHEMA,
+        input_codec=_INGEST_CODEC,
     )
     assert batch.extract_strings() == (
         {
@@ -1553,7 +1554,7 @@ def test_cardinality_limiter(caplog, settings):
         outer_message,
         True,
         False,
-        arroyo_input_codec=_INGEST_SCHEMA,
+        input_codec=_INGEST_CODEC,
     )
     keys_to_remove = list(batch.parsed_payloads_by_offset)[:2]
     # the messages come in a certain order, and Python dictionaries preserve
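Note: the pattern this diff adopts throughout is fetching a topic-bound codec from sentry_kafka_schemas and decoding plus validating in one call, replacing the old JsonCodec/json.loads/validate sequence. A minimal sketch of that usage is below; the helper name decode_ingest_metric is illustrative and not part of this change, and it mirrors only the calls seen in the diff (get_codec and decode with validate=True).

    from typing import Any

    import sentry_kafka_schemas


    def decode_ingest_metric(raw: bytes) -> Any:
        # get_codec returns a codec bound to the "ingest-metrics" topic schema;
        # decode(..., validate=True) parses the payload bytes and validates them
        # against that schema in a single step.
        codec: sentry_kafka_schemas.codecs.Codec[Any] = sentry_kafka_schemas.get_codec("ingest-metrics")
        return codec.decode(raw, validate=True)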