
feat(generic-metrics): Replace `bulk_record`/`record` with `_uca_bulk_record`/`_uca_record` (#48813)

John committed 1 year ago
parent commit 1ae87c0169

+ 13 - 12
src/sentry/sentry_metrics/consumers/indexer/batch.py

@@ -257,8 +257,8 @@ class IndexerBatch:
     @metrics.wraps("process_messages.reconstruct_messages")
     def reconstruct_messages(
         self,
-        mapping: Mapping[OrgId, Mapping[str, Optional[int]]],
-        bulk_record_meta: Mapping[OrgId, Mapping[str, Metadata]],
+        mapping: Mapping[UseCaseID, Mapping[OrgId, Mapping[str, Optional[int]]]],
+        bulk_record_meta: Mapping[UseCaseID, Mapping[OrgId, Mapping[str, Metadata]]],
     ) -> IndexerOutputMessageBatch:
         new_messages: IndexerOutputMessageBatch = []
 
@@ -282,6 +282,7 @@ class IndexerBatch:
 
             metric_name = old_payload_value["name"]
             org_id = old_payload_value["org_id"]
+            use_case_id = old_payload_value["use_case_id"]
             sentry_sdk.set_tag("sentry_metrics.organization_id", org_id)
             tags = old_payload_value.get("tags", {})
             used_tags.add(metric_name)
@@ -293,9 +294,9 @@ class IndexerBatch:
             try:
                 for k, v in tags.items():
                     used_tags.update({k, v})
-                    new_k = mapping[org_id][k]
+                    new_k = mapping[use_case_id][org_id][k]
                     if new_k is None:
-                        metadata = bulk_record_meta[org_id].get(k)
+                        metadata = bulk_record_meta[use_case_id][org_id].get(k)
                         if (
                             metadata
                             and metadata.fetch_type_ext
@@ -308,9 +309,9 @@ class IndexerBatch:
 
                     value_to_write: Union[int, str] = v
                     if self.__should_index_tag_values:
-                        new_v = mapping[org_id][v]
+                        new_v = mapping[use_case_id][org_id][v]
                         if new_v is None:
-                            metadata = bulk_record_meta[org_id].get(v)
+                            metadata = bulk_record_meta[use_case_id][org_id].get(v)
                             if (
                                 metadata
                                 and metadata.fetch_type_ext
@@ -344,15 +345,15 @@ class IndexerBatch:
                             "string_type": "tags",
                             "num_global_quotas": exceeded_global_quotas,
                             "num_org_quotas": exceeded_org_quotas,
-                            "org_batch_size": len(mapping[org_id]),
+                            "org_batch_size": len(mapping[use_case_id][org_id]),
                         },
                     )
                 continue
 
             fetch_types_encountered = set()
             for tag in used_tags:
-                if tag in bulk_record_meta[org_id]:
-                    metadata = bulk_record_meta[org_id][tag]
+                if tag in bulk_record_meta[use_case_id][org_id]:
+                    metadata = bulk_record_meta[use_case_id][org_id][tag]
                     fetch_types_encountered.add(metadata.fetch_type)
                     output_message_meta[metadata.fetch_type.value][str(metadata.id)] = tag
 
@@ -360,9 +361,9 @@ class IndexerBatch:
                 "".join(sorted(t.value for t in fetch_types_encountered)), "utf-8"
             )
 
-            numeric_metric_id = mapping[org_id][metric_name]
+            numeric_metric_id = mapping[use_case_id][org_id][metric_name]
             if numeric_metric_id is None:
-                metadata = bulk_record_meta[org_id].get(metric_name)
+                metadata = bulk_record_meta[use_case_id][org_id].get(metric_name)
                 metrics.incr(
                     "sentry_metrics.indexer.process_messages.dropped_message",
                     tags={
@@ -380,7 +381,7 @@ class IndexerBatch:
                                 and metadata.fetch_type_ext
                                 and metadata.fetch_type_ext.is_global
                             ),
-                            "org_batch_size": len(mapping[org_id]),
+                            "org_batch_size": len(mapping[use_case_id][org_id]),
                         },
                     )
                 continue
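
Net effect in `batch.py`: both `mapping` and `bulk_record_meta` gain an outer `UseCaseID` level, read off each message's `use_case_id` field, so every lookup now selects the use case before the org. A minimal sketch of the new shape (the metric name and ID below are invented for illustration):

```python
from typing import Mapping, Optional

from sentry.sentry_metrics.use_case_id_registry import UseCaseID

# New shape: Mapping[UseCaseID, Mapping[OrgId, Mapping[str, Optional[int]]]]
mapping: Mapping[UseCaseID, Mapping[int, Mapping[str, Optional[int]]]] = {
    UseCaseID.TRANSACTIONS: {
        1: {"d:transactions/duration@millisecond": 1234},  # hypothetical values
    },
}

use_case_id = UseCaseID.TRANSACTIONS
org_id = 1
# Old lookup: mapping[org_id][metric_name]
# New lookup: one extra level, keyed by use case first.
numeric_metric_id = mapping[use_case_id][org_id]["d:transactions/duration@millisecond"]
```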

+ 2 - 5
src/sentry/sentry_metrics/consumers/indexer/processing.py

@@ -107,14 +107,11 @@ class MessageProcessor:
         batch.filter_messages(cardinality_limiter_state.keys_to_remove)
 
         extracted_strings = batch.extract_strings()
-        org_strings = next(iter(extracted_strings.values())) if extracted_strings else {}
 
-        sdk.set_measurement("org_strings.len", len(org_strings))
+        sdk.set_measurement("org_strings.len", len(extracted_strings))
 
         with metrics.timer("metrics_consumer.bulk_record"), sentry_sdk.start_span(op="bulk_record"):
-            record_result = self._indexer.bulk_record(
-                use_case_id=self._config.use_case_id, org_strings=org_strings
-            )
+            record_result = self._indexer.bulk_record(extracted_strings)
 
         mapping = record_result.get_mapped_results()
         bulk_record_meta = record_result.get_fetch_metadata()
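
In `processing.py` the consumer no longer peels a single org mapping out of `extract_strings()` and passes the use case separately; the nested result flows through unchanged. Note that the `org_strings.len` measurement now counts use-case keys rather than orgs. A hedged sketch of the new call, with hypothetical contents:

```python
from sentry.sentry_metrics import indexer
from sentry.sentry_metrics.use_case_id_registry import UseCaseID

# Hypothetical extracted strings, shaped Mapping[UseCaseID, Mapping[OrgId, Set[str]]]:
extracted_strings = {
    UseCaseID.SESSIONS: {1: {"session.status", "init"}},
    UseCaseID.TRANSACTIONS: {1: {"transaction"}},
}

# Old: bulk_record(use_case_id=..., org_strings=next(iter(extracted_strings.values())))
# New: every use case goes to the indexer in a single call.
record_result = indexer.bulk_record(extracted_strings)
mapping = record_result.get_mapped_results()           # UseCaseID -> OrgId -> str -> id
bulk_record_meta = record_result.get_fetch_metadata()  # UseCaseID -> OrgId -> str -> Metadata
```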

+ 1 - 46
src/sentry/sentry_metrics/indexer/base.py

@@ -413,51 +413,6 @@ class StringIndexer(Service):
     )
 
     def bulk_record(
-        self, use_case_id: UseCaseKey, org_strings: Mapping[int, Set[str]]
-    ) -> KeyResults:
-        """
-        Takes in a mapping with org_ids to sets of strings.
-
-        Ultimately returns a mapping of those org_ids to a
-        string -> id mapping, for each string in the set.
-
-        There are three steps to getting the ids for strings:
-            0. ids from static strings (StaticStringIndexer)
-            1. ids from cache (CachingIndexer)
-            2. ids from existing db records (postgres/spanner)
-            3. ids that have been rate limited (postgres/spanner)
-            4. ids from newly created db records (postgres/spanner)
-
-        Each step will start off with a KeyCollection and KeyResults:
-            keys = KeyCollection(mapping)
-            key_results = KeyResults()
-
-        Then the work to get the ids (either from cache, db, etc)
-            .... # work to add results to KeyResults()
-
-        Those results will be added to `mapped_results` which can
-        be retrieved
-            key_results.get_mapped_results()
-
-        Remaining unmapped keys get turned into a new
-        KeyCollection for the next step:
-            new_keys = key_results.get_unmapped_keys(mapping)
-
-        When the last step is reached or a step resolves all the remaining
-        unmapped keys the key_results objects are merged and returned:
-            e.g. return cache_key_results.merge(db_read_key_results)
-        """
-        raise NotImplementedError()
-
-    def record(self, use_case_id: UseCaseKey, org_id: int, string: str) -> Optional[int]:
-        """Store a string and return the integer ID generated for it
-
-        With every call to this method, the lifetime of the entry will be
-        prolonged.
-        """
-        raise NotImplementedError()
-
-    def _uca_bulk_record(
         self, strings: Mapping[UseCaseID, Mapping[OrgId, Set[str]]]
     ) -> UseCaseKeyResults:
         """
@@ -487,7 +442,7 @@ class StringIndexer(Service):
         """
         raise NotImplementedError()
 
-    def _uca_record(self, use_case_id: UseCaseID, org_id: int, string: str) -> Optional[int]:
+    def record(self, use_case_id: UseCaseID, org_id: int, string: str) -> Optional[int]:
         """Store a string and return the integer ID generated for it
         With every call to this method, the lifetime of the entry will be
         prolonged.
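
For readability, here is the `StringIndexer` interface that survives this hunk, reassembled (signatures exactly as in the diff). The old `UseCaseKey`-based overloads are gone; there is now a single pair of entry points keyed by `UseCaseID`:

```python
class StringIndexer(Service):
    def bulk_record(
        self, strings: Mapping[UseCaseID, Mapping[OrgId, Set[str]]]
    ) -> UseCaseKeyResults:
        raise NotImplementedError()

    def record(self, use_case_id: UseCaseID, org_id: int, string: str) -> Optional[int]:
        """Store a string and return the integer ID generated for it.

        With every call to this method, the lifetime of the entry will be
        prolonged.
        """
        raise NotImplementedError()
```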

+ 3 - 15
src/sentry/sentry_metrics/indexer/cache.py

@@ -8,7 +8,6 @@ from django.core.cache import caches
 from sentry.sentry_metrics.configuration import UseCaseKey
 from sentry.sentry_metrics.indexer.base import (
     FetchType,
-    KeyResults,
     OrgId,
     StringIndexer,
     UseCaseKeyCollection,
@@ -103,17 +102,6 @@ class CachingIndexer(StringIndexer):
         self.indexer = indexer
 
     def bulk_record(
-        self, use_case_id: UseCaseKey, org_strings: Mapping[int, Set[str]]
-    ) -> KeyResults:
-        res = self._uca_bulk_record({REVERSE_METRIC_PATH_MAPPING[use_case_id]: org_strings})
-        return res.results[REVERSE_METRIC_PATH_MAPPING[use_case_id]]
-
-    def record(self, use_case_id: UseCaseKey, org_id: int, string: str) -> Optional[int]:
-        """Store a string and return the integer ID generated for it"""
-        result = self.bulk_record(use_case_id=use_case_id, org_strings={org_id: {string}})
-        return result[org_id][string]
-
-    def _uca_bulk_record(
         self, strings: Mapping[UseCaseID, Mapping[OrgId, Set[str]]]
     ) -> UseCaseKeyResults:
         cache_keys = UseCaseKeyCollection(strings)
@@ -152,7 +140,7 @@ class CachingIndexer(StringIndexer):
         if db_record_keys.size == 0:
             return cache_key_results
 
-        db_record_key_results = self.indexer._uca_bulk_record(
+        db_record_key_results = self.indexer.bulk_record(
             {
                 use_case_id: key_collection.mapping
                 for use_case_id, key_collection in db_record_keys.mapping.items()
@@ -163,8 +151,8 @@ class CachingIndexer(StringIndexer):
 
         return cache_key_results.merge(db_record_key_results)
 
-    def _uca_record(self, use_case_id: UseCaseID, org_id: int, string: str) -> Optional[int]:
-        result = self._uca_bulk_record(strings={use_case_id: {org_id: {string}}})
+    def record(self, use_case_id: UseCaseID, org_id: int, string: str) -> Optional[int]:
+        result = self.bulk_record(strings={use_case_id: {org_id: {string}}})
         return result[use_case_id][org_id][string]
 
     def resolve(self, use_case_id: UseCaseKey, org_id: int, string: str) -> Optional[int]:
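
On the caching and Postgres indexers, `record` is now the same thin wrapper: wrap the single string in a one-element nested mapping, call `bulk_record`, and index the result three levels deep. `UseCaseKeyResults` supports that triple indexing directly; a hedged usage sketch (org id and string are invented):

```python
results = caching_indexer.bulk_record({UseCaseID.SESSIONS: {1: {"session.status"}}})
string_id = results[UseCaseID.SESSIONS][1]["session.status"]

# Equivalent single-string form:
string_id = caching_indexer.record(
    use_case_id=UseCaseID.SESSIONS, org_id=1, string="session.status"
)
```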

+ 1 - 12
src/sentry/sentry_metrics/indexer/mock.py

@@ -5,7 +5,6 @@ from typing import DefaultDict, Dict, Mapping, Optional, Set
 from sentry.sentry_metrics.configuration import UseCaseKey
 from sentry.sentry_metrics.indexer.base import (
     FetchType,
-    KeyResults,
     OrgId,
     StringIndexer,
     UseCaseKeyCollection,
@@ -28,16 +27,6 @@ class RawSimpleIndexer(StringIndexer):
         self._reverse: Dict[int, str] = {}
 
     def bulk_record(
-        self, use_case_id: UseCaseKey, org_strings: Mapping[int, Set[str]]
-    ) -> KeyResults:
-        res = self._uca_bulk_record({REVERSE_METRIC_PATH_MAPPING[use_case_id]: org_strings})
-        return res.results[REVERSE_METRIC_PATH_MAPPING[use_case_id]]
-
-    def record(self, use_case_id: UseCaseKey, org_id: int, string: str) -> Optional[int]:
-        res = self._uca_bulk_record({REVERSE_METRIC_PATH_MAPPING[use_case_id]: {org_id: {string}}})
-        return res.results[REVERSE_METRIC_PATH_MAPPING[use_case_id]][org_id][string]
-
-    def _uca_bulk_record(
         self, strings: Mapping[UseCaseID, Mapping[OrgId, Set[str]]]
     ) -> UseCaseKeyResults:
         db_read_keys = UseCaseKeyCollection(strings)
@@ -71,7 +60,7 @@ class RawSimpleIndexer(StringIndexer):
 
         return db_read_key_results.merge(db_write_key_results)
 
-    def _uca_record(self, use_case_id: UseCaseID, org_id: int, string: str) -> Optional[int]:
+    def record(self, use_case_id: UseCaseID, org_id: int, string: str) -> Optional[int]:
         return self._record(use_case_id, org_id, string)
 
     def resolve(self, use_case_id: UseCaseKey, org_id: int, string: str) -> Optional[int]:

+ 3 - 19
src/sentry/sentry_metrics/indexer/postgres/postgres_v2.py

@@ -12,7 +12,6 @@ from psycopg2.errorcodes import DEADLOCK_DETECTED
 from sentry.sentry_metrics.configuration import IndexerStorage, UseCaseKey, get_ingest_config
 from sentry.sentry_metrics.indexer.base import (
     FetchType,
-    KeyResults,
     OrgId,
     StringIndexer,
     UseCaseKeyCollection,
@@ -23,11 +22,7 @@ from sentry.sentry_metrics.indexer.cache import CachingIndexer, StringIndexerCac
 from sentry.sentry_metrics.indexer.limiters.writes import writes_limiter_factory
 from sentry.sentry_metrics.indexer.postgres.models import TABLE_MAPPING, BaseIndexer, IndexerTable
 from sentry.sentry_metrics.indexer.strings import StaticStringIndexer
-from sentry.sentry_metrics.use_case_id_registry import (
-    METRIC_PATH_MAPPING,
-    REVERSE_METRIC_PATH_MAPPING,
-    UseCaseID,
-)
+from sentry.sentry_metrics.use_case_id_registry import METRIC_PATH_MAPPING, UseCaseID
 from sentry.utils import metrics
 
 __all__ = ["PostgresIndexer"]
@@ -106,17 +101,6 @@ class PGStringIndexerV2(StringIndexer):
             raise last_seen_exception
 
     def bulk_record(
-        self, use_case_id: UseCaseKey, org_strings: Mapping[int, Set[str]]
-    ) -> KeyResults:
-        res = self._uca_bulk_record({REVERSE_METRIC_PATH_MAPPING[use_case_id]: org_strings})
-        return res.results[REVERSE_METRIC_PATH_MAPPING[use_case_id]]
-
-    def record(self, use_case_id: UseCaseKey, org_id: int, string: str) -> Optional[int]:
-        """Store a string and return the integer ID generated for it"""
-        result = self.bulk_record(use_case_id=use_case_id, org_strings={org_id: {string}})
-        return result[org_id][string]
-
-    def _uca_bulk_record(
         self, strings: Mapping[UseCaseID, Mapping[OrgId, Set[str]]]
     ) -> UseCaseKeyResults:
         db_read_keys = UseCaseKeyCollection(strings)
@@ -244,8 +228,8 @@ class PGStringIndexerV2(StringIndexer):
 
         return db_read_key_results.merge(db_write_key_results).merge(rate_limited_key_results)
 
-    def _uca_record(self, use_case_id: UseCaseID, org_id: int, string: str) -> Optional[int]:
-        result = self._uca_bulk_record(strings={use_case_id: {org_id: {string}}})
+    def record(self, use_case_id: UseCaseID, org_id: int, string: str) -> Optional[int]:
+        result = self.bulk_record(strings={use_case_id: {org_id: {string}}})
         return result[use_case_id][org_id][string]
 
     def resolve(self, use_case_id: UseCaseKey, org_id: int, string: str) -> Optional[int]:

+ 4 - 14
src/sentry/sentry_metrics/indexer/strings.py

@@ -3,14 +3,13 @@ from typing import Mapping, Optional, Set
 from sentry.sentry_metrics.configuration import UseCaseKey
 from sentry.sentry_metrics.indexer.base import (
     FetchType,
-    KeyResults,
     OrgId,
     StringIndexer,
     UseCaseKeyCollection,
     UseCaseKeyResult,
     UseCaseKeyResults,
 )
-from sentry.sentry_metrics.use_case_id_registry import REVERSE_METRIC_PATH_MAPPING, UseCaseID
+from sentry.sentry_metrics.use_case_id_registry import UseCaseID
 
 # !!! DO NOT CHANGE THESE VALUES !!!
 #
@@ -168,15 +167,6 @@ class StaticStringIndexer(StringIndexer):
         self.indexer = indexer
 
     def bulk_record(
-        self, use_case_id: UseCaseKey, org_strings: Mapping[int, Set[str]]
-    ) -> KeyResults:
-        res = self._uca_bulk_record({REVERSE_METRIC_PATH_MAPPING[use_case_id]: org_strings})
-        return res.results[REVERSE_METRIC_PATH_MAPPING[use_case_id]]
-
-    def record(self, use_case_id: UseCaseKey, org_id: int, string: str) -> Optional[int]:
-        return self._uca_record(REVERSE_METRIC_PATH_MAPPING[use_case_id], org_id, string)
-
-    def _uca_bulk_record(
         self, strings: Mapping[UseCaseID, Mapping[OrgId, Set[str]]]
     ) -> UseCaseKeyResults:
         static_keys = UseCaseKeyCollection(strings)
@@ -193,7 +183,7 @@ class StaticStringIndexer(StringIndexer):
         if org_strings_left.size == 0:
             return static_key_results
 
-        indexer_results = self.indexer._uca_bulk_record(
+        indexer_results = self.indexer.bulk_record(
             {
                 use_case_id: key_collection.mapping
                 for use_case_id, key_collection in org_strings_left.mapping.items()
@@ -202,10 +192,10 @@ class StaticStringIndexer(StringIndexer):
 
         return static_key_results.merge(indexer_results)
 
-    def _uca_record(self, use_case_id: UseCaseID, org_id: int, string: str) -> Optional[int]:
+    def record(self, use_case_id: UseCaseID, org_id: int, string: str) -> Optional[int]:
         if string in SHARED_STRINGS:
             return SHARED_STRINGS[string]
-        return self.indexer._uca_record(use_case_id=use_case_id, org_id=org_id, string=string)
+        return self.indexer.record(use_case_id=use_case_id, org_id=org_id, string=string)
 
     def resolve(self, use_case_id: UseCaseKey, org_id: int, string: str) -> Optional[int]:
         if string in SHARED_STRINGS:
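
With the renames, the indexer stack speaks one vocabulary end to end: `StaticStringIndexer` answers anything in the hardcoded `SHARED_STRINGS` table and forwards the remainder to the wrapped indexer's `bulk_record`, which may in turn be a `CachingIndexer` over `PGStringIndexerV2`. A hedged sketch of the short-circuit (the construction and example strings are assumptions, not taken from this diff):

```python
static = StaticStringIndexer(CachingIndexer(cache, PGStringIndexerV2()))

# Assuming "transaction" is in SHARED_STRINGS, this returns the static ID
# without touching the cache or Postgres:
shared_id = static.record(UseCaseID.TRANSACTIONS, org_id=1, string="transaction")

# An unknown string falls through the chain to the database-backed record():
custom_id = static.record(UseCaseID.TRANSACTIONS, org_id=1, string="my_custom_tag")
```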

+ 19 - 5
src/sentry/testutils/cases.py

@@ -2,6 +2,8 @@ from __future__ import annotations
 
 import responses
 
+from sentry.sentry_metrics.use_case_id_registry import REVERSE_METRIC_PATH_MAPPING, UseCaseID
+
 __all__ = (
     "TestCase",
     "TransactionTestCase",
@@ -1258,21 +1260,33 @@ class BaseMetricsTestCase(SnubaTestCase):
 
         def metric_id(key: str):
             assert isinstance(key, str)
-            res = indexer.record(use_case_id=use_case_id, org_id=org_id, string=key)
+            res = indexer.record(
+                use_case_id=REVERSE_METRIC_PATH_MAPPING[use_case_id],
+                org_id=org_id,
+                string=key,
+            )
             assert res is not None, key
             mapping_meta[str(res)] = key
             return res
 
         def tag_key(name):
             assert isinstance(name, str)
-            res = indexer.record(use_case_id=use_case_id, org_id=org_id, string=name)
+            res = indexer.record(
+                use_case_id=REVERSE_METRIC_PATH_MAPPING[use_case_id],
+                org_id=org_id,
+                string=name,
+            )
             assert res is not None, name
             mapping_meta[str(res)] = name
             return res
 
         def tag_value(name):
             assert isinstance(name, str)
-            res = indexer.record(use_case_id=use_case_id, org_id=org_id, string=name)
+            res = indexer.record(
+                use_case_id=REVERSE_METRIC_PATH_MAPPING[use_case_id],
+                org_id=org_id,
+                string=name,
+            )
             assert res is not None, name
             mapping_meta[str(res)] = name
             return res
@@ -1565,7 +1579,7 @@ class MetricsEnhancedPerformanceTestCase(BaseMetricsLayerTestCase, TestCase):
             *list(METRICS_MAP.values()),
         ]
         org_strings = {self.organization.id: set(strings)}
-        indexer.bulk_record(use_case_id=UseCaseKey.PERFORMANCE, org_strings=org_strings)
+        indexer.bulk_record({UseCaseID.TRANSACTIONS: org_strings})
 
     def store_transaction_metric(
         self,
@@ -2247,7 +2261,7 @@ class MetricsAPIBaseTestCase(BaseMetricsLayerTestCase, APITestCase):
 
 class OrganizationMetricMetaIntegrationTestCase(MetricsAPIBaseTestCase):
     def __indexer_record(self, org_id: int, value: str) -> int:
-        return indexer.record(use_case_id=UseCaseKey.RELEASE_HEALTH, org_id=org_id, string=value)
+        return indexer.record(use_case_id=UseCaseID.SESSIONS, org_id=org_id, string=value)
 
     def setUp(self):
         super().setUp()
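
The test helpers in `cases.py` still receive a legacy `UseCaseKey` and translate at the boundary via `REVERSE_METRIC_PATH_MAPPING`. Judging from the substitutions made elsewhere in this diff, the mapping pairs the old keys with the new IDs like so:

```python
from sentry.sentry_metrics.configuration import UseCaseKey
from sentry.sentry_metrics.use_case_id_registry import REVERSE_METRIC_PATH_MAPPING, UseCaseID

assert REVERSE_METRIC_PATH_MAPPING[UseCaseKey.RELEASE_HEALTH] is UseCaseID.SESSIONS
assert REVERSE_METRIC_PATH_MAPPING[UseCaseKey.PERFORMANCE] is UseCaseID.TRANSACTIONS
```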

+ 5 - 5
tests/sentry/api/endpoints/test_organization_metric_data.py

@@ -8,7 +8,7 @@ import pytest
 from freezegun import freeze_time
 
 from sentry.sentry_metrics import indexer
-from sentry.sentry_metrics.configuration import UseCaseKey
+from sentry.sentry_metrics.use_case_id_registry import UseCaseID
 from sentry.snuba.metrics.naming_layer.mri import ParsedMRI, SessionMRI, TransactionMRI
 from sentry.snuba.metrics.naming_layer.public import (
     SessionMetricKey,
@@ -23,12 +23,12 @@ from sentry.utils.cursors import Cursor
 from tests.sentry.api.endpoints.test_organization_metrics import MOCKED_DERIVED_METRICS
 
 
-def indexer_record(use_case_id: UseCaseKey, org_id: int, string: str) -> int:
-    return indexer.record(use_case_id=use_case_id, org_id=org_id, string=string)
+def indexer_record(use_case_id: UseCaseID, org_id: int, string: str) -> int:
+    return indexer.record(use_case_id, org_id, string)
 
 
-perf_indexer_record = partial(indexer_record, UseCaseKey.PERFORMANCE)
-rh_indexer_record = partial(indexer_record, UseCaseKey.RELEASE_HEALTH)
+perf_indexer_record = partial(indexer_record, UseCaseID.TRANSACTIONS)
+rh_indexer_record = partial(indexer_record, UseCaseID.SESSIONS)
 
 pytestmark = [pytest.mark.sentry_metrics]
 

+ 3 - 3
tests/sentry/api/endpoints/test_organization_metric_details.py

@@ -5,7 +5,7 @@ from unittest.mock import patch
 import pytest
 
 from sentry.sentry_metrics import indexer
-from sentry.sentry_metrics.configuration import UseCaseKey
+from sentry.sentry_metrics.use_case_id_registry import UseCaseID
 from sentry.sentry_metrics.utils import resolve_weak
 from sentry.snuba.metrics import SingularEntityDerivedMetric
 from sentry.snuba.metrics.fields.snql import complement, division_float
@@ -37,7 +37,7 @@ pytestmark = pytest.mark.sentry_metrics
 
 
 def _indexer_record(org_id: int, string: str) -> int:
-    return indexer.record(use_case_id=UseCaseKey.RELEASE_HEALTH, org_id=org_id, string=string)
+    return indexer.record(use_case_id=UseCaseID.SESSIONS, org_id=org_id, string=string)
 
 
 @region_silo_test(stable=True)
@@ -242,7 +242,7 @@ class OrganizationMetricDetailsIntegrationTest(OrganizationMetricMetaIntegration
         """
         mocked_derived_metrics.return_value = MOCKED_DERIVED_METRICS_2
         org_id = self.project.organization.id
-        use_key_id = UseCaseKey.RELEASE_HEALTH
+        use_key_id = UseCaseID.SESSIONS
         metric_id = _indexer_record(org_id, "metric_foo_doe")
 
         self.store_session(
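
Taken together, the test updates spell out the caller-side migration recipe (`org_id`, `value`, and `strings` below are placeholders):

```python
# Before (UseCaseKey-based API, removed in this commit):
#   indexer.record(use_case_id=UseCaseKey.RELEASE_HEALTH, org_id=org_id, string=value)
#   indexer.bulk_record(use_case_id=UseCaseKey.PERFORMANCE, org_strings={org_id: strings})

# After (UseCaseID-based API):
indexer.record(use_case_id=UseCaseID.SESSIONS, org_id=org_id, string=value)
indexer.bulk_record({UseCaseID.TRANSACTIONS: {org_id: strings}})
```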

Some files were not shown because too many files changed in this diff