
ref: delete some dead methods from tsdb (#83008)

no callers in getsentry or sentry

found via https://github.com/asottile/dead

anthony sottile 2 months ago
commit 8edb1f5aac

+ 0 - 89
src/sentry/tsdb/base.py

@@ -117,11 +117,7 @@ class BaseTSDB(Service):
             "get_sums",
             "get_sums",
             "get_distinct_counts_series",
             "get_distinct_counts_series",
             "get_distinct_counts_totals",
             "get_distinct_counts_totals",
-            "get_distinct_counts_union",
-            "get_most_frequent",
-            "get_most_frequent_series",
             "get_frequency_series",
             "get_frequency_series",
-            "get_frequency_totals",
             "get_distinct_counts_totals_with_conditions",
             "get_distinct_counts_totals_with_conditions",
         ]
         ]
     )
     )
@@ -574,22 +570,6 @@ class BaseTSDB(Service):
         """
         """
         raise NotImplementedError
         raise NotImplementedError
 
 
-    def get_distinct_counts_union(
-        self,
-        model: TSDBModel,
-        keys: list[int] | None,
-        start: datetime,
-        end: datetime | None = None,
-        rollup: int | None = None,
-        environment_id: int | None = None,
-        tenant_ids: dict[str, str | int] | None = None,
-    ) -> int:
-        """
-        Count the total number of distinct items across multiple counters
-        during a time range.
-        """
-        raise NotImplementedError
-
     def merge_distinct_counts(
     def merge_distinct_counts(
         self,
         self,
         model: TSDBModel,
         model: TSDBModel,
@@ -632,52 +612,6 @@ class BaseTSDB(Service):
         """
         """
         raise NotImplementedError
         raise NotImplementedError
 
 
-    def get_most_frequent(
-        self,
-        model: TSDBModel,
-        keys: Sequence[TSDBKey],
-        start: datetime,
-        end: datetime | None = None,
-        rollup: int | None = None,
-        limit: int | None = None,
-        environment_id: int | None = None,
-        tenant_ids: dict[str, str | int] | None = None,
-    ) -> dict[TSDBKey, list[tuple[str, float]]]:
-        """
-        Retrieve the most frequently seen items in a frequency table.
-
-        Results are returned as a mapping, where the key is the key requested
-        and the value is a list of ``(member, score)`` tuples, ordered by the
-        highest (most frequent) to lowest (least frequent) score. The maximum
-        number of items returned is ``index capacity * rollup intervals`` if no
-        ``limit`` is provided.
-        """
-        raise NotImplementedError
-
-    def get_most_frequent_series(
-        self,
-        model: TSDBModel,
-        keys: Iterable[str],
-        start: datetime,
-        end: datetime | None = None,
-        rollup: int | None = None,
-        limit: int | None = None,
-        environment_id: int | None = None,
-        tenant_ids: dict[str, str | int] | None = None,
-    ) -> dict[str, list[tuple[int, dict[str, float]]]]:
-        """
-        Retrieve the most frequently seen items in a frequency table for each
-        interval in a series. (This is in contrast with ``get_most_frequent``,
-        which returns the most frequent items seen over the entire requested
-        range.)
-
-        Results are returned as a mapping, where the key is the key requested
-        and the value is a list of ``(timestamp, {item: score, ...})`` pairs
-        over the series. The maximum number of items returned for each interval
-        is the index capacity if no ``limit`` is provided.
-        """
-        raise NotImplementedError
-
     def get_frequency_series(
     def get_frequency_series(
         self,
         self,
         model: TSDBModel,
         model: TSDBModel,
@@ -701,29 +635,6 @@ class BaseTSDB(Service):
         """
         """
         raise NotImplementedError
         raise NotImplementedError
 
 
-    def get_frequency_totals(
-        self,
-        model: TSDBModel,
-        items: Mapping[TSDBKey, Sequence[TSDBItem]],
-        start: datetime,
-        end: datetime | None = None,
-        rollup: int | None = None,
-        environment_id: int | None = None,
-        tenant_ids: dict[str, str | int] | None = None,
-    ) -> dict[TSDBKey, dict[TSDBItem, float]]:
-        """
-        Retrieve the total frequency of known items in a table over time.
-
-        The items requested should be passed as a mapping, where the key is the
-        metric key, and the value is a sequence of members to retrieve scores
-        for.
-
-        Results are returned as a mapping, where the key is the key requested
-        and the value is a mapping of ``{item: score, ...}`` containing the
-        total score of items over the interval.
-        """
-        raise NotImplementedError
-
     def merge_frequencies(
     def merge_frequencies(
         self,
         self,
         model: TSDBModel,
         model: TSDBModel,

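For context, the deleted `get_distinct_counts_union` merged the per-interval HyperLogLog counters for every requested key and reported the cardinality of the union. A minimal single-node sketch of that idea using redis-py (key names and sample members are made up; the real implementation routed PFMERGE per cluster host):

```python
# Sketch only: the union-count idea behind the removed method, assuming a
# local Redis and hypothetical key names -- not Sentry's actual key layout.
import redis

r = redis.Redis()

# One HyperLogLog per (counter key, rollup interval), as the TSDB stores them.
r.pfadd("tsdb:counter:1:interval:1000", "user:a", "user:b")
r.pfadd("tsdb:counter:1:interval:2000", "user:b", "user:c")
r.pfadd("tsdb:counter:2:interval:1000", "user:c", "user:d")

# Merge every interval of every requested key into a temporary HLL, count
# the distinct members, then discard the temporary key -- the same
# PFMERGE / PFCOUNT / DEL sequence the deleted code pipelined per host.
tmp = "tsdb:tmp:union"
r.pfmerge(
    tmp,
    "tsdb:counter:1:interval:1000",
    "tsdb:counter:1:interval:2000",
    "tsdb:counter:2:interval:1000",
)
print(r.pfcount(tmp))  # 4 -- HyperLogLog counts are approximate at scale
r.delete(tmp)
```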
+ 0 - 58
src/sentry/tsdb/dummy.py

@@ -93,19 +93,6 @@ class DummyTSDB(BaseTSDB):
         self.validate_arguments([model], [environment_id])
         return 0

-    def get_distinct_counts_union(
-        self,
-        model: TSDBModel,
-        keys: list[int] | None,
-        start: datetime,
-        end: datetime | None = None,
-        rollup: int | None = None,
-        environment_id: int | None = None,
-        tenant_ids: dict[str, str | int] | None = None,
-    ) -> int:
-        self.validate_arguments([model], [environment_id])
-        return 0
-
     def merge_distinct_counts(
         self, model, destination, sources, timestamp=None, environment_ids=None
     ):
@@ -124,35 +111,6 @@ class DummyTSDB(BaseTSDB):
     ):
         self.validate_arguments([model for model, request in requests], [environment_id])

-    def get_most_frequent(
-        self,
-        model: TSDBModel,
-        keys: Sequence[TSDBKey],
-        start: datetime,
-        end: datetime | None = None,
-        rollup: int | None = None,
-        limit: int | None = None,
-        environment_id: int | None = None,
-        tenant_ids: dict[str, str | int] | None = None,
-    ) -> dict[TSDBKey, list[tuple[str, float]]]:
-        self.validate_arguments([model], [environment_id])
-        return {key: [] for key in keys}
-
-    def get_most_frequent_series(
-        self,
-        model: TSDBModel,
-        keys: Iterable[str],
-        start: datetime,
-        end: datetime | None = None,
-        rollup: int | None = None,
-        limit: int | None = None,
-        environment_id: int | None = None,
-        tenant_ids: dict[str, str | int] | None = None,
-    ) -> dict[str, list[tuple[int, dict[str, float]]]]:
-        self.validate_arguments([model], [environment_id])
-        rollup, series = self.get_optimal_rollup_series(start, end, rollup)
-        return {key: [(timestamp, {}) for timestamp in series] for key in keys}
-
     def get_frequency_series(
         self,
         model: TSDBModel,
@@ -171,22 +129,6 @@ class DummyTSDB(BaseTSDB):
             for key, members in items.items()
         }

-    def get_frequency_totals(
-        self,
-        model: TSDBModel,
-        items: Mapping[TSDBKey, Sequence[TSDBItem]],
-        start: datetime,
-        end: datetime | None = None,
-        rollup: int | None = None,
-        environment_id: int | None = None,
-        tenant_ids: dict[str, str | int] | None = None,
-    ) -> dict[TSDBKey, dict[TSDBItem, float]]:
-        self.validate_arguments([model], [environment_id])
-        results = {}
-        for key, members in items.items():
-            results[key] = {member: 0.0 for member in members}
-        return results
-
     def merge_frequencies(
         self,
         model: TSDBModel,

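The removed dummy methods all followed DummyTSDB's general pattern: validate the call, then return a zero-valued result in the right shape. A standalone sketch of that pattern (stub names are invented; only the return shapes come from the deleted code):

```python
# Shape-preserving no-op stubs, in the style of the removed DummyTSDB methods.
def get_most_frequent_stub(keys: list[str]) -> dict[str, list[tuple[str, float]]]:
    # No storage behind it: every key maps to an empty ranking.
    return {key: [] for key in keys}

def get_frequency_totals_stub(
    items: dict[str, tuple[str, ...]],
) -> dict[str, dict[str, float]]:
    # Every requested member scores 0.0.
    return {key: {member: 0.0 for member in members} for key, members in items.items()}

print(get_most_frequent_stub(["organization:1"]))            # {'organization:1': []}
print(get_frequency_totals_stub({"org:1": ("p:1", "p:2")}))  # {'org:1': {'p:1': 0.0, 'p:2': 0.0}}
```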
+ 1 - 208
src/sentry/tsdb/redis.py

@@ -1,12 +1,10 @@
 import binascii
 import itertools
 import logging
-import random
 import uuid
 from collections import defaultdict, namedtuple
 from collections.abc import Callable, Iterable, Mapping, Sequence
 from datetime import datetime
-from functools import reduce
 from hashlib import md5
 from typing import Any, ContextManager, Generic, TypeVar

@@ -17,12 +15,7 @@ from redis.client import Script

 from sentry.tsdb.base import BaseTSDB, IncrMultiOptions, TSDBItem, TSDBKey, TSDBModel
 from sentry.utils.dates import to_datetime
-from sentry.utils.redis import (
-    check_cluster_versions,
-    get_cluster_from_options,
-    is_instance_rb_cluster,
-    load_redis_script,
-)
+from sentry.utils.redis import check_cluster_versions, get_cluster_from_options, load_redis_script
 from sentry.utils.versioning import Version

 logger = logging.getLogger(__name__)
@@ -563,95 +556,6 @@ class RedisTSDB(BaseTSDB):

         return {key: value.value for key, value in responses.items()}

-    def get_distinct_counts_union(
-        self,
-        model: TSDBModel,
-        keys: list[int] | None,
-        start: datetime,
-        end: datetime | None = None,
-        rollup: int | None = None,
-        environment_id: int | None = None,
-        tenant_ids: dict[str, str | int] | None = None,
-    ) -> int:
-        self.validate_arguments([model], [environment_id])
-
-        if not keys:
-            return 0
-
-        rollup, series = self.get_optimal_rollup_series(start, end, rollup)
-
-        temporary_id = uuid.uuid1().hex
-
-        def make_temporary_key(key: str | int) -> str:
-            return f"{self.prefix}{temporary_id}:{key}"
-
-        def expand_key(key: int) -> list[int | str]:
-            """
-            Return a list containing all keys for each interval in the series for a key.
-            """
-            return [
-                self.make_key(model, rollup, timestamp, key, environment_id) for timestamp in series
-            ]
-
-        cluster, _ = self.get_cluster(environment_id)
-        if is_instance_rb_cluster(cluster, False):
-            router = cluster.get_router()
-        else:
-            raise AssertionError("unreachable")
-
-        def map_key_to_host(hosts: dict[int, set[int]], key: int) -> dict[int, set[int]]:
-            """
-            Identify the host where a key is located and add it to the host map.
-            """
-            hosts[router.get_host_for_key(key)].add(key)
-            return hosts
-
-        def get_partition_aggregate(value: tuple[int, set[int]]) -> tuple[int, int]:
-            """
-            Fetch the HyperLogLog value (in its raw byte representation) that
-            results from merging all HyperLogLogs at the provided keys.
-            """
-            (host, _keys) = value
-            destination = make_temporary_key(f"p:{host}")
-            client = cluster.get_local_client(host)
-            with client.pipeline(transaction=False) as pipeline:
-                pipeline.execute_command(
-                    "PFMERGE",
-                    destination,
-                    *itertools.chain.from_iterable(expand_key(key) for key in _keys),
-                )
-                pipeline.get(destination)
-                pipeline.delete(destination)
-                return host, pipeline.execute()[1]
-
-        def merge_aggregates(values: list[tuple[int, int]]) -> int:
-            """
-            Calculate the cardinality of the provided HyperLogLog values.
-            """
-            destination = make_temporary_key("a")  # all values will be merged into this key
-            aggregates = {make_temporary_key(f"a:{host}"): value for host, value in values}
-
-            # Choose a random host to execute the reduction on. (We use a host
-            # here that we've already accessed as part of this process -- this
-            # way, we constrain the choices to only hosts that we know are
-            # running.)
-            client = cluster.get_local_client(random.choice(values)[0])
-            with client.pipeline(transaction=False) as pipeline:
-                pipeline.mset(aggregates)
-                pipeline.execute_command("PFMERGE", destination, *aggregates.keys())
-                pipeline.execute_command("PFCOUNT", destination)
-                pipeline.delete(destination, *aggregates.keys())
-                return pipeline.execute()[2]
-
-        # TODO: This could be optimized to skip the intermediate step for the
-        # host that has the largest number of keys if the final merge and count
-        # is performed on that host. If that host contains *all* keys, the
-        # final reduction could be performed as a single PFCOUNT, skipping the
-        # MSET and PFMERGE operations entirely.
-
-        reduced: dict[int, set[int]] = reduce(map_key_to_host, set(keys), defaultdict(set))
-        return merge_aggregates([get_partition_aggregate(x) for x in reduced.items()])
-
     def merge_distinct_counts(
         self,
         model: TSDBModel,
@@ -828,90 +732,6 @@ class RedisTSDB(BaseTSDB):
                 if durable:
                     raise

-    def get_most_frequent(
-        self,
-        model: TSDBModel,
-        keys: Sequence[TSDBKey],
-        start: datetime,
-        end: datetime | None = None,
-        rollup: int | None = None,
-        limit: int | None = None,
-        environment_id: int | None = None,
-        tenant_ids: dict[str, int | str] | None = None,
-    ) -> dict[TSDBKey, list[tuple[str, float]]]:
-        self.validate_arguments([model], [environment_id])
-
-        if not self.enable_frequency_sketches:
-            raise NotImplementedError("Frequency sketches are disabled.")
-
-        rollup, series = self.get_optimal_rollup_series(start, end, rollup)
-
-        arguments = ["RANKED"] + list(self.DEFAULT_SKETCH_PARAMETERS)
-        if limit is not None:
-            arguments.append(int(limit))
-
-        commands = {}
-        for key in keys:
-            ks = []
-            for timestamp in series:
-                ks.extend(
-                    self.make_frequency_table_keys(model, rollup, timestamp, key, environment_id)
-                )
-            commands[key] = [(CountMinScript, ks, arguments)]
-
-        results = {}
-        cluster, _ = self.get_cluster(environment_id)
-        for _key, responses in cluster.execute_commands(commands).items():
-            results[_key] = [
-                (member.decode("utf-8"), float(score)) for member, score in responses[0].value
-            ]
-
-        return results
-
-    def get_most_frequent_series(
-        self,
-        model: TSDBModel,
-        keys: Iterable[str],
-        start: datetime,
-        end: datetime | None = None,
-        rollup: int | None = None,
-        limit: int | None = None,
-        environment_id: int | None = None,
-        tenant_ids: dict[str, int | str] | None = None,
-    ) -> dict[str, list[tuple[int, dict[str, float]]]]:
-        self.validate_arguments([model], [environment_id])
-
-        if not self.enable_frequency_sketches:
-            raise NotImplementedError("Frequency sketches are disabled.")
-
-        rollup, series = self.get_optimal_rollup_series(start, end, rollup)
-
-        arguments = ["RANKED"] + list(self.DEFAULT_SKETCH_PARAMETERS)
-        if limit is not None:
-            arguments.append(int(limit))
-
-        commands: dict[str, list[tuple[Script, list[str], list[str | int]]]] = {}
-        for key in keys:
-            commands[key] = [
-                (
-                    CountMinScript,
-                    self.make_frequency_table_keys(model, rollup, timestamp, key, environment_id),
-                    arguments,
-                )
-                for timestamp in series
-            ]
-
-        def unpack_response(response: rb.Promise) -> dict[str, float]:
-            return {item.decode("utf-8"): float(score) for item, score in response.value}
-
-        results: dict[str, list[tuple[int, dict[str, float]]]] = {}
-        cluster, _ = self.get_cluster(environment_id)
-        for key, responses in cluster.execute_commands(commands).items():
-            zipped_series = zip(series, (unpack_response(response) for response in responses))
-            results[key] = list(zipped_series)
-
-        return results
-
     def get_frequency_series(
         self,
         model: TSDBModel,
@@ -961,33 +781,6 @@ class RedisTSDB(BaseTSDB):

         return results

-    def get_frequency_totals(
-        self,
-        model: TSDBModel,
-        items: Mapping[TSDBKey, Sequence[TSDBItem]],
-        start: datetime,
-        end: datetime | None = None,
-        rollup: int | None = None,
-        environment_id: int | None = None,
-        tenant_ids: dict[str, str | int] | None = None,
-    ) -> dict[TSDBKey, dict[TSDBItem, float]]:
-        self.validate_arguments([model], [environment_id])
-
-        if not self.enable_frequency_sketches:
-            raise NotImplementedError("Frequency sketches are disabled.")
-
-        responses: dict[TSDBKey, dict[TSDBItem, float]] = {}
-        frequency_series = self.get_frequency_series(
-            model, items, start, end, rollup, environment_id
-        )
-        for _key, series in frequency_series.items():
-            response = responses[_key] = defaultdict(float)
-            for timestamp, results in series:
-                for member, value in results.items():
-                    response[member] = response.get(member, 0) + value
-
-        return responses
-
     def merge_frequencies(
         self,
         model: TSDBModel,

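Note that the Redis `get_frequency_totals` removed above was just a fold over `get_frequency_series`. A standalone sketch of that reduction (the sample series is invented, but the accumulation mirrors the deleted loop):

```python
from collections import defaultdict

# Shape produced by get_frequency_series:
#     {key: [(timestamp, {member: score, ...}), ...]}
series = {
    "organization:1": [
        (1000, {"project:1": 1.0, "project:2": 2.0}),
        (2000, {"project:1": 1.0, "project:3": 3.0}),
    ],
}

# Collapse the per-interval scores into per-member totals, the same way the
# removed method built its {key: {member: total}} response.
totals: dict[str, dict[str, float]] = {}
for key, points in series.items():
    response = totals[key] = defaultdict(float)
    for _timestamp, scores in points:
        for member, value in scores.items():
            response[member] += value

print(dict(totals["organization:1"]))
# {'project:1': 2.0, 'project:2': 2.0, 'project:3': 3.0}
```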
+ 0 - 4
src/sentry/tsdb/redissnuba.py

@@ -31,11 +31,7 @@ method_specifications = {
     "get_distinct_counts_series": (READ, single_model_argument),
     "get_distinct_counts_series": (READ, single_model_argument),
     "get_distinct_counts_totals": (READ, single_model_argument),
     "get_distinct_counts_totals": (READ, single_model_argument),
     "get_distinct_counts_totals_with_conditions": (READ, single_model_argument),
     "get_distinct_counts_totals_with_conditions": (READ, single_model_argument),
-    "get_distinct_counts_union": (READ, single_model_argument),
-    "get_most_frequent": (READ, single_model_argument),
-    "get_most_frequent_series": (READ, single_model_argument),
     "get_frequency_series": (READ, single_model_argument),
     "get_frequency_series": (READ, single_model_argument),
-    "get_frequency_totals": (READ, single_model_argument),
     "incr": (WRITE, single_model_argument),
     "incr": (WRITE, single_model_argument),
     "incr_multi": (WRITE, lambda callargs: {item[0] for item in callargs["items"]}),
     "incr_multi": (WRITE, lambda callargs: {item[0] for item in callargs["items"]}),
     "merge": (WRITE, single_model_argument),
     "merge": (WRITE, single_model_argument),

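`method_specifications` is how the redissnuba backend dispatches: each entry pairs an operation kind (READ or WRITE) with a callable that extracts the models a call touches, so reads and writes can be routed to different backends. A toy sketch of that table-driven dispatch (everything except `single_model_argument` is invented here):

```python
# Toy dispatch in the style of redissnuba's method_specifications: look up
# the operation kind and the model extractor, then pick a backend.
READ, WRITE = "read", "write"

def single_model_argument(callargs):
    # Most TSDB methods take a single `model` argument.
    return {callargs["model"]}

specifications = {
    "get_sums": (READ, single_model_argument),
    "incr": (WRITE, single_model_argument),
    "incr_multi": (WRITE, lambda callargs: {item[0] for item in callargs["items"]}),
}

def route(method: str, callargs: dict):
    kind, get_models = specifications[method]
    return kind, get_models(callargs)  # choose redis or snuba from these

print(route("incr", {"model": "project"}))             # ('write', {'project'})
print(route("incr_multi", {"items": [("group", 1)]}))  # ('write', {'group'})
```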
+ 0 - 103
src/sentry/tsdb/snuba.py

@@ -832,88 +832,6 @@ class SnubaTSDB(BaseTSDB):
             conditions=conditions,
         )

-    def get_distinct_counts_union(
-        self, model, keys, start, end=None, rollup=None, environment_id=None, tenant_ids=None
-    ):
-        return self.get_data(
-            model,
-            keys,
-            start,
-            end,
-            rollup,
-            [environment_id] if environment_id is not None else None,
-            aggregation="uniq",
-            group_on_model=False,
-            tenant_ids=tenant_ids,
-        )
-
-    def get_most_frequent(
-        self,
-        model,
-        keys: Sequence[TSDBKey],
-        start,
-        end=None,
-        rollup=None,
-        limit=10,
-        environment_id=None,
-        tenant_ids=None,
-    ):
-        aggregation = f"topK({limit})"
-        result = self.get_data(
-            model,
-            keys,
-            start,
-            end,
-            rollup,
-            [environment_id] if environment_id is not None else None,
-            aggregation=aggregation,
-            tenant_ids=tenant_ids,
-        )
-        # convert
-        #    {group:[top1, ...]}
-        # into
-        #    {group: [(top1, score), ...]}
-        for k, top in result.items():
-            item_scores = [(v, float(i + 1)) for i, v in enumerate(reversed(top or []))]
-            result[k] = list(reversed(item_scores))
-
-        return result
-
-    def get_most_frequent_series(
-        self,
-        model,
-        keys,
-        start,
-        end=None,
-        rollup=None,
-        limit=10,
-        environment_id=None,
-        tenant_ids=None,
-    ):
-        aggregation = f"topK({limit})"
-        result = self.get_data(
-            model,
-            keys,
-            start,
-            end,
-            rollup,
-            [environment_id] if environment_id is not None else None,
-            aggregation=aggregation,
-            group_on_time=True,
-            tenant_ids=tenant_ids,
-        )
-        # convert
-        #    {group:{timestamp:[top1, ...]}}
-        # into
-        #    {group: [(timestamp, {top1: score, ...}), ...]}
-        return {
-            k: sorted(
-                (timestamp, {v: float(i + 1) for i, v in enumerate(reversed(topk or []))})
-                for (timestamp, topk) in result[k].items()
-            )
-            for k in result.keys()
-        }
-
     def get_frequency_series(
         self,
         model: TSDBModel,
@@ -941,27 +859,6 @@ class SnubaTSDB(BaseTSDB):
         #    {group: [(timestamp, {agg: count, ...}), ...]}
         return {k: sorted(result[k].items()) for k in result}

-    def get_frequency_totals(
-        self,
-        model: TSDBModel,
-        items: Mapping[TSDBKey, Sequence[TSDBItem]],
-        start: datetime,
-        end: datetime | None = None,
-        rollup: int | None = None,
-        environment_id: int | None = None,
-        tenant_ids: dict[str, str | int] | None = None,
-    ) -> dict[TSDBKey, dict[TSDBItem, float]]:
-        return self.get_data(
-            model,
-            items,
-            start,
-            end,
-            rollup,
-            [environment_id] if environment_id is not None else None,
-            aggregation="count()",
-            tenant_ids=tenant_ids,
-        )
-
     def flatten_keys(self, items: Mapping | Sequence | Set) -> tuple[list, Sequence | None]:
         """
         Returns a normalized set of keys based on the various formats accepted

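The Snuba `get_most_frequent` deleted above leaned on ClickHouse's `topK(N)` aggregate, then back-filled scores by rank so the result matched the Redis backend's `(member, score)` shape. The reshaping step in isolation (the sample `topK` output is fabricated):

```python
# topK() returns members ordered most- to least-frequent; the removed code
# synthesized descending float scores from that rank.
result = {"project:1": ["group:9", "group:4", "group:7"]}  # pretend topK(3) output

# convert {group: [top1, ...]} into {group: [(top1, score), ...]}
for k, top in result.items():
    item_scores = [(v, float(i + 1)) for i, v in enumerate(reversed(top or []))]
    result[k] = list(reversed(item_scores))

print(result)
# {'project:1': [('group:9', 3.0), ('group:4', 2.0), ('group:7', 1.0)]}
```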
+ 0 - 10
tests/sentry/event_manager/test_event_manager.py

@@ -1163,16 +1163,6 @@ class EventManagerTest(TestCase, SnubaTestCase, EventManagerTestMixin, Performan
         assert query(TSDBModel.project, project.id, environment_id=environment_id) == 1
         assert query(TSDBModel.group, event.group.id, environment_id=environment_id) == 1

-    @pytest.mark.xfail
-    def test_record_frequencies(self) -> None:
-        project = self.project
-        manager = EventManager(make_event())
-        event = manager.save(project.id)
-
-        assert tsdb.backend.get_most_frequent(
-            TSDBModel.frequent_issues_by_project, (event.project.id,), event.datetime
-        ) == {event.project.id: [(event.group_id, 1.0)]}
-
     def test_event_user(self) -> None:
         event_id = uuid.uuid4().hex
         manager = EventManager(

+ 0 - 174
tests/sentry/tsdb/test_redis.py

@@ -253,21 +253,6 @@ class RedisTSDBTest(TestCase):
         )
         assert results == {1: 0, 2: 0}

-        assert self.db.get_distinct_counts_union(model, [], dts[0], dts[-1], rollup=3600) == 0
-        assert self.db.get_distinct_counts_union(model, [1, 2], dts[0], dts[-1], rollup=3600) == 3
-        assert (
-            self.db.get_distinct_counts_union(
-                model, [1, 2], dts[0], dts[-1], rollup=3600, environment_id=1
-            )
-            == 1
-        )
-        assert (
-            self.db.get_distinct_counts_union(
-                model, [1, 2], dts[0], dts[-1], rollup=3600, environment_id=0
-            )
-            == 0
-        )
-
         self.db.merge_distinct_counts(model, 1, [2], dts[0], environment_ids=[0, 1])

         assert self.db.get_distinct_counts_series(model, [1], dts[0], dts[-1], rollup=3600) == {
@@ -308,11 +293,6 @@ class RedisTSDBTest(TestCase):
         results = self.db.get_distinct_counts_totals(model, [1, 2], dts[0], dts[-1], rollup=3600)
         assert results == {1: 3, 2: 0}

-        assert self.db.get_distinct_counts_union(model, [], dts[0], dts[-1], rollup=3600) == 0
-        assert self.db.get_distinct_counts_union(model, [1], dts[0], dts[-1], rollup=3600) == 3
-        assert self.db.get_distinct_counts_union(model, [1, 2], dts[0], dts[-1], rollup=3600) == 3
-        assert self.db.get_distinct_counts_union(model, [2], dts[0], dts[-1], rollup=3600) == 0
-
         self.db.delete_distinct_counts([model], [1, 2], dts[0], dts[-1], environment_ids=[0, 1])

         results = self.db.get_distinct_counts_totals(model, [1, 2], dts[0], dts[-1])
@@ -369,74 +349,8 @@ class RedisTSDBTest(TestCase):
             environment_id=1,
         )

-        assert self.db.get_most_frequent(
-            model, ("organization:1", "organization:2"), now, rollup=rollup
-        ) == {
-            "organization:1": [("project:3", 3.0), ("project:2", 2.0), ("project:1", 1.0)],
-            "organization:2": [],
-        }
-
-        assert self.db.get_most_frequent(
-            model,
-            ("organization:1", "organization:2"),
-            now - timedelta(hours=1),
-            now,
-            rollup=rollup,
-            environment_id=1,
-        ) == {
-            "organization:1": [("project:4", 3.0), ("project:3", 2.0), ("project:2", 1.0)],
-            "organization:2": [("project:5", 0.5)],
-        }
-
-        assert self.db.get_most_frequent(
-            model, ("organization:1", "organization:2"), now, limit=1, rollup=rollup
-        ) == {"organization:1": [("project:3", 3.0)], "organization:2": []}
-
-        assert self.db.get_most_frequent(
-            model,
-            ("organization:1", "organization:2"),
-            now - timedelta(hours=1),
-            now,
-            rollup=rollup,
-        ) == {
-            "organization:1": [
-                ("project:3", 3.0 + 3.0),
-                ("project:2", 2.0 + 2.0),
-                ("project:4", 4.0),
-                ("project:1", 1.0 + 1.0),
-            ],
-            "organization:2": [("project:5", 1.5)],
-        }
-
-        assert self.db.get_most_frequent(
-            model,
-            ("organization:1", "organization:2"),
-            now - timedelta(hours=1),
-            now,
-            rollup=rollup,
-            environment_id=0,
-        ) == {"organization:1": [], "organization:2": []}
-
         timestamp = int(now.timestamp() // rollup) * rollup

-        assert self.db.get_most_frequent_series(
-            model,
-            ("organization:1", "organization:2", "organization:3"),
-            now - timedelta(hours=1),
-            now,
-            rollup=rollup,
-        ) == {
-            "organization:1": [
-                (
-                    timestamp - rollup,
-                    {"project:1": 1.0, "project:2": 2.0, "project:3": 3.0, "project:4": 4.0},
-                ),
-                (timestamp, {"project:1": 1.0, "project:2": 2.0, "project:3": 3.0}),
-            ],
-            "organization:2": [(timestamp - rollup, {"project:5": 1.5}), (timestamp, {})],
-            "organization:3": [(timestamp - rollup, {}), (timestamp, {})],
-        }
-
         assert self.db.get_frequency_series(
             model,
             {
@@ -490,89 +404,10 @@ class RedisTSDBTest(TestCase):
             ],
         }

-        assert self.db.get_frequency_totals(
-            model,
-            {
-                "organization:1": ("project:1", "project:2", "project:3", "project:4", "project:5"),
-                "organization:2": ("project:1", "project:2", "project:3", "project:4", "project:5"),
-            },
-            now - timedelta(hours=1),
-            now,
-            rollup=rollup,
-        ) == {
-            "organization:1": {
-                "project:1": 1.0 + 1.0,
-                "project:2": 2.0 + 2.0,
-                "project:3": 3.0 + 3.0,
-                "project:4": 4.0,
-                "project:5": 0.0,
-            },
-            "organization:2": {
-                "project:1": 0.0,
-                "project:2": 0.0,
-                "project:3": 0.0,
-                "project:4": 0.0,
-                "project:5": 1.5,
-            },
-        }
-
         self.db.merge_frequencies(
             model, "organization:1", ["organization:2"], now, environment_ids=[0, 1]
         )

-        assert self.db.get_frequency_totals(
-            model,
-            {
-                "organization:1": ("project:1", "project:2", "project:3", "project:4", "project:5"),
-                "organization:2": ("project:1", "project:2", "project:3", "project:4", "project:5"),
-            },
-            now - timedelta(hours=1),
-            now,
-            rollup=rollup,
-        ) == {
-            "organization:1": {
-                "project:1": 1.0 + 1.0,
-                "project:2": 2.0 + 2.0,
-                "project:3": 3.0 + 3.0,
-                "project:4": 4.0,
-                "project:5": 1.5,
-            },
-            "organization:2": {
-                "project:1": 0.0,
-                "project:2": 0.0,
-                "project:3": 0.0,
-                "project:4": 0.0,
-                "project:5": 0.0,
-            },
-        }
-
-        assert self.db.get_frequency_totals(
-            model,
-            {
-                "organization:1": ("project:1", "project:2", "project:3", "project:4", "project:5"),
-                "organization:2": ("project:1", "project:2", "project:3", "project:4", "project:5"),
-            },
-            now - timedelta(hours=1),
-            now,
-            rollup=rollup,
-            environment_id=1,
-        ) == {
-            "organization:1": {
-                "project:1": 0.0,
-                "project:2": 1.0,
-                "project:3": 2.0,
-                "project:4": 3.0,
-                "project:5": 0.5,
-            },
-            "organization:2": {
-                "project:1": 0.0,
-                "project:2": 0.0,
-                "project:3": 0.0,
-                "project:4": 0.0,
-                "project:5": 0.0,
-            },
-        }
-
         self.db.delete_frequencies(
             [model],
             ["organization:1", "organization:2"],
@@ -581,15 +416,6 @@ class RedisTSDBTest(TestCase):
             environment_ids=[0, 1],
         )

-        assert self.db.get_most_frequent(model, ("organization:1", "organization:2"), now) == {
-            "organization:1": [],
-            "organization:2": [],
-        }
-
-        assert self.db.get_most_frequent(
-            model, ("organization:1", "organization:2"), now, environment_id=1
-        ) == {"organization:1": [], "organization:2": []}
-
     def test_frequency_table_import_export_no_estimators(self):
         client = self.db.cluster.get_local_client_for_key("key")


+ 8 - 69
tests/snuba/tsdb/test_tsdb_backend.py

@@ -483,31 +483,6 @@ class SnubaTSDBTest(TestCase, SnubaTestCase):
             == {}
         )

-    def test_most_frequent(self):
-        assert self.db.get_most_frequent(
-            TSDBModel.frequent_issues_by_project,
-            [self.proj1.id],
-            self.now,
-            self.now + timedelta(hours=4),
-            rollup=3600,
-            tenant_ids={"referrer": "r", "organization_id": 1234},
-        ) in [
-            {self.proj1.id: [(self.proj1group1.id, 2.0), (self.proj1group2.id, 1.0)]},
-            {self.proj1.id: [(self.proj1group2.id, 2.0), (self.proj1group1.id, 1.0)]},
-        ]  # Both issues equally frequent
-
-        assert (
-            self.db.get_most_frequent(
-                TSDBModel.frequent_issues_by_project,
-                [],
-                self.now,
-                self.now + timedelta(hours=4),
-                rollup=3600,
-                tenant_ids={"referrer": "r", "organization_id": 1234},
-            )
-            == {}
-        )
-
     def test_frequency_series(self):
         dts = [self.now + timedelta(hours=i) for i in range(4)]
         assert self.db.get_frequency_series(
@@ -555,81 +530,45 @@ class SnubaTSDBTest(TestCase, SnubaTestCase):
         project_id = self.proj1.id
         dts = [self.now + timedelta(hours=i) for i in range(4)]

-        results = self.db.get_most_frequent(
-            TSDBModel.frequent_issues_by_project,
-            [project_id],
-            dts[0],
-            dts[0],
-            tenant_ids={"referrer": "r", "organization_id": 1234},
-        )
-        assert has_shape(results, {1: [(1, 1.0)]})
-
-        results = self.db.get_most_frequent_series(
-            TSDBModel.frequent_issues_by_project,
-            [project_id],
-            dts[0],
-            dts[0],
-            tenant_ids={"referrer": "r", "organization_id": 1234},
-        )
-        assert has_shape(results, {1: [(1, {1: 1.0})]})
-
         items = {
             # {project_id: (issue_id, issue_id, ...)}
             project_id: (self.proj1group1.id, self.proj1group2.id)
         }
-        results = self.db.get_frequency_series(
-            TSDBModel.frequent_issues_by_project,
-            items,
-            dts[0],
-            dts[-1],
-            tenant_ids={"referrer": "r", "organization_id": 1234},
-        )
-        assert has_shape(results, {1: [(1, {1: 1})]})
-
-        results = self.db.get_frequency_totals(
+        results1 = self.db.get_frequency_series(
             TSDBModel.frequent_issues_by_project,
             items,
             dts[0],
             dts[-1],
             tenant_ids={"referrer": "r", "organization_id": 1234},
         )
-        assert has_shape(results, {1: {1: 1}})
+        assert has_shape(results1, {1: [(1, {1: 1})]})

-        results = self.db.get_range(
+        results2 = self.db.get_range(
             TSDBModel.project,
             [project_id],
             dts[0],
             dts[-1],
             tenant_ids={"referrer": "r", "organization_id": 1234},
         )
-        assert has_shape(results, {1: [(1, 1)]})
-
-        results = self.db.get_distinct_counts_series(
-            TSDBModel.users_affected_by_project,
-            [project_id],
-            dts[0],
-            dts[-1],
-            tenant_ids={"referrer": "r", "organization_id": 1234},
-        )
-        assert has_shape(results, {1: [(1, 1)]})
+        assert has_shape(results2, {1: [(1, 1)]})

-        results = self.db.get_distinct_counts_totals(
+        results3 = self.db.get_distinct_counts_series(
             TSDBModel.users_affected_by_project,
             [project_id],
             dts[0],
             dts[-1],
             tenant_ids={"referrer": "r", "organization_id": 1234},
         )
-        assert has_shape(results, {1: 1})
+        assert has_shape(results3, {1: [(1, 1)]})

-        results = self.db.get_distinct_counts_union(
+        results4 = self.db.get_distinct_counts_totals(
             TSDBModel.users_affected_by_project,
             [project_id],
             dts[0],
             dts[-1],
             tenant_ids={"referrer": "r", "organization_id": 1234},
         )
-        assert has_shape(results, 1)
+        assert has_shape(results4, {1: 1})

     def test_calculated_limit(self):