Browse Source

perf(issues): add pseudo-cache to test_frequency_rates in GroupSnooze (#69939)

Reducing unnecessary Snuba queries by only querying when Redis count
reached.
Follows exactly same pattern as
https://github.com/getsentry/sentry/pull/69556
Bartek Ogryczak 10 months ago
parent
commit
3a67f2bca5

+ 2 - 0
src/sentry/conf/server.py

@@ -1581,6 +1581,8 @@ SENTRY_FEATURES: dict[str, bool | None] = {
     "organizations:grouping-tree-ui": False,
     # Enable caching group counts in GroupSnooze
     "organizations:groupsnooze-cached-counts": False,
+    # Enable caching group frequency rates in GroupSnooze
+    "organizations:groupsnooze-cached-rates": False,
     # Allows an org to have a larger set of project ownership rules per project
     "organizations:higher-ownership-limit": False,
     # Enable incidents feature

+ 1 - 0
src/sentry/features/temporary.py

@@ -79,6 +79,7 @@ def register_temporary_features(manager: FeatureManager):
     manager.add("organizations:grouping-title-ui", OrganizationFeature, FeatureHandlerStrategy.REMOTE)
     manager.add("organizations:grouping-tree-ui", OrganizationFeature, FeatureHandlerStrategy.REMOTE)
     manager.add("organizations:groupsnooze-cached-counts", OrganizationFeature, FeatureHandlerStrategy.OPTIONS)
+    manager.add("organizations:groupsnooze-cached-rates", OrganizationFeature, FeatureHandlerStrategy.OPTIONS)
     manager.add("organizations:higher-ownership-limit", OrganizationFeature, FeatureHandlerStrategy.INTERNAL)
     manager.add("organizations:increased-issue-owners-rate-limit", OrganizationFeature, FeatureHandlerStrategy.INTERNAL)
     manager.add("organizations:integrations-custom-alert-priorities", OrganizationFeature, FeatureHandlerStrategy.REMOTE)

+ 51 - 1
src/sentry/models/groupsnooze.py

@@ -97,7 +97,57 @@ class GroupSnooze(Model):
         return True
 
     def test_frequency_rates(self) -> bool:
-        metrics.incr("groupsnooze.test_frequency_rates")
+        if features.has(
+            "organizations:groupsnooze-cached-rates", organization=self.group.project.organization
+        ):
+            return self.test_frequency_rates_w_cache()
+        else:
+            return self.test_frequency_rates_no_cache()
+
+    def test_frequency_rates_w_cache(self) -> bool:
+        cache_key = f"groupsnooze:v1:{self.id}:test_frequency_rate:events_seen_counter"
+
+        cache_ttl = self.window * 60  # Redis TTL in seconds (window is in minutes)
+
+        value: int | float = float("inf")  # using +inf as a sentinel value
+
+        try:
+            value = cache.incr(cache_key)
+            cache.touch(cache_key, cache_ttl)
+        except ValueError:
+            # key doesn't exist, fall back on sentinel value
+            pass
+
+        if value < self.count:
+            metrics.incr("groupsnooze.test_frequency_rates", tags={"cached": "true", "hit": "true"})
+            return True
+
+        metrics.incr("groupsnooze.test_frequency_rates", tags={"cached": "true", "hit": "false"})
+        metrics.incr("groupsnooze.test_frequency_rates.snuba_call")
+        end = timezone.now()
+        start = end - timedelta(minutes=self.window)
+
+        rate = tsdb.backend.get_sums(
+            model=get_issue_tsdb_group_model(self.group.issue_category),
+            keys=[self.group_id],
+            start=start,
+            end=end,
+            tenant_ids={"organization_id": self.group.project.organization_id},
+            referrer_suffix="frequency_snoozes",
+        )[self.group_id]
+
+        # TTL is further into the future than it needs to be, but we'd rather over-estimate
+        # and call Snuba more often than under-estimate and not trigger
+        cache.set(cache_key, rate, cache_ttl)
+
+        if rate >= self.count:
+            return False
+
+        return True
+
+    def test_frequency_rates_no_cache(self) -> bool:
+        metrics.incr("groupsnooze.test_frequency_rates", tags={"cached": "false"})
+        metrics.incr("groupsnooze.test_frequency_rates.snuba_call")
 
         end = timezone.now()
         start = end - timedelta(minutes=self.window)

+ 80 - 0
tests/sentry/models/test_groupsnooze.py

@@ -190,6 +190,7 @@ class GroupSnoozeTest(
 
 
 @apply_feature_flag_on_cls("organizations:groupsnooze-cached-counts")
+@apply_feature_flag_on_cls("organizations:groupsnooze-cached-rates")
 class GroupSnoozeWCacheTest(GroupSnoozeTest):
     """
     Test the cached version of the snooze.
@@ -367,3 +368,82 @@ class GroupSnoozeWCacheTest(GroupSnoozeTest):
             assert not snooze.is_valid(test_rates=True)
             assert mocked_count_users_seen.call_count == 3
             assert cache_spy.set.called_with(cache_key, 100, 300)
+
+    def test_test_frequency_rates_w_cache(self):
+        snooze = GroupSnooze.objects.create(group=self.group, count=100, window=60)
+
+        cache_key = f"groupsnooze:v1:{snooze.id}:test_frequency_rate:events_seen_counter"
+
+        with (
+            mock.patch("sentry.models.groupsnooze.tsdb.backend.get_sums") as mocked_get_sums,
+            mock.patch.object(
+                sentry.models.groupsnooze, "cache", wraps=sentry.models.groupsnooze.cache  # type: ignore[attr-defined]
+            ) as cache_spy,
+        ):
+            mocked_get_sums.side_effect = [{snooze.group_id: c} for c in [95, 98, 100]]
+
+            cache_spy.set = mock.Mock(side_effect=cache_spy.set)
+            cache_spy.incr = mock.Mock(side_effect=cache_spy.incr)
+
+            assert snooze.is_valid(test_rates=True)
+            assert mocked_get_sums.call_count == 1
+            assert cache_spy.set.called_with(cache_key, 95, 3600)
+
+            assert snooze.is_valid(test_rates=True)
+            assert mocked_get_sums.call_count == 1
+            assert cache_spy.incr.called_with(cache_key)
+            assert cache_spy.get(cache_key) == 96
+
+            assert snooze.is_valid(test_rates=True)
+            assert cache_spy.get(cache_key) == 97
+            assert snooze.is_valid(test_rates=True)
+            assert cache_spy.get(cache_key) == 98
+            assert snooze.is_valid(test_rates=True)
+            assert cache_spy.get(cache_key) == 99
+
+            # cache counter reaches 100, but gets 98 from get_distinct_counts_totals
+
+            assert snooze.is_valid(test_rates=True)
+            assert mocked_get_sums.call_count == 2
+            assert cache_spy.set.called_with(cache_key, 98, 3600)
+            assert cache_spy.get(cache_key) == 98
+
+            assert snooze.is_valid(test_rates=True)
+            assert cache_spy.get(cache_key) == 99
+            # with this call counter reaches 100, gets 100 from get_distinct_counts_totals, so is_valid returns False
+            assert not snooze.is_valid(test_rates=True)
+            assert mocked_get_sums.call_count == 3
+
+    def test_test_frequency_rates_w_cache_expired(self):
+        snooze = GroupSnooze.objects.create(group=self.group, count=100, window=60)
+
+        cache_key = f"groupsnooze:v1:{snooze.id}:test_frequency_rate:events_seen_counter"
+
+        with (
+            mock.patch("sentry.models.groupsnooze.tsdb.backend.get_sums") as mocked_get_sums,
+            mock.patch.object(
+                sentry.models.groupsnooze, "cache", wraps=sentry.models.groupsnooze.cache  # type: ignore[attr-defined]
+            ) as cache_spy,
+        ):
+            mocked_get_sums.side_effect = [{snooze.group_id: c} for c in [98, 99, 100]]
+
+            cache_spy.set = mock.Mock(side_effect=cache_spy.set)
+            cache_spy.incr = mock.Mock(side_effect=cache_spy.incr)
+
+            assert snooze.is_valid(test_rates=True)
+            assert mocked_get_sums.call_count == 1
+            assert cache_spy.set.called_with(cache_key, 98, 3600)
+
+            # simulate cache expiration
+            cache_spy.delete(cache_key)
+
+            assert snooze.is_valid(test_rates=True)
+            assert mocked_get_sums.call_count == 2
+            assert cache_spy.set.called_with(cache_key, 99, 3600)
+
+            # simulate cache expiration
+            cache_spy.delete(cache_key)
+
+            assert not snooze.is_valid(test_rates=True)
+            assert mocked_get_sums.call_count == 3
+            assert cache_spy.set.called_with(cache_key, 100, 3600)