1 year ago · b31a1b293e
--- a/src/sentry/conf/server.py
+++ b/src/sentry/conf/server.py
@@ -1661,6 +1661,8 @@ SENTRY_FEATURES: dict[str, bool | None] = {
 
				     "organizations:mobile-cpu-memory-in-transactions": False,
			
 
				     # Enable Monitors (Crons) view
			
 
				     "organizations:monitors": False,
			
 
				+    # Enable rate-limiting via relay for Monitors (crons)
			
 
				+    "organizations:monitors-quota-rate-limit": False,
			
 
				     # Enables higher limit for alert rules
			
 
				     "organizations:more-slow-alerts": False,
			
 
				     # Enables region provisioning for individual users
			
--- a/src/sentry/features/__init__.py
+++ b/src/sentry/features/__init__.py
@@ -149,6 +149,7 @@ default_manager.add("organizations:mobile-cpu-memory-in-transactions", Organizat
 
				 default_manager.add("organizations:mobile-ttid-ttfd-contribution", OrganizationFeature, FeatureHandlerStrategy.REMOTE)
			
 
				 default_manager.add("organizations:mobile-vitals", OrganizationFeature, FeatureHandlerStrategy.REMOTE)
			
 
				 default_manager.add("organizations:monitors", OrganizationFeature, FeatureHandlerStrategy.REMOTE)
			
 
				+default_manager.add("organizations:monitors-quota-rate-limit", OrganizationFeature, FeatureHandlerStrategy.INTERNAL)
			
 
				 default_manager.add("organizations:more-slow-alerts", OrganizationFeature, FeatureHandlerStrategy.INTERNAL)
			
 
				 default_manager.add("organizations:new-page-filter", OrganizationFeature, FeatureHandlerStrategy.REMOTE)
			
 
				 default_manager.add("organizations:new-weekly-report", OrganizationFeature, FeatureHandlerStrategy.REMOTE)
			
--- a/src/sentry/monitors/models.py
+++ b/src/sentry/monitors/models.py
@@ -540,6 +540,8 @@ class MonitorEnvironmentManager(BaseManager["MonitorEnvironment"]):
 
				     def ensure_environment(
			
 
				         self, project: Project, monitor: Monitor, environment_name: str | None
			
 
				     ) -> MonitorEnvironment:
			
 
				+        from sentry.monitors.rate_limit import update_monitor_quota
			
 
				+
			
 
				         if not environment_name:
			
 
				             environment_name = "production"
			
 
				 
			
@@ -549,9 +551,15 @@ class MonitorEnvironmentManager(BaseManager["MonitorEnvironment"]):
 
				         # TODO: assume these objects exist once backfill is completed
			
 
				         environment = Environment.get_or_create(project=project, name=environment_name)
			
 
				 
			
 
				-        return MonitorEnvironment.objects.get_or_create(
			
 
				+        monitor_env, created = MonitorEnvironment.objects.get_or_create(
			
 
				             monitor=monitor, environment=environment, defaults={"status": MonitorStatus.ACTIVE}
			
 
				-        )[0]
			
 
				+        )
			
 
				+
			
 
				+        # recompute per-project monitor check-in rate limit quota
			
 
				+        if created:
			
 
				+            update_monitor_quota(monitor_env)
			
 
				+
			
 
				+        return monitor_env
			
 
				 
			
 
				 
			
 
				 @region_silo_only_model
			
--- a/src/sentry/monitors/rate_limit.py
+++ b/src/sentry/monitors/rate_limit.py
@@ -0,0 +1,100 @@
 
				+from typing import Optional, Tuple
			
 
				+
			
 
				+from django.core.cache import cache
			
 
				+
			
 
				+from sentry import features
			
 
				+from sentry.models.organization import Organization
			
 
				+from sentry.models.project import Project
			
 
				+from sentry.monitors.models import MonitorEnvironment
			
 
				+from sentry.tasks.relay import schedule_invalidate_project_config
			
 
				+
			
 
				+# This module is used by the Quotas system to inform relay of the allowed
			
 
				+# number of check-ins that can be sent per project during the QUOTA_WINDOW.
			
 
				+#
			
 
				+# In an ideal world we would be rate-limiting monitors on the key of
			
 
				+# (organization, monitor_slug, environment). Unfortunately due to how Relay
			
 
				+# handles quota rate-limiting we're at best able to rate-limit per project.
			
 
				+#
			
 
				+# > Relay cannot do rate-limiting like this because it would need to fully
			
 
				+# > unmarshal the event payload to rate-limit on the monitor slug. The project_id
			
 
				+# > is available in the payload headers.
			
 
				+#
			
 
				+# To allow the monitors system to take-advantage of Relay's quota system to be
			
 
				+# used for rate-limiting, we can rate-limit per project by providing relay with
			
 
				+# a quota for the DataCategory.MONITOR that is calculated based on how many
			
 
				+# monitors the project has.
			
 
				+#
			
 
				+# There are some caveats to this:
			
 
				+#
			
 
				+# - For upserted monitors we need to give some amount of minimum quota, to
			
 
				+#   allow monitors to be created even when there are 0 monitors within a
			
 
				+#   project (since otherwise it would be completely rate-limited)
			
 
				+#
			
 
				+# - It is possible that one monitor may saturate the quota, thus rate-limiting
			
 
				+#   all other monitors within that project. This is an accepted side-effect of
			
 
				+#   doing rate-limiting this way.
			
 
				+#
			
 
				+# - We give each monitor a 'grace' number of allowed check-ins. Ideally each
			
 
				+#   monitor should only be checking in once per minute, so the quota could just
			
 
				+#   be the number of monitors in the project. However,
			
 
				+
			
 
				+
			
 
				+# Monitor check-in limits are per minute. This maps to the smallest check-in
			
 
				+# window that we support.
			
 
				+QUOTA_WINDOW = 60
			
 
				+
			
 
				+# Determines how many check-ins per-minute will be allowed per monitor. This is
			
 
				+# used when computing the QuotaConfig for the DataCategory.MONITOR (check-ins)
			
 
				+#
			
 
				+# These are the 'grace' check-ins as described above.
			
 
				+ALLOWED_CHECK_INS_PER_MONITOR = 5
			
 
				+
			
 
				+# The minimum rate-limit per project for the DataCategory.MONITOR. This value
			
 
				+# should be high enough that it allows for a large number of monitors to be
			
 
				+# upserted without hitting the project rate-limit.
			
 
				+ALLOWED_MINIMUM = 50
			
 
				+
			
 
				+
			
 
				+def get_project_monitor_quota(
			
 
				+    project: Project,
			
 
				+    cache_bust=False,
			
 
				+) -> Tuple[Optional[int], Optional[int]]:
			
 
				+    """
			
 
				+    Determines the rate-limit for monitor check-ins across a particular
			
 
				+    project.
			
 
				+
			
 
				+    :return: A (limit, window) tuple. (None, None) indicates no rate-limit
			
 
				+    """
			
 
				+    organization = Organization.objects.get_from_cache(id=project.organization_id)
			
 
				+
			
 
				+    if not features.has("organizations:monitors-quota-rate-limit", organization=organization):
			
 
				+        return (None, None)
			
 
				+
			
 
				+    limit = None
			
 
				+    cache_key = f"project:{project.id}:monitor-env-count"
			
 
				+
			
 
				+    # Cache rate-limit computation. This function will be called often by the
			
 
				+    # Quotas system.
			
 
				+    if not cache_bust:
			
 
				+        limit = cache.get(cache_key)
			
 
				+
			
 
				+    if limit is None:
			
 
				+        monitor_count = MonitorEnvironment.objects.filter(monitor__project_id=project.id).count()
			
 
				+        limit = monitor_count * ALLOWED_CHECK_INS_PER_MONITOR
			
 
				+        cache.set(cache_key, limit, 600)
			
 
				+
			
 
				+    return (ALLOWED_MINIMUM + limit, QUOTA_WINDOW)
			
 
				+
			
 
				+
			
 
				+def update_monitor_quota(monitor_env: MonitorEnvironment):
			
 
				+    """
			
 
				+    When new monitor environments are created we recompute the per-project
			
 
				+    monitor check-in rate limit QuotaConfig in relay.
			
 
				+    """
			
 
				+    project = Project.objects.get_from_cache(id=monitor_env.monitor.project_id)
			
 
				+
			
 
				+    get_project_monitor_quota(project, cache_bust=True)
			
 
				+    schedule_invalidate_project_config(
			
 
				+        project_id=project.id,
			
 
				+        trigger="monitors:monitor_created",
			
 
				+    )
			
--- a/src/sentry/quotas/base.py
+++ b/src/sentry/quotas/base.py
@@ -457,6 +457,11 @@ class Quota(Service):
 
				                     reason_code=reason_codes[quota.scope],
			
 
				                 )
			
 
				 
			
 
				+    def get_monitor_quota(self, project):
			
 
				+        from sentry.monitors.rate_limit import get_project_monitor_quota
			
 
				+
			
 
				+        return get_project_monitor_quota(project)
			
 
				+
			
 
				     def get_project_quota(self, project):
			
 
				         from sentry.models.options.organization_option import OrganizationOption
			
 
				         from sentry.models.organization import Organization
			
--- a/src/sentry/quotas/redis.py
+++ b/src/sentry/quotas/redis.py
@@ -87,6 +87,22 @@ class RedisQuota(Quota):
 
				                     )
			
 
				                 )
			
 
				 
			
 
				+        with sentry_sdk.start_span(op="redis.get_quotas.get_monitor_quota") as span:
			
 
				+            span.set_tag("project.id", project.id)
			
 
				+            mrlquota = self.get_monitor_quota(project)
			
 
				+            if mrlquota[0] is not None:
			
 
				+                results.append(
			
 
				+                    QuotaConfig(
			
 
				+                        id="mrl",
			
 
				+                        limit=mrlquota[0],
			
 
				+                        window=mrlquota[1],
			
 
				+                        scope=QuotaScope.PROJECT,
			
 
				+                        scope_id=project.id,
			
 
				+                        categories=[DataCategory.MONITOR],
			
 
				+                        reason_code="monitor_rate_limit",
			
 
				+                    )
			
 
				+                )
			
 
				+
			
 
				         if key and not keys:
			
 
				             keys = [key]
			
 
				         elif not keys:
			
--- a/tests/sentry/monitors/test_rate_limit.py
+++ b/tests/sentry/monitors/test_rate_limit.py
@@ -0,0 +1,75 @@
 
				+from unittest import mock
			
 
				+
			
 
				+from sentry.monitors.models import (
			
 
				+    Monitor,
			
 
				+    MonitorEnvironment,
			
 
				+    MonitorStatus,
			
 
				+    MonitorType,
			
 
				+    ScheduleType,
			
 
				+)
			
 
				+from sentry.monitors.rate_limit import get_project_monitor_quota
			
 
				+from sentry.testutils.cases import TestCase
			
 
				+from sentry.testutils.helpers.features import with_feature
			
 
				+
			
 
				+
			
 
				+@mock.patch("sentry.monitors.rate_limit.QUOTA_WINDOW", 45)
			
 
				+@mock.patch("sentry.monitors.rate_limit.ALLOWED_CHECK_INS_PER_MONITOR", 2)
			
 
				+@mock.patch("sentry.monitors.rate_limit.ALLOWED_MINIMUM", 5)
			
 
				+class MonitorRateLimit(TestCase):
			
 
				+    def test_disabled(self):
			
 
				+        """
			
 
				+        Disabled without feature flag
			
 
				+        """
			
 
				+        limit, window = get_project_monitor_quota(self.project)
			
 
				+        assert limit is None
			
 
				+        assert window is None
			
 
				+
			
 
				+    @with_feature("organizations:monitors-quota-rate-limit")
			
 
				+    def test_minimum(self):
			
 
				+        """
			
 
				+        Without any monitor environments we'll always return ALLOWED_MINIMUM.
			
 
				+        """
			
 
				+        limit, window = get_project_monitor_quota(self.project)
			
 
				+        assert limit == 5
			
 
				+        assert window == 45
			
 
				+
			
 
				+    @with_feature("organizations:monitors-quota-rate-limit")
			
 
				+    def test_computed_from_environments(self):
			
 
				+        """
			
 
				+        Validate that the quota is computed from the total number of monitor
			
 
				+        environments in a project.
			
 
				+        """
			
 
				+        monitor = Monitor.objects.create(
			
 
				+            organization_id=self.organization.id,
			
 
				+            project_id=self.project.id,
			
 
				+            type=MonitorType.CRON_JOB,
			
 
				+            config={"schedule": "*/5 * * * *", "schedule_type": ScheduleType.CRONTAB},
			
 
				+        )
			
 
				+
			
 
				+        mon_env_count = 10
			
 
				+        for i in range(mon_env_count):
			
 
				+            env = self.create_environment(self.project, name=f"test-{i}")
			
 
				+            MonitorEnvironment.objects.create(
			
 
				+                monitor=monitor,
			
 
				+                environment=env,
			
 
				+                status=MonitorStatus.OK,
			
 
				+            )
			
 
				+
			
 
				+        # The rate limit is per project, create another monitor in a different
			
 
				+        # project to validate it is not counted
			
 
				+        project2 = self.create_project(organization=self.organization)
			
 
				+        monitor2 = Monitor.objects.create(
			
 
				+            organization_id=self.organization.id,
			
 
				+            project_id=project2.id,
			
 
				+            type=MonitorType.CRON_JOB,
			
 
				+            config={"schedule": "*/5 * * * *", "schedule_type": ScheduleType.CRONTAB},
			
 
				+        )
			
 
				+        MonitorEnvironment.objects.create(
			
 
				+            monitor=monitor2,
			
 
				+            environment=self.environment,
			
 
				+            status=MonitorStatus.OK,
			
 
				+        )
			
 
				+
			
 
				+        limit, window = get_project_monitor_quota(self.project, cache_bust=True)
			
 
				+        assert limit == 25
			
 
				+        assert window == 45
			
--- a/tests/sentry/quotas/test_redis.py
+++ b/tests/sentry/quotas/test_redis.py
@@ -76,6 +76,7 @@ class RedisQuotaTest(TestCase):
 
				         # AssertionError: reject-all quotas cannot be tracked
			
 
				         self.get_project_quota.return_value = (100, 10)
			
 
				         self.get_organization_quota.return_value = (1000, 10)
			
 
				+        self.get_monitor_quota.return_value = (15, 60)
			
 
				 
			
 
				         # A negative quota means reject-all.
			
 
				         self.organization.update_option("project-abuse-quota.error-limit", -1)
			
@@ -208,6 +209,7 @@ class RedisQuotaTest(TestCase):
 
				         # AssertionError: reject-all quotas cannot be tracked
			
 
				         self.get_project_quota.return_value = (100, 10)
			
 
				         self.get_organization_quota.return_value = (1000, 10)
			
 
				+        self.get_monitor_quota.return_value = (15, 60)
			
 
				 
			
 
				         self.organization.update_option("project-abuse-quota.transaction-limit", 600)
			
 
				         with self.feature({"organizations:transaction-metrics-extraction": False}):
			
@@ -235,9 +237,17 @@ class RedisQuotaTest(TestCase):
 
				         ) as self.get_organization_quota:
			
 
				             yield
			
 
				 
			
 
				+    @pytest.fixture(autouse=True)
			
 
				+    def _patch_get_monitor_quota(self):
			
 
				+        with mock.patch.object(
			
 
				+            RedisQuota, "get_monitor_quota", return_value=(0, 60)
			
 
				+        ) as self.get_monitor_quota:
			
 
				+            yield
			
 
				+
			
 
				     def test_uses_defined_quotas(self):
			
 
				         self.get_project_quota.return_value = (200, 60)
			
 
				         self.get_organization_quota.return_value = (300, 60)
			
 
				+        self.get_monitor_quota.return_value = (15, 60)
			
 
				         quotas = self.quota.get_quotas(self.project)
			
 
				 
			
 
				         assert quotas[0].id == "p"
			
@@ -250,6 +260,11 @@ class RedisQuotaTest(TestCase):
 
				         assert quotas[1].scope_id == str(self.organization.id)
			
 
				         assert quotas[1].limit == 300
			
 
				         assert quotas[1].window == 60
			
 
				+        assert quotas[2].id == "mrl"
			
 
				+        assert quotas[2].scope == QuotaScope.PROJECT
			
 
				+        assert quotas[2].scope_id == str(self.project.id)
			
 
				+        assert quotas[2].limit == 15
			
 
				+        assert quotas[2].window == 60
			
 
				 
			
 
				     @mock.patch("sentry.quotas.redis.is_rate_limited")
			
 
				     @mock.patch.object(RedisQuota, "get_quotas", return_value=[])
			
@@ -262,12 +277,14 @@ class RedisQuotaTest(TestCase):
 
				     def test_is_not_limited_without_rejections(self, is_rate_limited):
			
 
				         self.get_organization_quota.return_value = (100, 60)
			
 
				         self.get_project_quota.return_value = (200, 60)
			
 
				+        self.get_monitor_quota.return_value = (15, 60)
			
 
				         assert not self.quota.is_rate_limited(self.project).is_limited
			
 
				 
			
 
				     @mock.patch("sentry.quotas.redis.is_rate_limited", return_value=(True, False))
			
 
				     def test_is_limited_on_rejections(self, is_rate_limited):
			
 
				         self.get_organization_quota.return_value = (100, 60)
			
 
				         self.get_project_quota.return_value = (200, 60)
			
 
				+        self.get_monitor_quota.return_value = (15, 60)
			
 
				         assert self.quota.is_rate_limited(self.project).is_limited
			
 
				 
			
 
				     @mock.patch.object(RedisQuota, "get_quotas")
			
@@ -323,6 +340,7 @@ class RedisQuotaTest(TestCase):
 
				 
			
 
				         self.get_project_quota.return_value = (200, 60)
			
 
				         self.get_organization_quota.return_value = (300, 60)
			
 
				+        self.get_monitor_quota.return_value = (15, 60)
			
 
				 
			
 
				         n = 10
			
 
				         for _ in range(n):
			
@@ -336,9 +354,13 @@ class RedisQuotaTest(TestCase):
 
				 
			
 
				         usage = self.quota.get_usage(self.project.organization_id, all_quotas, timestamp=timestamp)
			
 
				 
			
 
				-        # Only quotas with an ID are counted in Redis (via this ID). Assume the
			
 
				-        # count for these quotas and None for the others.
			
 
				-        assert usage == [n if q.id else None for q in quotas] + [0, 0]
			
 
				+        assert usage == [
			
 
				+            n,  # project quota is consumed
			
 
				+            n,  # organization quota is consumed
			
 
				+            0,  # monitor quota is not consumed
			
 
				+            0,  # unlimited quota is not consumed
			
 
				+            0,  # dummy quota is not consumed
			
 
				+        ]
			
 
				 
			
 
				     @mock.patch.object(RedisQuota, "get_quotas")
			
 
				     def test_refund_defaults(self, mock_get_quotas):
			
@@ -443,6 +465,7 @@ class RedisQuotaTest(TestCase):
 
				 
			
 
				         self.get_project_quota.return_value = (200, 60)
			
 
				         self.get_organization_quota.return_value = (300, 60)
			
 
				+        self.get_monitor_quota.return_value = (15, 60)
			
 
				 
			
 
				         n = 10
			
 
				         for _ in range(n):
			
@@ -461,4 +484,10 @@ class RedisQuotaTest(TestCase):
 
				         # Only quotas with an ID are counted in Redis (via this ID). Assume the
			
 
				         # count for these quotas and None for the others.
			
 
				         # The ``- 1`` is because we refunded once.
			
 
				-        assert usage == [n - 1 if q.id else None for q in quotas] + [0, 0]
			
 
				+        assert usage == [
			
 
				+            n - 1,  # project quota has been refunded one
			
 
				+            n - 1,  # organization quota has been refunded one
			
 
				+            0,  # monitor quota was not consumed
			
 
				+            0,  # unlimited quota was not consumed
			
 
				+            0,  # dummy quota was not consumed
			
 
				+        ]