Browse Source

feat(issue-priority): Add a minimum threshold for high priority issue alert conditions (#67752)

Adding a minimum threshold that must be met for the high priority
condition to kick in:
- more than 10 issues created per week for 3 weeks or
- more than 20 issues created per week for two weeks. 

This is done by adding a celery task that will run daily to check the
new issue volume over the past 3 weeks. If the condition is met, we'll
set the `new_issue_threshold_met: True` in the rule's data. The rule
will check this field before determining whether to use the
HighPriorityIssueCondition or default to the FirstSeenEventCondition.

From
[here](https://www.notion.so/sentry/Priority-Alerts-EA-GA-d9e115d501b5460fa354d0020b36fafc?pvs=4#9d5f4a7e07cc4bf4a65bf49dd7858c2e)

#66337
Snigdha Sharma 10 months ago
parent
commit
51bb6e01b0

+ 2 - 0
src/sentry/conf/server.py

@@ -745,6 +745,7 @@ CELERY_IMPORTS = (
     "sentry.tasks.backfill_outboxes",
     "sentry.tasks.beacon",
     "sentry.tasks.check_auth",
+    "sentry.tasks.check_new_issue_threshold_met",
     "sentry.tasks.clear_expired_snoozes",
     "sentry.tasks.clear_expired_rulesnoozes",
     "sentry.tasks.codeowners.code_owners_auto_sync",
@@ -937,6 +938,7 @@ CELERY_QUEUES_REGION = [
     Queue("nudge.invite_missing_org_members", routing_key="invite_missing_org_members"),
     Queue("auto_resolve_issues", routing_key="auto_resolve_issues"),
     Queue("on_demand_metrics", routing_key="on_demand_metrics"),
+    Queue("check_new_issue_threshold_met", routing_key="check_new_issue_threshold_met"),
     Queue("integrations_slack_activity_notify", routing_key="integrations_slack_activity_notify"),
 ]
 

+ 9 - 0
src/sentry/rules/conditions/high_priority_issue.py

@@ -33,6 +33,15 @@ class HighPriorityIssueCondition(EventCondition):
         if not has_high_priority_issue_alerts(self.project):
             return False
 
+        if not self.rule:
+            return state.is_new
+
+        if not event.project.flags.has_high_priority_alerts:
+            if self.rule.environment_id is None:
+                return state.is_new
+
+            return state.is_new_group_environment
+
         is_escalating = state.has_reappeared or state.has_escalated
         if features.has("projects:issue-priority", self.project):
             if not event.group:

+ 79 - 0
src/sentry/tasks/check_new_issue_threshold_met.py

@@ -0,0 +1,79 @@
+from datetime import timedelta
+
+from django.db.models import Count, F, Q
+from django.utils import timezone
+
+from sentry.models.group import Group
+from sentry.models.project import Project
+from sentry.tasks.base import instrumented_task
+
+NEW_ISSUE_WEEKLY_THRESHOLD = 10
+new_issue_threshold_key = (
+    lambda project_id: f"issues.priority.calculate_new_issue_threshold:{project_id}"
+)
+
+
+def calculate_threshold_met(project_id: int) -> bool:
+    """
+    Calculate whether the new issue threshold has been met. The threshold is met if:
+    - There are 10 new issues per week for the last 3 weeks
+    - There are 20 new issues per week for the last 2 weeks
+    """
+
+    one_week_ago = timezone.now() - timedelta(weeks=1)
+    two_weeks_ago = timezone.now() - timedelta(weeks=2)
+    three_weeks_ago = timezone.now() - timedelta(weeks=3)
+
+    counts = Group.objects.filter(
+        project_id=project_id,
+        first_seen__gte=three_weeks_ago,
+    ).aggregate(
+        last_week=Count("id", filter=Q(first_seen__gte=one_week_ago)),
+        two_weeks_ago=Count(
+            "id", filter=Q(first_seen__gte=two_weeks_ago, first_seen__lt=one_week_ago)
+        ),
+        three_weeks_ago=Count(
+            "id", filter=Q(first_seen__gte=three_weeks_ago, first_seen__lt=two_weeks_ago)
+        ),
+    )
+
+    # Case 1: The weekly threshold has been met for the last 3 weeks
+    condition_1 = (
+        counts["last_week"] >= NEW_ISSUE_WEEKLY_THRESHOLD
+        and counts["two_weeks_ago"] >= NEW_ISSUE_WEEKLY_THRESHOLD
+        and counts["three_weeks_ago"] >= NEW_ISSUE_WEEKLY_THRESHOLD
+    )
+
+    # Case 2: The weekly threshold has been doubled for the last 2 weeks
+    condition_2 = (
+        counts["last_week"] >= 2 * NEW_ISSUE_WEEKLY_THRESHOLD
+        and counts["two_weeks_ago"] >= 2 * NEW_ISSUE_WEEKLY_THRESHOLD
+    )
+
+    return condition_1 or condition_2
+
+
+@instrumented_task(
+    name="sentry.tasks.check_new_issue_threshold_met",
+    queue="check_new_issue_threshold_met",
+    default_retry_delay=60,
+    max_retries=1,
+)
+def check_new_issue_threshold_met(project_id: int) -> None:
+    """
+    Check if the new issue threshold has been met for a project and sets the project flag accordingly.
+
+    The calculation is done once per day and the result is cached for 24 hours.
+    Rules with {new_issue_threshold_met: False} will default to using the FirstSeenEventCondition condition when applied.
+    """
+    try:
+        project = Project.objects.get(id=project_id)
+    except Project.DoesNotExist:
+        return
+
+    if project.flags.has_high_priority_alerts:
+        return
+
+    threshold_met = calculate_threshold_met(project.id)
+    if threshold_met:
+        project.update(flags=F("flags").bitor(Project.flags.has_high_priority_alerts))

+ 44 - 0
src/sentry/tasks/post_process.py

@@ -1460,6 +1460,49 @@ def should_postprocess_feedback(job: PostProcessJob) -> bool:
     return False
 
 
+def check_has_high_priority_alerts(job: PostProcessJob) -> None:
+    """
+    Determine if we should fire a task to check if the new issue
+    threshold has been met to enable high priority alerts.
+    """
+    try:
+        event = job["event"]
+        if event.project.flags.has_high_priority_alerts:
+            return
+
+        from sentry.tasks.check_new_issue_threshold_met import (
+            check_new_issue_threshold_met,
+            new_issue_threshold_key,
+        )
+
+        # If the new issue volume has already been checked today, don't recalculate regardless of the value
+        project_key = new_issue_threshold_key(event.project_id)
+        threshold_met = cache.get(project_key)
+        if threshold_met is not None:
+            return
+
+        try:
+            lock = locks.get(project_key, duration=10)
+            with lock.acquire():
+                # If the threshold has already been calculated today, don't recalculate regardless of the value
+                task_scheduled = cache.get(project_key)
+                if task_scheduled is not None:
+                    return
+
+                check_new_issue_threshold_met.delay(event.project.id)
+
+                # Add the key to cache for 24 hours
+                cache.set(project_key, True, 60 * 60 * 24)
+        except UnableToAcquireLock:
+            pass
+    except Exception as e:
+        logger.warning(
+            "Failed to check new issue threshold met",
+            repr(e),
+            extra={"project_id": event.project_id},
+        )
+
+
 MAX_NEW_ESCALATION_AGE_HOURS = 24
 MIN_EVENTS_FOR_NEW_ESCALATION = 10
 
@@ -1551,6 +1594,7 @@ GROUP_CATEGORY_POST_PROCESS_PIPELINE = {
         _capture_group_stats,
         process_snoozes,
         process_inbox_adds,
+        check_has_high_priority_alerts,
         detect_new_escalation,
         process_commits,
         handle_owner_assignment,

+ 52 - 19
tests/sentry/rules/conditions/test_high_priority_issue.py

@@ -1,3 +1,4 @@
+from sentry.models.rule import Rule
 from sentry.rules.conditions.high_priority_issue import HighPriorityIssueCondition
 from sentry.testutils.cases import RuleTestCase
 from sentry.testutils.helpers.features import with_feature
@@ -10,29 +11,61 @@ pytestmark = [requires_snuba]
 class HighPriorityIssueConditionTest(RuleTestCase):
     rule_cls = HighPriorityIssueCondition
 
+    def setUp(self):
+        self.rule = Rule(environment_id=1, project=self.project, label="label")
+
+    def test_without_flag(self):
+        rule = self.get_rule(rule=self.rule)
+
+        self.assertDoesNotPass(rule, self.event, is_new=False)
+        self.assertDoesNotPass(rule, self.event, is_new=True)
+
+    @with_feature("projects:high-priority-alerts")
+    def test_without_threshold_and_environment(self):
+        self.rule.environment_id = None
+        self.rule.save()
+        rule = self.get_rule(rule=self.rule)
+
+        self.assertPasses(rule, self.event, is_new=True)
+        self.assertPasses(rule, self.event, is_new=True)
+        self.assertDoesNotPass(rule, self.event, is_new=False)
+        self.assertDoesNotPass(rule, self.event, is_new=False)
+
+    @with_feature("projects:high-priority-alerts")
+    def test_without_threshold_with_environment(self):
+        rule = self.get_rule(rule=self.rule)
+
+        self.assertPasses(rule, self.event, is_new=True, is_new_group_environment=True)
+        self.assertPasses(rule, self.event, is_new=False, is_new_group_environment=True)
+        self.assertPasses(rule, self.event, is_new=False, is_new_group_environment=True)
+
+        self.assertDoesNotPass(rule, self.event, is_new=True, is_new_group_environment=False)
+        self.assertDoesNotPass(rule, self.event, is_new=False, is_new_group_environment=False)
+        self.assertDoesNotPass(
+            rule, self.event, is_new=False, is_new_group_environment=False, has_escalated=True
+        )
+
     @with_feature("projects:high-priority-alerts")
     @with_feature("projects:issue-priority")
-    def test_applies_correctly(self):
-        rule = self.get_rule()
-        event = self.get_event()
+    def test_with_threshold_and_priority(self):
+        self.project.flags.has_high_priority_alerts = True
+        self.project.save()
+
+        rule = self.get_rule(rule=self.rule)
 
-        # This will never pass for non-new or non-escalating issuesalways pass
-        event.group.update(priority=PriorityLevel.HIGH)
-        self.assertPasses(rule, event, is_new=True, has_reappeared=False, has_escalated=False)
-        self.assertPasses(rule, event, is_new=False, has_reappeared=True, has_escalated=False)
-        self.assertPasses(rule, event, is_new=False, has_reappeared=True, has_escalated=False)
-        self.assertDoesNotPass(rule, event, is_new=False, has_reappeared=False, has_escalated=False)
+        # This will only pass for new or escalating issues
+        self.event.group.update(priority=PriorityLevel.HIGH)
+        self.assertPasses(rule, self.event, is_new=True, has_reappeared=False)
+        self.assertPasses(rule, self.event, is_new=False, has_reappeared=True)
+        self.assertPasses(rule, self.event, is_new=False, has_reappeared=False, has_escalated=True)
+        self.assertDoesNotPass(rule, self.event, is_new=False, has_reappeared=False)
 
         # This will never pass
-        event.group.update(priority=PriorityLevel.LOW)
-        self.assertDoesNotPass(rule, event, is_new=True, has_reappeared=False, has_escalated=False)
-        self.assertDoesNotPass(rule, event, is_new=False, has_reappeared=True, has_escalated=False)
-        self.assertDoesNotPass(rule, event, is_new=False, has_reappeared=True, has_escalated=False)
-        self.assertDoesNotPass(rule, event, is_new=False, has_reappeared=False, has_escalated=False)
+        self.event.group.update(priority=PriorityLevel.LOW)
+        self.assertDoesNotPass(rule, self.event, is_new=True)
+        self.assertDoesNotPass(rule, self.event, is_new=False, has_escalated=True)
 
         # This will never pass
-        event.group.update(priority=PriorityLevel.MEDIUM)
-        self.assertDoesNotPass(rule, event, is_new=True, has_reappeared=False, has_escalated=False)
-        self.assertDoesNotPass(rule, event, is_new=False, has_reappeared=True, has_escalated=False)
-        self.assertDoesNotPass(rule, event, is_new=False, has_reappeared=True, has_escalated=False)
-        self.assertDoesNotPass(rule, event, is_new=False, has_reappeared=False, has_escalated=False)
+        self.event.group.update(priority=PriorityLevel.MEDIUM)
+        self.assertDoesNotPass(rule, self.event, is_new=True)
+        self.assertDoesNotPass(rule, self.event, is_new=False, has_escalated=True)

+ 79 - 0
tests/sentry/tasks/test_check_new_issue_threshold_met.py

@@ -0,0 +1,79 @@
+from unittest.mock import patch
+
+from sentry.tasks.check_new_issue_threshold_met import (
+    NEW_ISSUE_WEEKLY_THRESHOLD,
+    calculate_threshold_met,
+    check_new_issue_threshold_met,
+)
+from sentry.testutils.cases import TestCase
+from sentry.testutils.helpers.datetime import before_now, iso_format
+
+
+class CheckNewIssueThresholdMetTest(TestCase):
+    def setUp(self):
+        self.project = self.create_project()
+        self.project.flags.has_high_priority_alerts = False
+        self.project.save()
+
+    @patch("sentry.tasks.check_new_issue_threshold_met.calculate_threshold_met", return_value=False)
+    def test_threshold_not_met(self, mock_calculate):
+        assert not self.project.flags.has_high_priority_alerts
+
+        check_new_issue_threshold_met(self.project.id)
+        self.project.refresh_from_db()
+
+        assert mock_calculate.call_count == 1
+        assert not self.project.flags.has_high_priority_alerts
+
+    @patch("sentry.tasks.check_new_issue_threshold_met.calculate_threshold_met")
+    def test_threshold_already_met(self, mock_calculate):
+        self.project.flags.has_high_priority_alerts = True
+        self.project.save()
+
+        check_new_issue_threshold_met(self.project.id)
+
+        self.project.refresh_from_db()
+
+        assert mock_calculate.call_count == 0
+        assert self.project.flags.has_high_priority_alerts
+
+    @patch("sentry.tasks.check_new_issue_threshold_met.calculate_threshold_met", return_value=True)
+    def test_threshold_newly_met(self, mock_calculate):
+        assert not self.project.flags.has_high_priority_alerts
+        check_new_issue_threshold_met(self.project.id)
+
+        self.project.refresh_from_db()
+
+        assert mock_calculate.call_count == 1
+        assert self.project.flags.has_high_priority_alerts
+
+
+class CalculateThresholdMetTest(TestCase):
+    def test_threshold_not_met(self):
+        assert not calculate_threshold_met(self.project.id)
+
+    def test_threshold_met_condition_1(self):
+        for weeks in range(3):
+            for i in range(NEW_ISSUE_WEEKLY_THRESHOLD):
+                self.store_event(
+                    data={
+                        "fingerprint": [f"group-{weeks}-{i}"],
+                        "timestamp": iso_format(before_now(days=7 * weeks)),
+                    },
+                    project_id=self.project.id,
+                )
+
+        assert calculate_threshold_met(self.project.id)
+
+    def test_threshold_met_condition_2(self):
+        for weeks in range(2):
+            for i in range(2 * NEW_ISSUE_WEEKLY_THRESHOLD):
+                self.store_event(
+                    data={
+                        "fingerprint": [f"group-{weeks}-{i}"],
+                        "timestamp": iso_format(before_now(days=7 * weeks)),
+                    },
+                    project_id=self.project.id,
+                )
+
+        assert calculate_threshold_met(self.project.id)