Browse Source

feat(perf issues): Frequency based alerts (#39522)

Enable frequency based alerts for performance issues. Similarly to
https://github.com/getsentry/sentry/pull/39120 it chooses the model
based on the issue category.

Co-authored-by: Dan Fuller <dfuller@sentry.io>
Colleen O'Rourke 2 years ago
parent
commit
21a8ca3207

+ 5 - 4
src/sentry/rules/conditions/event_frequency.py

@@ -13,6 +13,7 @@ from django.utils import timezone
 
 from sentry import release_health, tsdb
 from sentry.eventstore.models import Event
+from sentry.issues.constants import ISSUE_TSDB_GROUP_MODELS, ISSUE_TSDB_USER_GROUP_MODELS
 from sentry.receivers.rules import DEFAULT_RULE_LABEL
 from sentry.rules import EventState
 from sentry.rules.conditions.base import EventCondition
@@ -138,7 +139,6 @@ class BaseEventFrequencyCondition(EventCondition, abc.ABC):
     def get_rate(self, event: Event, interval: str, environment_id: str) -> int:
         _, duration = self.intervals[interval]
         end = timezone.now()
-
         # For conditions with interval >= 1 hour we don't need to worry about read your writes
         # consistency. Disable it so that we can scale to more nodes.
         option_override_cm = contextlib.nullcontext()
@@ -187,7 +187,7 @@ class EventFrequencyCondition(BaseEventFrequencyCondition):
 
     def query_hook(self, event: Event, start: datetime, end: datetime, environment_id: str) -> int:
         sums: Mapping[int, int] = self.tsdb.get_sums(
-            model=self.tsdb.models.group,
+            model=ISSUE_TSDB_GROUP_MODELS[event.group.issue_category],
             keys=[event.group_id],
             start=start,
             end=end,
@@ -204,7 +204,7 @@ class EventUniqueUserFrequencyCondition(BaseEventFrequencyCondition):
 
     def query_hook(self, event: Event, start: datetime, end: datetime, environment_id: str) -> int:
         totals: Mapping[int, int] = self.tsdb.get_distinct_counts_totals(
-            model=self.tsdb.models.users_affected_by_group,
+            model=ISSUE_TSDB_USER_GROUP_MODELS[event.group.issue_category],
             keys=[event.group_id],
             start=start,
             end=end,
@@ -304,8 +304,9 @@ class EventFrequencyPercentCondition(BaseEventFrequencyCondition):
                 percent_intervals[self.get_option("interval")][1].total_seconds() // 60
             )
             avg_sessions_in_interval = session_count_last_hour / (60 / interval_in_minutes)
+
             issue_count = self.tsdb.get_sums(
-                model=self.tsdb.models.group,
+                model=ISSUE_TSDB_GROUP_MODELS[event.group.issue_category],
                 keys=[event.group_id],
                 start=start,
                 end=end,

+ 5 - 6
src/sentry/testutils/perfomance_issues/store_transaction.py

@@ -4,7 +4,6 @@ from typing import Sequence
 
 from django.utils import timezone
 
-from sentry.models import Environment
 from sentry.testutils import SnubaTestCase
 
 
@@ -14,7 +13,7 @@ class PerfIssueTransactionTestMixin:
         project_id: int,
         user_id: str,
         fingerprint: Sequence[str],
-        environment: Environment = None,
+        environment: str = None,
         timestamp: datetime = None,
     ):
         from sentry.utils import snuba
@@ -34,13 +33,13 @@ class PerfIssueTransactionTestMixin:
             "start_timestamp": insert_time.timestamp(),
             "received": insert_time.timestamp(),
             # we need to randomize the value here to make sure ingestion doesn't dedupe these transactions
-            "transaction": "transaction: " + str(insert_time) + str(random.randint(0, 1000)),
+            "transaction": "transaction: " + str(insert_time) + str(random.randint(0, 100000000)),
             "fingerprint": fingerprint,
         }
 
         if environment:
-            event_data["environment"] = environment.name
-            event_data["tags"].extend([("environment", environment.name)])
+            event_data["environment"] = environment
+            event_data["tags"].extend([("environment", environment)])
 
         event = self.store_event(
             data=event_data,
@@ -68,7 +67,7 @@ class PerfIssueTransactionTestMixin:
         assert result["data"][0]["project_id"] == project_id
         assert result["data"][0]["group_ids"] == [g.id for g in event.groups]
         assert result["data"][0]["tags[sentry:user]"] == user_id_val
-        assert result["data"][0]["environment"] == (environment.name if environment else None)
+        assert result["data"][0]["environment"] == (environment)
         assert result["data"][0]["timestamp"] == insert_time.isoformat()
 
         return event

+ 2 - 2
tests/snuba/api/serializers/test_group.py

@@ -442,7 +442,7 @@ class PerformanceGroupSerializerSnubaTest(
                 proj.id,
                 "user1",
                 [first_group_fingerprint],
-                environment,
+                environment.name,
                 timestamp=timestamp + timedelta(minutes=1),
             )
 
@@ -450,7 +450,7 @@ class PerformanceGroupSerializerSnubaTest(
             proj.id,
             "user2",
             [first_group_fingerprint],
-            environment,
+            environment.name,
             timestamp=timestamp + timedelta(minutes=2),
         )
 

+ 105 - 37
tests/snuba/rules/conditions/test_event_frequency.py

@@ -4,6 +4,7 @@ from datetime import timedelta
 from unittest.mock import patch
 from uuid import uuid4
 
+import pytz
 from django.utils.timezone import now
 from freezegun import freeze_time
 
@@ -15,7 +16,9 @@ from sentry.rules.conditions.event_frequency import (
 )
 from sentry.testutils.cases import RuleTestCase, SnubaTestCase
 from sentry.testutils.helpers.datetime import before_now, iso_format
+from sentry.testutils.perfomance_issues.store_transaction import PerfIssueTransactionTestMixin
 from sentry.testutils.silo import region_silo_test
+from sentry.types.issues import GroupType
 
 
 class FrequencyConditionMixin:
@@ -29,13 +32,13 @@ class FrequencyConditionMixin:
         rule = self.get_rule(data=data, rule=Rule(environment_id=None))
         environment_rule = self.get_rule(data=data, rule=Rule(environment_id=self.environment.id))
 
-        event = self.store_event(
+        event = self.add_event(
             data={
                 "fingerprint": ["something_random"],
-                "timestamp": iso_format(before_now(minutes=minutes)),
                 "user": {"id": uuid4().hex},
             },
             project_id=self.project.id,
+            timestamp=before_now(minutes=minutes),
         )
         if add_events:
             self.increment(
@@ -58,6 +61,36 @@ class FrequencyConditionMixin:
             self.assertDoesNotPass(environment_rule, event)
 
 
+class ErrorEventMixin:
+    def add_event(self, data, project_id, timestamp):
+        data["timestamp"] = iso_format(timestamp)
+        # Store an error event
+        event = self.store_event(
+            data=data,
+            project_id=project_id,
+        )
+        return event.for_group(event.group)
+
+
+class PerfEventMixin(PerfIssueTransactionTestMixin):
+    def add_event(self, data, project_id, timestamp):
+        fingerprint = data["fingerprint"][0]
+        fingerprint = (
+            fingerprint
+            if "-" in fingerprint
+            else f"{GroupType.PERFORMANCE_N_PLUS_ONE.value}-{data['fingerprint'][0]}"
+        )
+        # Store a performance event
+        event = self.store_transaction(
+            environment=data.get("environment"),
+            project_id=project_id,
+            user_id=data.get("user", uuid4().hex),
+            fingerprint=[fingerprint],
+            timestamp=timestamp.replace(tzinfo=pytz.utc),
+        )
+        return event.for_group(event.groups[0])
+
+
 class StandardIntervalMixin:
     def test_one_minute_with_events(self):
         data = {"interval": "1m", "value": 6}
@@ -102,13 +135,13 @@ class StandardIntervalMixin:
     def test_comparison(self):
         # Test data is 4 events in the current period and 2 events in the comparison period, so
         # a 100% increase.
-        event = self.store_event(
+        event = self.add_event(
             data={
                 "fingerprint": ["something_random"],
-                "timestamp": iso_format(before_now(minutes=1)),
                 "user": {"id": uuid4().hex},
             },
             project_id=self.project.id,
+            timestamp=before_now(minutes=1),
         )
         self.increment(
             event,
@@ -141,13 +174,13 @@ class StandardIntervalMixin:
     def test_comparison_empty_comparison_period(self):
         # Test data is 1 event in the current period and 0 events in the comparison period. This
         # should always result in 0 and never fire.
-        event = self.store_event(
+        event = self.add_event(
             data={
                 "fingerprint": ["something_random"],
-                "timestamp": iso_format(before_now(minutes=1)),
                 "user": {"id": uuid4().hex},
             },
             project_id=self.project.id,
+            timestamp=before_now(minutes=1),
         )
         data = {
             "interval": "1h",
@@ -168,61 +201,49 @@ class StandardIntervalMixin:
         self.assertDoesNotPass(rule, event)
 
 
-@freeze_time((now() - timedelta(days=2)).replace(hour=12, minute=40, second=0, microsecond=0))
-@region_silo_test
 class EventFrequencyConditionTestCase(
-    FrequencyConditionMixin, StandardIntervalMixin, RuleTestCase, SnubaTestCase
+    FrequencyConditionMixin, StandardIntervalMixin, SnubaTestCase
 ):
     rule_cls = EventFrequencyCondition
 
     def increment(self, event, count, environment=None, timestamp=None):
-        data = {
-            "fingerprint": event.data["fingerprint"],
-            "timestamp": iso_format(timestamp) if timestamp else iso_format(before_now(minutes=1)),
-        }
+        timestamp = timestamp if timestamp else before_now(minutes=1)
+        data = {"fingerprint": event.data["fingerprint"]}
         if environment:
             data["environment"] = environment
 
         for _ in range(count):
-            self.store_event(
+            self.add_event(
                 data=data,
                 project_id=self.project.id,
+                timestamp=timestamp,
             )
 
 
-@freeze_time((now() - timedelta(days=2)).replace(hour=12, minute=40, second=0, microsecond=0))
-@region_silo_test
 class EventUniqueUserFrequencyConditionTestCase(
     FrequencyConditionMixin,
     StandardIntervalMixin,
-    RuleTestCase,
     SnubaTestCase,
 ):
     rule_cls = EventUniqueUserFrequencyCondition
 
     def increment(self, event, count, environment=None, timestamp=None):
-        data = {
-            "fingerprint": event.data["fingerprint"],
-            "timestamp": iso_format(timestamp) if timestamp else iso_format(before_now(minutes=1)),
-        }
+        timestamp = timestamp if timestamp else before_now(minutes=1)
+        data = {"fingerprint": event.data["fingerprint"]}
         if environment:
             data["environment"] = environment
 
         for _ in range(count):
             event_data = deepcopy(data)
             event_data["user"] = {"id": uuid4().hex}
-            self.store_event(
+            self.add_event(
                 data=event_data,
                 project_id=self.project.id,
+                timestamp=timestamp,
             )
 
 
-@freeze_time((now() - timedelta(days=2)).replace(hour=12, minute=40, second=0, microsecond=0))
-@region_silo_test
-class EventFrequencyPercentConditionTestCase(
-    RuleTestCase,
-    SnubaTestCase,
-):
+class EventFrequencyPercentConditionTestCase(SnubaTestCase):
     rule_cls = EventFrequencyPercentCondition
 
     def _make_sessions(self, num):
@@ -252,14 +273,14 @@ class EventFrequencyPercentConditionTestCase(
         if not self.environment or self.environment.name != "prod":
             self.environment = self.create_environment(name="prod")
         if not hasattr(self, "test_event"):
-            self.test_event = self.store_event(
+            self.test_event = self.add_event(
                 data={
                     "fingerprint": ["something_random"],
-                    "timestamp": iso_format(before_now(minutes=minutes)),
                     "user": {"id": uuid4().hex},
                     "environment": self.environment.name,
                 },
                 project_id=self.project.id,
+                timestamp=before_now(minutes=minutes),
             )
         if add_events:
             self.increment(
@@ -280,17 +301,18 @@ class EventFrequencyPercentConditionTestCase(
     def increment(self, event, count, environment=None, timestamp=None):
         data = {
             "fingerprint": event.data["fingerprint"],
-            "timestamp": iso_format(timestamp) if timestamp else iso_format(before_now(minutes=1)),
         }
+        timestamp = timestamp if timestamp else before_now(minutes=1)
         if environment:
             data["environment"] = environment
 
         for _ in range(count):
             event_data = deepcopy(data)
             event_data["user"] = {"id": uuid4().hex}
-            self.store_event(
+            self.add_event(
                 data=event_data,
                 project_id=self.project.id,
+                timestamp=timestamp,
             )
 
     @patch("sentry.rules.conditions.event_frequency.MIN_SESSIONS_TO_FIRE", 1)
@@ -358,12 +380,10 @@ class EventFrequencyPercentConditionTestCase(
         # Test data is 2 events in the current period and 1 events in the comparison period.
         # Number of sessions is 20 in each period, so current period is 20% of sessions, prev
         # is 10%. Overall a 100% increase comparitively.
-        event = self.store_event(
-            data={
-                "fingerprint": ["something_random"],
-                "timestamp": iso_format(before_now(minutes=1)),
-            },
+        event = self.add_event(
+            data={"fingerprint": ["something_random"]},
             project_id=self.project.id,
+            timestamp=before_now(minutes=1),
         )
         self.increment(
             event,
@@ -392,3 +412,51 @@ class EventFrequencyPercentConditionTestCase(
         }
         rule = self.get_rule(data=data, rule=Rule(environment_id=None))
         self.assertDoesNotPass(rule, event)
+
+
+@freeze_time((now() - timedelta(days=2)).replace(hour=12, minute=40, second=0, microsecond=0))
+@region_silo_test
+class ErrorIssueFrequencyConditionTestCase(
+    EventFrequencyConditionTestCase, RuleTestCase, ErrorEventMixin
+):
+    pass
+
+
+@freeze_time((now() - timedelta(days=2)).replace(hour=12, minute=40, second=0, microsecond=0))
+@region_silo_test
+class PerfIssueFrequencyConditionTestCase(
+    EventFrequencyConditionTestCase, RuleTestCase, PerfEventMixin
+):
+    pass
+
+
+@freeze_time((now() - timedelta(days=2)).replace(hour=12, minute=40, second=0, microsecond=0))
+@region_silo_test
+class ErrorIssueUniqueUserFrequencyConditionTestCase(
+    EventUniqueUserFrequencyConditionTestCase, RuleTestCase, ErrorEventMixin
+):
+    pass
+
+
+@freeze_time((now() - timedelta(days=2)).replace(hour=12, minute=40, second=0, microsecond=0))
+@region_silo_test
+class PerfIssueUniqueUserFrequencyConditionTestCase(
+    EventUniqueUserFrequencyConditionTestCase, RuleTestCase, PerfEventMixin
+):
+    pass
+
+
+@freeze_time((now() - timedelta(days=2)).replace(hour=12, minute=40, second=0, microsecond=0))
+@region_silo_test
+class ErrorIssueEventFrequencyPercentConditionTestCase(
+    EventFrequencyPercentConditionTestCase, RuleTestCase, ErrorEventMixin
+):
+    pass
+
+
+@freeze_time((now() - timedelta(days=2)).replace(hour=12, minute=40, second=0, microsecond=0))
+@region_silo_test
+class PerfIssueEventFrequencyPercentConditionTestCase(
+    EventFrequencyPercentConditionTestCase, RuleTestCase, PerfEventMixin
+):
+    pass

+ 13 - 9
tests/snuba/tagstore/test_tagstore_backend.py

@@ -979,21 +979,21 @@ class PerfTagStorageTest(TestCase, SnubaTestCase, PerfIssueTransactionTestMixin)
             self.project.id,
             "user1",
             [first_group_fingerprint],
-            self.environment,
+            self.environment.name,
             timestamp=first_group_timestamp_start + timedelta(minutes=1),
         )
         self.store_transaction(
             self.project.id,
             "user1",
             [first_group_fingerprint],
-            self.environment,
+            self.environment.name,
             timestamp=first_group_timestamp_start + timedelta(minutes=2),
         )
         self.store_transaction(
             self.project.id,
             "user2",
             [first_group_fingerprint],
-            self.environment,
+            self.environment.name,
             timestamp=first_group_timestamp_start + timedelta(minutes=3),
         )
         event_with_first_group = self.store_transaction(
@@ -1010,21 +1010,21 @@ class PerfTagStorageTest(TestCase, SnubaTestCase, PerfIssueTransactionTestMixin)
             self.project.id,
             "user1",
             [second_group_fingerprint],
-            self.environment,
+            self.environment.name,
             timestamp=second_group_timestamp_start + timedelta(minutes=1),
         )
         self.store_transaction(
             self.project.id,
             "user1",
             [second_group_fingerprint],
-            self.environment,
+            self.environment.name,
             timestamp=second_group_timestamp_start + timedelta(minutes=2),
         )
         self.store_transaction(
             self.project.id,
             "user2",
             [second_group_fingerprint],
-            self.environment,
+            self.environment.name,
             timestamp=second_group_timestamp_start + timedelta(minutes=3),
         )
         event_with_second_group = self.store_transaction(
@@ -1036,7 +1036,7 @@ class PerfTagStorageTest(TestCase, SnubaTestCase, PerfIssueTransactionTestMixin)
         second_group = event_with_second_group.groups[0]
 
         # should have no effect on user_counts
-        self.store_transaction(self.project.id, "user_nogroup", [], self.environment)
+        self.store_transaction(self.project.id, "user_nogroup", [], self.environment.name)
 
         assert self.ts.get_perf_groups_user_counts(
             [self.project.id],
@@ -1055,12 +1055,16 @@ class PerfTagStorageTest(TestCase, SnubaTestCase, PerfIssueTransactionTestMixin)
             self.project.id,
             "user1",
             [group_fingerprint],
-            self.environment,
+            self.environment.name,
             timestamp=first_event_ts,
         )
         last_event_ts = start_timestamp + timedelta(hours=1)
         event = self.store_transaction(
-            self.project.id, "user1", [group_fingerprint], self.environment, timestamp=last_event_ts
+            self.project.id,
+            "user1",
+            [group_fingerprint],
+            self.environment.name,
+            timestamp=last_event_ts,
         )
         group = event.groups[0]
 

+ 1 - 1
tests/snuba/tsdb/test_tsdb_backend.py

@@ -529,7 +529,7 @@ class SnubaTSDBGroupPerformanceTest(TestCase, SnubaTestCase, PerfIssueTransactio
 
         for r in range(0, 14400, 600):  # Every 10 min for 4 hours
             event = self.store_transaction(
-                environment=[self.env1, None][(r // 7200) % 3],
+                environment=[self.env1.name, None][(r // 7200) % 3],
                 project_id=self.proj1.id,
                 # change every 55 min so some hours have 1 user, some have 2
                 user_id=f"user{r // 3300}",