1 year ago · 3d6f9fa4f2
--- a/src/sentry/monitors/logic/mark_failed.py
+++ b/src/sentry/monitors/logic/mark_failed.py
@@ -14,7 +14,6 @@ from sentry.issues.grouptype import (
 
				     MonitorCheckInMissed,
			
 
				     MonitorCheckInTimeout,
			
 
				 )
			
 
				-from sentry.issues.producer import PayloadType
			
 
				 from sentry.models.organization import Organization
			
 
				 from sentry.monitors.constants import SUBTITLE_DATETIME_FORMAT, TIMEOUT
			
 
				 from sentry.monitors.models import (
			
@@ -249,7 +248,7 @@ def create_issue_platform_occurrence(
 
				     fingerprint=None,
			
 
				 ):
			
 
				     from sentry.issues.issue_occurrence import IssueEvidence, IssueOccurrence
			
 
				-    from sentry.issues.producer import produce_occurrence_to_kafka
			
 
				+    from sentry.issues.producer import PayloadType, produce_occurrence_to_kafka
			
 
				 
			
 
				     monitor_env = failed_checkin.monitor_environment
			
 
				     current_timestamp = datetime.utcnow().replace(tzinfo=timezone.utc)
			
@@ -301,7 +300,7 @@ def create_issue_platform_occurrence(
 
				         "contexts": {"monitor": get_monitor_environment_context(monitor_env)},
			
 
				         "environment": monitor_env.environment.name,
			
 
				         "event_id": occurrence.event_id,
			
 
				-        "fingerprint": fingerprint
			
 
				+        "fingerprint": [fingerprint]
			
 
				         if fingerprint
			
 
				         else [
			
 
				             "monitor",
			
--- a/src/sentry/monitors/logic/mark_ok.py
+++ b/src/sentry/monitors/logic/mark_ok.py
@@ -43,22 +43,52 @@ def mark_ok(checkin: MonitorCheckIn, ts: datetime):
 
				                 previous_checkin["status"] == CheckInStatus.OK
			
 
				                 for previous_checkin in previous_checkins
			
 
				             )
			
 
				+        else:
			
 
				+            # Mark any open incidents as recovering by default
			
 
				+            incident_recovering = True
			
 
				 
			
 
				-            # Resolve the incident
			
 
				-            if incident_recovering:
			
 
				-                MonitorIncident.objects.filter(
			
 
				-                    monitor_environment=monitor_env,
			
 
				-                    grouphash=monitor_env.incident_grouphash,
			
 
				-                ).update(
			
 
				-                    resolving_checkin=checkin,
			
 
				-                    resolving_timestamp=checkin.date_added,
			
 
				-                )
			
 
				+        # Resolve any open incidents
			
 
				+        if incident_recovering:
			
 
				+            # TODO(rjo100): Check for multiple open incidents where we only
			
 
				+            # resolved if recovery_threshold was set and not faiure_issue_threshold
			
 
				+            active_incidents = MonitorIncident.objects.filter(
			
 
				+                monitor_environment=monitor_env,
			
 
				+                resolving_checkin__isnull=True,
			
 
				+            )
			
 
				 
			
 
				+            # Only send an occurrence if we have an active incident
			
 
				+            for grouphash in active_incidents.values_list("grouphash", flat=True):
			
 
				+                resolve_incident_group(grouphash, checkin.monitor.project_id)
			
 
				+            if active_incidents.update(
			
 
				+                resolving_checkin=checkin,
			
 
				+                resolving_timestamp=checkin.date_added,
			
 
				+            ):
			
 
				                 params["last_state_change"] = ts
			
 
				-            else:
			
 
				-                # Don't update status if incident isn't recovered
			
 
				-                params.pop("status", None)
			
 
				+        else:
			
 
				+            # Don't update status if incident isn't recovered
			
 
				+            params.pop("status", None)
			
 
				 
			
 
				     MonitorEnvironment.objects.filter(id=monitor_env.id).exclude(last_checkin__gt=ts).update(
			
 
				         **params
			
 
				     )
			
 
				+
			
 
				+
			
 
				+def resolve_incident_group(
			
 
				+    fingerprint: str,
			
 
				+    project_id: int,
			
 
				+):
			
 
				+    from sentry.issues.producer import PayloadType, produce_occurrence_to_kafka
			
 
				+    from sentry.issues.status_change_message import StatusChangeMessage
			
 
				+    from sentry.models.group import GroupStatus
			
 
				+
			
 
				+    status_change = StatusChangeMessage(
			
 
				+        fingerprint=[fingerprint],
			
 
				+        project_id=project_id,
			
 
				+        new_status=GroupStatus.RESOLVED,
			
 
				+        new_substatus=None,
			
 
				+    )
			
 
				+
			
 
				+    produce_occurrence_to_kafka(
			
 
				+        payload_type=PayloadType.STATUS_CHANGE,
			
 
				+        status_change=status_change,
			
 
				+    )
			
--- a/tests/sentry/monitors/logic/test_mark_ok.py
+++ b/tests/sentry/monitors/logic/test_mark_ok.py
@@ -1,7 +1,10 @@
 
				 from datetime import timedelta
			
 
				+from unittest.mock import patch
			
 
				 
			
 
				 from django.utils import timezone
			
 
				 
			
 
				+from sentry.issues.producer import PayloadType
			
 
				+from sentry.models.group import GroupStatus
			
 
				 from sentry.monitors.logic.mark_ok import mark_ok
			
 
				 from sentry.monitors.models import (
			
 
				     CheckInStatus,
			
@@ -62,7 +65,8 @@ class MarkOkTestCase(TestCase):
 
				         assert monitor_environment.next_checkin_latest == now + timedelta(minutes=2)
			
 
				         assert monitor_environment.last_checkin == now
			
 
				 
			
 
				-    def test_mark_ok_recovery_threshold(self):
			
 
				+    @patch("sentry.issues.producer.produce_occurrence_to_kafka")
			
 
				+    def test_mark_ok_recovery_threshold(self, mock_produce_occurrence_to_kafka):
			
 
				         now = timezone.now().replace(second=0, microsecond=0)
			
 
				 
			
 
				         recovery_threshold = 8
			
@@ -126,13 +130,17 @@ class MarkOkTestCase(TestCase):
 
				         # Incident has not resolved
			
 
				         assert incident.resolving_checkin is None
			
 
				         assert incident.resolving_timestamp is None
			
 
				+        # no status change is sent to kafka occurrence consumer
			
 
				+        assert len(mock_produce_occurrence_to_kafka.mock_calls) == 0
			
 
				 
			
 
				         # create another failed check-in to break the chain
			
 
				+        now = now + timedelta(minutes=1)
			
 
				         last_checkin = MonitorCheckIn.objects.create(
			
 
				             monitor=monitor,
			
 
				             monitor_environment=monitor_environment,
			
 
				             project_id=self.project.id,
			
 
				             status=CheckInStatus.ERROR,
			
 
				+            date_added=now,
			
 
				         )
			
 
				 
			
 
				         # Still not resolved
			
@@ -166,3 +174,21 @@ class MarkOkTestCase(TestCase):
 
				         # Incident resolved
			
 
				         assert incident.resolving_checkin == last_checkin
			
 
				         assert incident.resolving_timestamp == last_checkin.date_added
			
 
				+
			
 
				+        # assert status change is sent to kafka occurrence consumer
			
 
				+        assert len(mock_produce_occurrence_to_kafka.mock_calls) == 1
			
 
				+
			
 
				+        kwargs = mock_produce_occurrence_to_kafka.call_args.kwargs
			
 
				+        payload_type, status_change = kwargs["payload_type"], kwargs["status_change"]
			
 
				+        status_change = status_change.to_dict()
			
 
				+
			
 
				+        assert payload_type == PayloadType.STATUS_CHANGE
			
 
				+        assert dict(
			
 
				+            status_change,
			
 
				+            **{
			
 
				+                "fingerprint": [incident.grouphash],
			
 
				+                "project_id": monitor.project_id,
			
 
				+                "new_status": GroupStatus.RESOLVED,
			
 
				+                "new_substatus": None,
			
 
				+            },
			
 
				+        ) == dict(status_change)