Browse Source

feat(crons): Add helpful subtitle to crons issue platform + update evidence (#51877)

Adds helpful human-readable subtitle to issue platform crons issues +
updates `last check-in` evidence to `last successful check-in`
Richard Ortenberg 1 year ago
parent
commit
b466fbcefb
3 changed files with 84 additions and 21 deletions
  1. 45 8
      src/sentry/monitors/models.py
  2. 8 2
      src/sentry/monitors/tasks.py
  3. 31 11
      tests/sentry/monitors/test_models.py

+ 45 - 8
src/sentry/monitors/models.py

@@ -480,7 +480,16 @@ class MonitorEnvironment(Model):
     def get_audit_log_data(self):
         return {"name": self.environment.name, "status": self.status, "monitor": self.monitor.name}
 
-    def mark_failed(self, last_checkin=None, reason=MonitorFailure.UNKNOWN):
+    def get_last_successful_checkin(self):
+        return (
+            MonitorCheckIn.objects.filter(monitor_environment=self, status=CheckInStatus.OK)
+            .order_by("-date_added")
+            .first()
+        )
+
+    def mark_failed(
+        self, last_checkin=None, reason=MonitorFailure.UNKNOWN, occurrence_context=None
+    ):
         from sentry.signals import monitor_environment_failed
 
         if last_checkin is None:
@@ -529,7 +538,16 @@ class MonitorEnvironment(Model):
             from sentry.issues.issue_occurrence import IssueEvidence, IssueOccurrence
             from sentry.issues.producer import produce_occurrence_to_kafka
 
-            occurrence_data = get_occurrence_data(reason)
+            if not occurrence_context:
+                occurrence_context = {}
+
+            occurrence_data = get_occurrence_data(reason, **occurrence_context)
+
+            # Get last successful check-in to show in evidence display
+            last_successful_checkin_timestamp = "None"
+            last_successful_checkin = self.get_last_successful_checkin()
+            if last_successful_checkin:
+                last_successful_checkin_timestamp = last_successful_checkin.date_added.isoformat()
 
             occurrence = IssueOccurrence(
                 id=uuid.uuid4().hex,
@@ -541,14 +559,16 @@ class MonitorEnvironment(Model):
                 ],
                 type=occurrence_data["group_type"],
                 issue_title=f"Monitor failure: {self.monitor.name}",
-                subtitle="",
+                subtitle=occurrence_data["subtitle"],
                 evidence_display=[
                     IssueEvidence(
                         name="Failure reason", value=occurrence_data["reason"], important=True
                     ),
                     IssueEvidence(name="Environment", value=self.environment.name, important=False),
                     IssueEvidence(
-                        name="Last check-in", value=last_checkin.isoformat(), important=False
+                        name="Last successful check-in",
+                        value=last_successful_checkin_timestamp,
+                        important=False,
                     ),
                 ],
                 evidence_data={},
@@ -613,13 +633,30 @@ class MonitorEnvironment(Model):
         MonitorEnvironment.objects.filter(id=self.id).exclude(last_checkin__gt=ts).update(**params)
 
 
-def get_occurrence_data(reason: str):
+def get_occurrence_data(reason: str, **kwargs):
     if reason == MonitorFailure.MISSED_CHECKIN:
-        return {"group_type": MonitorCheckInMissed, "level": "warning", "reason": "missed_checkin"}
+        expected_time = kwargs.get("expected_time", "the expected time")
+        return {
+            "group_type": MonitorCheckInMissed,
+            "level": "warning",
+            "reason": "missed_checkin",
+            "subtitle": f"No check-in reported at {expected_time}.",
+        }
     elif reason == MonitorFailure.DURATION:
-        return {"group_type": MonitorCheckInTimeout, "level": "error", "reason": "duration"}
+        timeout = kwargs.get("timeout", 30)
+        return {
+            "group_type": MonitorCheckInTimeout,
+            "level": "error",
+            "reason": "duration",
+            "subtitle": f"Check-in exceeded maximum duration of {timeout} minutes.",
+        }
 
-    return {"group_type": MonitorCheckInFailure, "level": "error", "reason": "error"}
+    return {
+        "group_type": MonitorCheckInFailure,
+        "level": "error",
+        "reason": "error",
+        "subtitle": "An error occurred during the last check-in.",
+    }
 
 
 @receiver(pre_save, sender=MonitorEnvironment)

+ 8 - 2
src/sentry/monitors/tasks.py

@@ -88,7 +88,10 @@ def check_monitors(current_datetime=None):
                 expected_time=expected_time,
                 monitor_config=monitor.get_validated_config(),
             )
-            monitor_environment.mark_failed(reason=MonitorFailure.MISSED_CHECKIN)
+            monitor_environment.mark_failed(
+                reason=MonitorFailure.MISSED_CHECKIN,
+                occurrence_context={"expected_time": expected_time},
+            )
         except Exception:
             logger.exception("Exception in check_monitors - mark missed")
 
@@ -124,6 +127,9 @@ def check_monitors(current_datetime=None):
                 status__in=[CheckInStatus.OK, CheckInStatus.ERROR],
             ).exists()
             if not has_newer_result:
-                monitor_environment.mark_failed(reason=MonitorFailure.DURATION)
+                monitor_environment.mark_failed(
+                    reason=MonitorFailure.DURATION,
+                    occurrence_context={"timeout": int(timeout.total_seconds() // 60)},
+                )
         except Exception:
             logger.exception("Exception in check_monitors - mark timeout")

+ 31 - 11
tests/sentry/monitors/test_models.py

@@ -13,7 +13,9 @@ from sentry.issues.grouptype import (
     MonitorCheckInTimeout,
 )
 from sentry.monitors.models import (
+    CheckInStatus,
     Monitor,
+    MonitorCheckIn,
     MonitorEnvironment,
     MonitorEnvironmentLimitsExceeded,
     MonitorFailure,
@@ -314,6 +316,13 @@ class MonitorEnvironmentTestCase(TestCase):
             status=monitor.status,
         )
 
+        successful_check_in = MonitorCheckIn.objects.create(
+            monitor=monitor,
+            monitor_environment=monitor_environment,
+            project_id=self.project.id,
+            status=CheckInStatus.OK,
+        )
+
         last_checkin = timezone.now()
         assert monitor_environment.mark_failed(last_checkin=last_checkin)
 
@@ -328,7 +337,7 @@ class MonitorEnvironmentTestCase(TestCase):
                 "project_id": self.project.id,
                 "fingerprint": [hash_from_values(["monitor", str(monitor.guid), "error"])],
                 "issue_title": f"Monitor failure: {monitor.name}",
-                "subtitle": "",
+                "subtitle": "An error occurred during the last check-in.",
                 "resource_id": None,
                 "evidence_data": {},
                 "evidence_display": [
@@ -339,8 +348,8 @@ class MonitorEnvironmentTestCase(TestCase):
                         "important": False,
                     },
                     {
-                        "name": "Last check-in",
-                        "value": last_checkin.isoformat(),
+                        "name": "Last successful check-in",
+                        "value": successful_check_in.date_added.isoformat(),
                         "important": False,
                     },
                 ],
@@ -391,9 +400,17 @@ class MonitorEnvironmentTestCase(TestCase):
             environment=self.environment,
             status=monitor.status,
         )
+        successful_check_in = MonitorCheckIn.objects.create(
+            monitor=monitor,
+            monitor_environment=monitor_environment,
+            project_id=self.project.id,
+            status=CheckInStatus.OK,
+        )
         last_checkin = timezone.now()
         assert monitor_environment.mark_failed(
-            last_checkin=last_checkin, reason=MonitorFailure.DURATION
+            last_checkin=last_checkin,
+            reason=MonitorFailure.DURATION,
+            occurrence_context={"timeout": 30},
         )
 
         assert len(mock_produce_occurrence_to_kafka.mock_calls) == 1
@@ -407,7 +424,7 @@ class MonitorEnvironmentTestCase(TestCase):
                 "project_id": self.project.id,
                 "fingerprint": [hash_from_values(["monitor", str(monitor.guid), "duration"])],
                 "issue_title": f"Monitor failure: {monitor.name}",
-                "subtitle": "",
+                "subtitle": "Check-in exceeded maximum duration of 30 minutes.",
                 "resource_id": None,
                 "evidence_data": {},
                 "evidence_display": [
@@ -418,8 +435,8 @@ class MonitorEnvironmentTestCase(TestCase):
                         "important": False,
                     },
                     {
-                        "name": "Last check-in",
-                        "value": last_checkin.isoformat(),
+                        "name": "Last successful check-in",
+                        "value": successful_check_in.date_added.isoformat(),
                         "important": False,
                     },
                 ],
@@ -471,8 +488,11 @@ class MonitorEnvironmentTestCase(TestCase):
             status=monitor.status,
         )
         last_checkin = timezone.now()
+        expected_time = monitor.get_next_scheduled_checkin_without_margin(last_checkin)
         assert monitor_environment.mark_failed(
-            last_checkin=last_checkin, reason=MonitorFailure.MISSED_CHECKIN
+            last_checkin=last_checkin,
+            reason=MonitorFailure.MISSED_CHECKIN,
+            occurrence_context={"expected_time": expected_time},
         )
 
         monitor.refresh_from_db()
@@ -490,7 +510,7 @@ class MonitorEnvironmentTestCase(TestCase):
                 "project_id": self.project.id,
                 "fingerprint": [hash_from_values(["monitor", str(monitor.guid), "missed_checkin"])],
                 "issue_title": f"Monitor failure: {monitor.name}",
-                "subtitle": "",
+                "subtitle": f"No check-in reported at {expected_time}.",
                 "resource_id": None,
                 "evidence_data": {},
                 "evidence_display": [
@@ -501,8 +521,8 @@ class MonitorEnvironmentTestCase(TestCase):
                         "important": False,
                     },
                     {
-                        "name": "Last check-in",
-                        "value": last_checkin.isoformat(),
+                        "name": "Last successful check-in",
+                        "value": "None",
                         "important": False,
                     },
                 ],