Browse Source

Revert "fix(crons): Correctly compute next_checkin for tz aware monitors (#58550)"

This reverts commit ed986f16286f32c459a53cd5df65a03d9bdbe80f.

Co-authored-by: rjo100 <7078270+rjo100@users.noreply.github.com>
getsentry-bot 1 year ago
parent
commit
978589d97d
3 changed files with 20 additions and 92 deletions
  1. 2 5
      src/sentry/monitors/models.py
  2. 8 14
      src/sentry/monitors/tasks.py
  3. 10 73
      tests/sentry/monitors/test_tasks.py

+ 2 - 5
src/sentry/monitors/models.py

@@ -253,10 +253,6 @@ class Monitor(Model):
 
         raise NotImplementedError("unknown schedule_type")
 
-    @property
-    def timezone(self):
-        return pytz.timezone(self.config.get("timezone") or "UTC")
-
     def get_schedule_type_display(self):
         return ScheduleType.get_name(self.config["schedule_type"])
 
@@ -269,7 +265,8 @@ class Monitor(Model):
         """
         from sentry.monitors.schedule import get_next_schedule
 
-        return get_next_schedule(last_checkin.astimezone(self.timezone), self.schedule)
+        tz = pytz.timezone(self.config.get("timezone") or "UTC")
+        return get_next_schedule(last_checkin.astimezone(tz), self.schedule)
 
     def get_next_expected_checkin_latest(self, last_checkin: datetime) -> datetime:
         """

+ 8 - 14
src/sentry/monitors/tasks.py

@@ -165,8 +165,7 @@ def try_monitor_tasks_trigger(ts: datetime, partition: int):
     # close, but in the case of a backlog, this will be much higher
     total_delay = datetime.now().timestamp() - slowest_part_ts
 
-    # Keep tick datetime objects timezone aware
-    tick = timezone.utc.localize(datetime.fromtimestamp(slowest_part_ts))
+    tick = datetime.fromtimestamp(slowest_part_ts)
 
     logger.info("monitors.consumer.clock_tick", extra={"reference_datetime": str(tick)})
     metrics.gauge("monitors.task.clock_delay", total_delay, sample_rate=1.0)
@@ -310,13 +309,11 @@ def mark_environment_missing(monitor_environment_id: int, ts: datetime):
     # Specifically important for interval where it's a function of some
     # starting time.
     #
-    # When computing our timestamps MUST be in the correct timezone of the
-    # monitor to compute the previous schedule
-    most_recent_expected_ts = get_prev_schedule(
-        expected_time.astimezone(monitor.timezone),
-        ts.astimezone(monitor.timezone),
-        monitor.schedule,
-    )
+    # Trim tzinfo. This is always UTC. ts does not have tzinfo and
+    # get_prev_schedule will fail if the start and reference time have
+    # different tzinfo.
+    start_ts = expected_time.replace(tzinfo=None)
+    most_recent_expected_ts = get_prev_schedule(start_ts, ts, monitor.schedule)
 
     mark_failed(checkin, ts=most_recent_expected_ts)
 
@@ -382,10 +379,7 @@ def mark_checkin_timeout(checkin_id: int, ts: datetime):
         # their configured time-out time.
         #
         # See `test_timeout_using_interval`
-        most_recent_expected_ts = get_prev_schedule(
-            checkin.date_added.astimezone(monitor.timezone),
-            ts.astimezone(monitor.timezone),
-            monitor.schedule,
-        )
+        start_ts = checkin.date_added.replace(tzinfo=None)
+        most_recent_expected_ts = get_prev_schedule(start_ts, ts, monitor.schedule)
 
         mark_failed(checkin, ts=most_recent_expected_ts)

+ 10 - 73
tests/sentry/monitors/test_tasks.py

@@ -1,10 +1,9 @@
-from datetime import timedelta
+from datetime import datetime, timedelta
 from typing import MutableMapping
 from unittest import mock
 
 import msgpack
 import pytest
-import pytz
 from arroyo import Partition, Topic
 from arroyo.backends.kafka import KafkaPayload
 from confluent_kafka.admin import PartitionMetadata
@@ -37,10 +36,7 @@ def make_ref_time(**kwargs):
     To accurately reflect the real usage of this task, we want the ref time
     to be truncated down to a minute for our tests.
     """
-    tz_name = kwargs.pop("timezone", "UTC")
-
-    ts = timezone.now().replace(**kwargs, tzinfo=None)
-    ts = pytz.timezone(tz_name).localize(ts)
+    ts = timezone.now().replace(**kwargs)
 
     # Typically the task will not run exactly on the minute, but it will
     # run very close, let's say for our test that it runs 12 seconds after
@@ -49,8 +45,10 @@ def make_ref_time(**kwargs):
     # This is testing that the task correctly clamps its reference time
     # down to the minute.
     #
-    # Task timestamps are in UTC, convert our reference time to UTC for this
-    task_run_ts = ts.astimezone(timezone.utc).replace(second=12, microsecond=0)
+    # NOTE: We also remove the timezone info from the task run timestamp, since
+    # it receives a date time object from the kafka producer. This helps test
+    # for bad timezone
+    task_run_ts = ts.replace(second=12, microsecond=0, tzinfo=None)
 
     # Fan-out tasks recieve a floored version of the timestamp
     sub_task_run_ts = task_run_ts.replace(second=0)
@@ -126,67 +124,6 @@ class MonitorTaskCheckMissingTest(TestCase):
         assert missed_checkin.expected_time == next_checkin
         assert missed_checkin.monitor_config == monitor.config
 
-    @mock.patch("sentry.monitors.tasks.mark_environment_missing")
-    def test_missing_checkin_with_timezone(self, mark_environment_missing_mock):
-        org = self.create_organization()
-        project = self.create_project(organization=org)
-
-        # 1st of Febuary midnight Arizona time
-        task_run_ts, sub_task_run_ts, ts = make_ref_time(
-            month=2, day=1, hour=0, minute=0, timezone="US/Arizona"
-        )
-
-        monitor = Monitor.objects.create(
-            organization_id=org.id,
-            project_id=project.id,
-            type=MonitorType.CRON_JOB,
-            config={
-                "schedule": "0 0 * * *",
-                "schedule_type": ScheduleType.CRONTAB,
-                "timezone": "US/Arizona",
-                "max_runtime": None,
-                "checkin_margin": None,
-            },
-        )
-
-        # Last check-in was yesterday
-        monitor_environment = MonitorEnvironment.objects.create(
-            monitor=monitor,
-            environment=self.environment,
-            last_checkin=ts - timedelta(days=1),
-            next_checkin=ts,
-            next_checkin_latest=ts + timedelta(minutes=1),
-            status=MonitorStatus.OK,
-        )
-
-        # No missed check-ins generated any hour between the last check-in and
-        # the upcoming checkin. Testing like this to validate any kind of
-        # strange timezone related issues.
-        for hour in range(24):
-            check_missing(task_run_ts - timedelta(days=1) + timedelta(hours=hour + 1))
-
-        assert mark_environment_missing_mock.delay.call_count == 0
-
-        # Mark check in missed a minute later
-        check_missing(task_run_ts + timedelta(minutes=1))
-        assert mark_environment_missing_mock.delay.call_count == 1
-
-        # Missed check-in correctly updates
-        mark_environment_missing(monitor_environment.id, sub_task_run_ts + timedelta(minutes=1))
-        monitor_environment.refresh_from_db()
-
-        assert MonitorCheckIn.objects.filter(
-            monitor_environment=monitor_environment.id,
-            status=CheckInStatus.MISSED,
-        ).exists()
-
-        # Use UTC timezone for comparison so failed asserts are easier to read,
-        # since next_checkin will bome back as UTC. This does NOT affect the assert
-        utc_ts = ts.astimezone(timezone.utc)
-
-        assert monitor_environment.next_checkin == utc_ts + timedelta(days=1)
-        assert monitor_environment.next_checkin_latest == utc_ts + timedelta(days=1, minutes=1)
-
     @mock.patch("sentry.monitors.tasks.mark_environment_missing")
     def test_missing_checkin_with_margin(self, mark_environment_missing_mock):
         org = self.create_organization()
@@ -1010,7 +947,7 @@ def test_clock_pulse(checkin_producer_mock):
 
 @mock.patch("sentry.monitors.tasks._dispatch_tasks")
 def test_monitor_task_trigger(dispatch_tasks):
-    now = timezone.now().replace(second=0, microsecond=0)
+    now = datetime.now().replace(second=0, microsecond=0)
 
     # Assumes a single partition for simplicitly. Multi-partition cases are
     # covered in further test cases.
@@ -1046,7 +983,7 @@ def test_monitor_task_trigger_partition_desync(dispatch_tasks):
     timestamps in a non-monotonic order. In this scenario we want to make
     sure we still only trigger once
     """
-    now = timezone.now().replace(second=0, microsecond=0)
+    now = datetime.now().replace(second=0, microsecond=0)
 
     # First message in partition 0 with timestamp just after the minute
     # boundary triggers the task
@@ -1076,7 +1013,7 @@ def test_monitor_task_trigger_partition_sync(dispatch_tasks):
     When the kafka topic has multiple partitions we want to only tick our clock
     forward once all partitions have caught up. This test simulates that
     """
-    now = timezone.now().replace(second=0, microsecond=0)
+    now = datetime.now().replace(second=0, microsecond=0)
 
     # Tick for 4 partitions
     try_monitor_tasks_trigger(ts=now, partition=0)
@@ -1104,7 +1041,7 @@ def test_monitor_task_trigger_partition_tick_skip(dispatch_tasks):
     In a scenario where all partitions move multiple ticks past the slowest
     partition we may end up skipping a tick.
     """
-    now = timezone.now().replace(second=0, microsecond=0)
+    now = datetime.now().replace(second=0, microsecond=0)
 
     # Tick for 4 partitions
     try_monitor_tasks_trigger(ts=now, partition=0)