Browse Source

feat(relocation): Create cancel/abort endpoints (#61209)

The main difference between these is severity: cancel stops the
relocation in an orderly manner, where certain guarantees about state
can be expected to be upheld, while abort just shuts everything down as
fast as possible.

Issue: getsentry/team-ospo#222
Alex Zaslavsky 1 year ago
parent
commit
76e761784f

+ 57 - 0
src/sentry/api/endpoints/relocations/abort.py

@@ -0,0 +1,57 @@
+from rest_framework.request import Request
+from rest_framework.response import Response
+
+from sentry.api.api_owners import ApiOwner
+from sentry.api.api_publish_status import ApiPublishStatus
+from sentry.api.base import Endpoint, region_silo_endpoint
+from sentry.api.exceptions import ResourceDoesNotExist
+from sentry.api.permissions import SuperuserPermission
+from sentry.api.serializers import serialize
+from sentry.models.relocation import Relocation
+
+ERR_NOT_ABORTABLE_STATUS = (
+    "Relocations can only be cancelled if they are not yet complete; this relocation is `SUCCESS`."
+)
+
+
+@region_silo_endpoint
+class RelocationAbortEndpoint(Endpoint):
+    owner = ApiOwner.RELOCATION
+    publish_status = {
+        # TODO(getsentry/team-ospo#214): Stabilize before GA.
+        "PUT": ApiPublishStatus.EXPERIMENTAL,
+    }
+    permission_classes = (SuperuserPermission,)
+
+    def put(self, request: Request, relocation_uuid: str) -> Response:
+        """
+        Immediately aborts an in-progress relocation.
+        ``````````````````````````````````````````````````
+
+        This operation differs from the superficially similar `/cancel/` endpoint in that it does
+        not attempt to do an orderly teardown, and instead fails the relocation immediately. An
+        abrupt shutdown like this could leave data in an unpredictable state, so unless you have a
+        very good reason, you should prefer `/cancel/` to `/abort/`, and only use the latter when
+        the former fails.
+
+        :pparam string relocation_uuid: a UUID identifying the relocation.
+
+        :auth: required
+        """
+
+        try:
+            relocation: Relocation = Relocation.objects.get(uuid=relocation_uuid)
+        except Relocation.DoesNotExist:
+            raise ResourceDoesNotExist
+
+        if relocation.status in {Relocation.Status.FAILURE.value, Relocation.Status.SUCCESS.value}:
+            return Response(
+                {"detail": ERR_NOT_ABORTABLE_STATUS},
+                status=400,
+            )
+
+        relocation.status = Relocation.Status.FAILURE.value
+        relocation.failure_reason = "This relocation was aborted by an administrator."
+        relocation.save()
+
+        return self.respond(serialize(relocation))

+ 114 - 0
src/sentry/api/endpoints/relocations/cancel.py

@@ -0,0 +1,114 @@
+from string import Template
+
+from django.db import DatabaseError
+from django.db.models import F
+from rest_framework.request import Request
+from rest_framework.response import Response
+
+from sentry.api.api_owners import ApiOwner
+from sentry.api.api_publish_status import ApiPublishStatus
+from sentry.api.base import Endpoint, region_silo_endpoint
+from sentry.api.endpoints.relocations import ERR_UNKNOWN_RELOCATION_STEP
+from sentry.api.exceptions import ResourceDoesNotExist
+from sentry.api.permissions import SuperuserPermission
+from sentry.api.serializers import serialize
+from sentry.models.relocation import Relocation
+
+ERR_NOT_CANCELLABLE_STATUS = (
+    "Relocations can only be cancelled if they are not yet complete; this relocation is `SUCCESS`."
+)
+ERR_COULD_NOT_CANCEL_RELOCATION = (
+    "Could not cancel relocation, perhaps because it has already completed."
+)
+ERR_COULD_NOT_CANCEL_RELOCATION_AT_STEP = Template(
+    """Could not cancel relocation at step `$step`; this is likely because this step has already
+    started."""
+)
+
+
+@region_silo_endpoint
+class RelocationCancelEndpoint(Endpoint):
+    owner = ApiOwner.RELOCATION
+    publish_status = {
+        # TODO(getsentry/team-ospo#214): Stabilize before GA.
+        "PUT": ApiPublishStatus.EXPERIMENTAL,
+    }
+    permission_classes = (SuperuserPermission,)
+
+    def put(self, request: Request, relocation_uuid: str) -> Response:
+        """
+        Cancel an in-progress relocation.
+        ``````````````````````````````````````````````````
+
+        This operation differs from the superficially similar `/abort/` endpoint in that it does an orderly cancellation, making sure to complete currently active relocation step before stopping. Conversely, the `/abort/` endpoint merely stops work as quickly as possible.
+
+        This command accepts a single optional parameter, which specifies the step BEFORE which the
+        cancellation should occur. If no such parameter is specified, the cancellation is scheduled
+        for the step immediately following the currently active one, if possible.
+
+        :pparam string relocation_uuid: a UUID identifying the relocation.
+        :param string atStep: an optional string identifying the step to cancel prior to; must be
+                               greater than the currently active step, and one of: `PREPROCESSING`,
+                               `VALIDATING`, `IMPORTING`, `POSTPROCESSING`, `NOTIFYING`.
+
+        :auth: required
+        """
+
+        try:
+            relocation: Relocation = Relocation.objects.get(uuid=relocation_uuid)
+        except Relocation.DoesNotExist:
+            raise ResourceDoesNotExist
+
+        if relocation.status == Relocation.Status.FAILURE.value:
+            return self.respond(serialize(relocation))
+        if relocation.status == Relocation.Status.SUCCESS.value:
+            return Response(
+                {"detail": ERR_NOT_CANCELLABLE_STATUS},
+                status=400,
+            )
+
+        at_step = request.data.get("atStep", None)
+        if at_step is not None:
+            try:
+                step = Relocation.Step[at_step.upper()]
+            except KeyError:
+                return Response(
+                    {"detail": ERR_UNKNOWN_RELOCATION_STEP.substitute(step=at_step)},
+                    status=400,
+                )
+
+            if step in {
+                Relocation.Step.UNKNOWN,
+                Relocation.Step.UPLOADING,
+                Relocation.Step.COMPLETED,
+            }:
+                return Response(
+                    {"detail": ERR_COULD_NOT_CANCEL_RELOCATION_AT_STEP.substitute(step=at_step)},
+                    status=400,
+                )
+
+            try:
+                relocation.scheduled_cancel_at_step = step.value
+                relocation.save()
+            except DatabaseError:
+                return Response(
+                    {"detail": ERR_COULD_NOT_CANCEL_RELOCATION},
+                    status=400,
+                )
+            pass
+        else:
+            try:
+                updated = Relocation.objects.filter(
+                    uuid=relocation.uuid, step__lt=Relocation.Step.COMPLETED.value - 1
+                ).update(scheduled_cancel_at_step=F("step") + 1)
+                if not updated:
+                    raise DatabaseError("Cannot set `scheduled_cancel_at_step` to `COMPLETED`")
+
+                relocation.refresh_from_db()
+            except DatabaseError:
+                return Response(
+                    {"detail": ERR_COULD_NOT_CANCEL_RELOCATION},
+                    status=400,
+                )
+
+        return self.respond(serialize(relocation))

+ 12 - 0
src/sentry/api/urls.py

@@ -38,6 +38,8 @@ from sentry.api.endpoints.release_thresholds.release_threshold_index import (
 from sentry.api.endpoints.release_thresholds.release_threshold_status_index import (
     ReleaseThresholdStatusIndexEndpoint,
 )
+from sentry.api.endpoints.relocations.abort import RelocationAbortEndpoint
+from sentry.api.endpoints.relocations.cancel import RelocationCancelEndpoint
 from sentry.api.endpoints.relocations.details import RelocationDetailsEndpoint
 from sentry.api.endpoints.relocations.index import RelocationIndexEndpoint
 from sentry.api.endpoints.relocations.pause import RelocationPauseEndpoint
@@ -757,6 +759,16 @@ RELOCATION_URLS = [
         RelocationDetailsEndpoint.as_view(),
         name="sentry-api-0-relocations-details",
     ),
+    re_path(
+        r"^(?P<relocation_uuid>[^\/]+)/abort/$",
+        RelocationAbortEndpoint.as_view(),
+        name="sentry-api-0-relocations-abort",
+    ),
+    re_path(
+        r"^(?P<relocation_uuid>[^\/]+)/cancel/$",
+        RelocationCancelEndpoint.as_view(),
+        name="sentry-api-0-relocations-cancel",
+    ),
     re_path(
         r"^(?P<relocation_uuid>[^\/]+)/pause/$",
         RelocationPauseEndpoint.as_view(),

+ 92 - 0
tests/sentry/api/endpoints/relocations/test_abort.py

@@ -0,0 +1,92 @@
+from datetime import datetime, timezone
+from uuid import uuid4
+
+from sentry.api.endpoints.relocations.abort import ERR_NOT_ABORTABLE_STATUS
+from sentry.models.relocation import Relocation
+from sentry.testutils.cases import APITestCase
+from sentry.testutils.silo import region_silo_test
+from sentry.utils.relocation import OrderedTask
+
+TEST_DATE_ADDED = datetime(2023, 1, 23, 1, 23, 45, tzinfo=timezone.utc)
+
+
+@region_silo_test
+class AbortRelocationTest(APITestCase):
+    endpoint = "sentry-api-0-relocations-abort"
+
+    def setUp(self):
+        super().setUp()
+        self.owner = self.create_user(
+            email="owner", is_superuser=False, is_staff=True, is_active=True
+        )
+        self.superuser = self.create_user(
+            "superuser", is_superuser=True, is_staff=True, is_active=True
+        )
+        self.relocation: Relocation = Relocation.objects.create(
+            date_added=TEST_DATE_ADDED,
+            creator_id=self.superuser.id,
+            owner_id=self.owner.id,
+            status=Relocation.Status.IN_PROGRESS.value,
+            step=Relocation.Step.PREPROCESSING.value,
+            want_org_slugs='["foo"]',
+            want_usernames='["alice", "bob"]',
+            latest_notified=Relocation.EmailKind.STARTED.value,
+            latest_task=OrderedTask.PREPROCESSING_SCAN.name,
+            latest_task_attempts=1,
+        )
+
+    def test_good_abort_in_progress(self):
+        self.login_as(user=self.superuser, superuser=True)
+        response = self.client.put(f"/api/0/relocations/{str(self.relocation.uuid)}/abort/")
+
+        assert response.status_code == 200
+        assert response.data["status"] == Relocation.Status.FAILURE.name
+        assert response.data["step"] == Relocation.Step.PREPROCESSING.name
+
+    def test_good_abort_paused(self):
+        self.login_as(user=self.superuser, superuser=True)
+        self.relocation.status = Relocation.Status.PAUSE.value
+        self.relocation.save()
+        response = self.client.put(f"/api/0/relocations/{str(self.relocation.uuid)}/abort/")
+
+        assert response.status_code == 200
+        assert response.data["status"] == Relocation.Status.FAILURE.name
+        assert response.data["step"] == Relocation.Step.PREPROCESSING.name
+
+    def test_bad_already_succeeded(self):
+        self.login_as(user=self.superuser, superuser=True)
+        self.relocation.status = Relocation.Status.SUCCESS.value
+        self.relocation.save()
+        response = self.client.put(f"/api/0/relocations/{str(self.relocation.uuid)}/abort/")
+
+        assert response.status_code == 400
+        assert response.data.get("detail") is not None
+        assert response.data.get("detail") == ERR_NOT_ABORTABLE_STATUS
+
+    def test_bad_already_failed(self):
+        self.login_as(user=self.superuser, superuser=True)
+        self.relocation.status = Relocation.Status.FAILURE.value
+        self.relocation.save()
+        response = self.client.put(f"/api/0/relocations/{str(self.relocation.uuid)}/abort/")
+
+        assert response.status_code == 400
+        assert response.data.get("detail") is not None
+        assert response.data.get("detail") == ERR_NOT_ABORTABLE_STATUS
+
+    def test_bad_not_found(self):
+        self.login_as(user=self.superuser, superuser=True)
+        does_not_exist_uuid = uuid4().hex
+        response = self.client.put(f"/api/0/relocations/{str(does_not_exist_uuid)}/abort/")
+
+        assert response.status_code == 404
+
+    def test_bad_no_auth(self):
+        response = self.client.put(f"/api/0/relocations/{str(self.relocation.uuid)}/abort/")
+
+        assert response.status_code == 401
+
+    def test_bad_no_superuser(self):
+        self.login_as(user=self.superuser, superuser=False)
+        response = self.client.put(f"/api/0/relocations/{str(self.relocation.uuid)}/abort/")
+
+        assert response.status_code == 403

+ 228 - 0
tests/sentry/api/endpoints/relocations/test_cancel.py

@@ -0,0 +1,228 @@
+from datetime import datetime, timezone
+from uuid import uuid4
+
+from sentry.api.endpoints.relocations import ERR_UNKNOWN_RELOCATION_STEP
+from sentry.api.endpoints.relocations.cancel import (
+    ERR_COULD_NOT_CANCEL_RELOCATION,
+    ERR_COULD_NOT_CANCEL_RELOCATION_AT_STEP,
+    ERR_NOT_CANCELLABLE_STATUS,
+)
+from sentry.models.relocation import Relocation
+from sentry.testutils.cases import APITestCase
+from sentry.testutils.silo import region_silo_test
+from sentry.utils.relocation import OrderedTask
+
+TEST_DATE_ADDED = datetime(2023, 1, 23, 1, 23, 45, tzinfo=timezone.utc)
+
+
+@region_silo_test
+class CancelRelocationTest(APITestCase):
+    endpoint = "sentry-api-0-relocations-cancel"
+
+    def setUp(self):
+        super().setUp()
+        self.owner = self.create_user(
+            email="owner", is_superuser=False, is_staff=True, is_active=True
+        )
+        self.superuser = self.create_user(
+            "superuser", is_superuser=True, is_staff=True, is_active=True
+        )
+        self.relocation: Relocation = Relocation.objects.create(
+            date_added=TEST_DATE_ADDED,
+            creator_id=self.superuser.id,
+            owner_id=self.owner.id,
+            status=Relocation.Status.IN_PROGRESS.value,
+            step=Relocation.Step.PREPROCESSING.value,
+            want_org_slugs='["foo"]',
+            want_usernames='["alice", "bob"]',
+            latest_notified=Relocation.EmailKind.STARTED.value,
+            latest_task=OrderedTask.PREPROCESSING_SCAN.name,
+            latest_task_attempts=1,
+        )
+
+    def test_good_cancel_in_progress_at_next_step(self):
+        self.login_as(user=self.superuser, superuser=True)
+        response = self.client.put(f"/api/0/relocations/{str(self.relocation.uuid)}/cancel/")
+
+        assert response.status_code == 200
+        assert response.data["status"] == Relocation.Status.IN_PROGRESS.name
+        assert response.data["step"] == Relocation.Step.PREPROCESSING.name
+        assert response.data["scheduledCancelAtStep"] == Relocation.Step.VALIDATING.name
+
+    def test_good_cancel_paused_at_next_step(self):
+        self.login_as(user=self.superuser, superuser=True)
+        self.relocation.status = Relocation.Status.PAUSE.value
+        self.relocation.save()
+        response = self.client.put(f"/api/0/relocations/{str(self.relocation.uuid)}/cancel/")
+
+        assert response.status_code == 200
+        assert response.data["status"] == Relocation.Status.PAUSE.name
+        assert response.data["step"] == Relocation.Step.PREPROCESSING.name
+        assert response.data["scheduledCancelAtStep"] == Relocation.Step.VALIDATING.name
+
+    def test_good_cancel_in_progress_at_specified_step(self):
+        self.login_as(user=self.superuser, superuser=True)
+        response = self.client.put(
+            f"/api/0/relocations/{str(self.relocation.uuid)}/cancel/",
+            {"atStep": Relocation.Step.IMPORTING.name},
+        )
+
+        assert response.status_code == 200
+        assert response.data["status"] == Relocation.Status.IN_PROGRESS.name
+        assert response.data["step"] == Relocation.Step.PREPROCESSING.name
+        assert response.data["scheduledCancelAtStep"] == Relocation.Step.IMPORTING.name
+
+    def test_good_cancel_paused_at_specified_step(self):
+        self.login_as(user=self.superuser, superuser=True)
+        self.relocation.status = Relocation.Status.PAUSE.value
+        self.relocation.save()
+        response = self.client.put(
+            f"/api/0/relocations/{str(self.relocation.uuid)}/cancel/",
+            {"atStep": Relocation.Step.IMPORTING.name},
+        )
+
+        assert response.status_code == 200
+        assert response.data["status"] == Relocation.Status.PAUSE.name
+        assert response.data["step"] == Relocation.Step.PREPROCESSING.name
+        assert response.data["scheduledCancelAtStep"] == Relocation.Step.IMPORTING.name
+
+    def test_good_cancel_at_future_step(self):
+        self.login_as(user=self.superuser, superuser=True)
+        response = self.client.put(
+            f"/api/0/relocations/{str(self.relocation.uuid)}/cancel/",
+            {"atStep": Relocation.Step.NOTIFYING.name},
+        )
+
+        assert response.status_code == 200
+        assert response.data["status"] == Relocation.Status.IN_PROGRESS.name
+        assert response.data["step"] == Relocation.Step.PREPROCESSING.name
+        assert response.data["scheduledCancelAtStep"] == Relocation.Step.NOTIFYING.name
+
+    def test_good_already_cancelled(self):
+        self.login_as(user=self.superuser, superuser=True)
+        self.relocation.scheduled_cancel_at_step = Relocation.Step.IMPORTING.value
+        self.relocation.save()
+        response = self.client.put(
+            f"/api/0/relocations/{str(self.relocation.uuid)}/cancel/",
+            {"atStep": Relocation.Step.IMPORTING.name},
+        )
+
+        assert response.status_code == 200
+        assert response.data["status"] == Relocation.Status.IN_PROGRESS.name
+        assert response.data["step"] == Relocation.Step.PREPROCESSING.name
+        assert response.data["scheduledCancelAtStep"] == Relocation.Step.IMPORTING.name
+
+    def test_good_already_failed(self):
+        self.login_as(user=self.superuser, superuser=True)
+        self.relocation.status = Relocation.Status.FAILURE.value
+        self.relocation.save()
+        response = self.client.put(
+            f"/api/0/relocations/{str(self.relocation.uuid)}/cancel/",
+            {"atStep": Relocation.Step.PREPROCESSING.name},
+        )
+
+        assert response.status_code == 200
+        assert response.data["status"] == Relocation.Status.FAILURE.name
+        assert response.data["step"] == Relocation.Step.PREPROCESSING.name
+        assert not response.data["scheduledCancelAtStep"]
+
+    def test_bad_not_found(self):
+        self.login_as(user=self.superuser, superuser=True)
+        does_not_exist_uuid = uuid4().hex
+        response = self.client.put(f"/api/0/relocations/{str(does_not_exist_uuid)}/cancel/")
+
+        assert response.status_code == 404
+
+    def test_bad_already_succeeded(self):
+        self.login_as(user=self.superuser, superuser=True)
+        self.relocation.status = Relocation.Status.SUCCESS.value
+        self.relocation.save()
+        response = self.client.put(f"/api/0/relocations/{str(self.relocation.uuid)}/cancel/")
+
+        assert response.status_code == 400
+        assert response.data.get("detail") is not None
+        assert response.data.get("detail") == ERR_NOT_CANCELLABLE_STATUS
+
+    def test_bad_invalid_step(self):
+        self.login_as(user=self.superuser, superuser=True)
+        response = self.client.put(
+            f"/api/0/relocations/{str(self.relocation.uuid)}/cancel/",
+            {"atStep": "nonexistent"},
+        )
+
+        assert response.status_code == 400
+        assert response.data.get("detail") is not None
+        assert response.data.get("detail") == ERR_UNKNOWN_RELOCATION_STEP.substitute(
+            step="nonexistent"
+        )
+
+    def test_bad_unknown_step(self):
+        self.login_as(user=self.superuser, superuser=True)
+        response = self.client.put(
+            f"/api/0/relocations/{str(self.relocation.uuid)}/cancel/",
+            {"atStep": Relocation.Step.UNKNOWN.name},
+        )
+
+        assert response.status_code == 400
+        assert response.data.get("detail") is not None
+        assert response.data.get("detail") == ERR_COULD_NOT_CANCEL_RELOCATION_AT_STEP.substitute(
+            step=Relocation.Step.UNKNOWN.name
+        )
+
+    def test_bad_current_step(self):
+        self.login_as(user=self.superuser, superuser=True)
+        response = self.client.put(
+            f"/api/0/relocations/{str(self.relocation.uuid)}/cancel/",
+            {"atStep": Relocation.Step.PREPROCESSING.name},
+        )
+
+        assert response.status_code == 400
+        assert response.data.get("detail") is not None
+        assert response.data.get("detail") == ERR_COULD_NOT_CANCEL_RELOCATION
+
+    def test_bad_past_step(self):
+        self.login_as(user=self.superuser, superuser=True)
+        response = self.client.put(
+            f"/api/0/relocations/{str(self.relocation.uuid)}/cancel/",
+            {"atStep": Relocation.Step.UPLOADING.name},
+        )
+
+        assert response.status_code == 400
+        assert response.data.get("detail") is not None
+        assert response.data.get("detail") == ERR_COULD_NOT_CANCEL_RELOCATION_AT_STEP.substitute(
+            step=Relocation.Step.UPLOADING.name
+        )
+
+    def test_bad_last_step_specified(self):
+        self.login_as(user=self.superuser, superuser=True)
+        response = self.client.put(
+            f"/api/0/relocations/{str(self.relocation.uuid)}/cancel/",
+            {"atStep": Relocation.Step.COMPLETED.name},
+        )
+
+        assert response.status_code == 400
+        assert response.data.get("detail") is not None
+        assert response.data.get("detail") == ERR_COULD_NOT_CANCEL_RELOCATION_AT_STEP.substitute(
+            step=Relocation.Step.COMPLETED.name
+        )
+
+    def test_bad_last_step_automatic(self):
+        self.login_as(user=self.superuser, superuser=True)
+        self.relocation.step = Relocation.Step.NOTIFYING.value
+        self.relocation.save()
+        response = self.client.put(f"/api/0/relocations/{str(self.relocation.uuid)}/cancel/")
+
+        assert response.status_code == 400
+        assert response.data.get("detail") is not None
+        assert response.data.get("detail") == ERR_COULD_NOT_CANCEL_RELOCATION
+
+    def test_bad_no_auth(self):
+        response = self.client.put(f"/api/0/relocations/{str(self.relocation.uuid)}/cancel/")
+
+        assert response.status_code == 401
+
+    def test_bad_no_superuser(self):
+        self.login_as(user=self.superuser, superuser=False)
+        response = self.client.put(f"/api/0/relocations/{str(self.relocation.uuid)}/cancel/")
+
+        assert response.status_code == 403