Browse Source

fix(relocation): Make `want_usernames` filtered (#71625)

The `want_usernames` list will now only include users that are included
in the orgs slugs enumerated in `want_orgs`, not all users provided in
the import file.
Alex Zaslavsky 9 months ago
parent
commit
449dbe5ce6
2 changed files with 89 additions and 34 deletions
  1. 53 28
      src/sentry/tasks/relocation.py
  2. 36 6
      tests/sentry/tasks/test_relocation.py

+ 53 - 28
src/sentry/tasks/relocation.py

@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import logging
 import re
+from collections import defaultdict
 from dataclasses import dataclass
 from datetime import UTC, datetime, timedelta
 from io import BytesIO
@@ -12,12 +13,14 @@ from zipfile import ZipFile
 import yaml
 from celery.app.task import Task
 from cryptography.fernet import Fernet
+from django.core.exceptions import ValidationError
 from django.db import router, transaction
 from google.cloud.devtools.cloudbuild_v1 import Build
 from google.cloud.devtools.cloudbuild_v1 import CloudBuildClient as CloudBuildClient
 from sentry_sdk import capture_exception
 
 from sentry import analytics
+from sentry.api.helpers.slugs import validate_sentry_slug
 from sentry.api.serializers.rest_framework.base import camel_to_snake_case, convert_dict_key_case
 from sentry.backup.crypto import (
     GCPKMSDecryptor,
@@ -34,6 +37,7 @@ from sentry.models.files.utils import get_relocation_storage
 from sentry.models.importchunk import ControlImportChunkReplica, RegionImportChunk
 from sentry.models.lostpasswordhash import LostPasswordHash as LostPasswordHash
 from sentry.models.organization import Organization
+from sentry.models.organizationmember import OrganizationMember
 from sentry.models.relocation import (
     Relocation,
     RelocationFile,
@@ -98,14 +102,16 @@ ERR_PREPROCESSING_DECRYPTION = """Could not decrypt the imported JSON - are you
                                   correct public key?"""
 ERR_PREPROCESSING_INTERNAL = "Internal error during preprocessing."
 ERR_PREPROCESSING_INVALID_JSON = "Invalid input JSON."
+ERR_PREPROCESSING_INVALID_ORG_SLUG = Template(
+    "You asked to import an organization with the slug $slug, which is not formatted correctly."
+)
 ERR_PREPROCESSING_INVALID_TARBALL = "The import tarball you provided was invalid."
 ERR_PREPROCESSING_NO_USERS = "The provided JSON must contain at least one user."
 ERR_PREPROCESSING_TOO_MANY_USERS = Template(
-    f"The provided JSON must contain $count users but must not exceed the limit of {MAX_USERS_PER_RELOCATION}."
+    f"The provided JSON contains $count users who are members of at least one of the organizations you specified, but must not exceed the limit of {MAX_USERS_PER_RELOCATION}."
 )
-ERR_PREPROCESSING_NO_ORGS = "The provided JSON must contain at least one organization."
 ERR_PREPROCESSING_TOO_MANY_ORGS = Template(
-    f"The provided JSON must contain $count organizations, but must not exceed the limit of {MAX_ORGS_PER_RELOCATION}."
+    f"The provided JSON contains $count organizations matching one of the slugs you specified, but must not exceed the limit of {MAX_ORGS_PER_RELOCATION}."
 )
 ERR_PREPROCESSING_MISSING_ORGS = Template(
     "The following organization slug imports were requested, but could not be found in your submitted JSON: $orgs."
@@ -258,19 +264,23 @@ def preprocessing_scan(uuid: str) -> None:
                 fernet = Fernet(plaintext_data_encryption_key)
                 json_data = fernet.decrypt(unwrapped.encrypted_json_blob).decode("utf-8")
 
-            # Grab usernames and org slugs from the JSON data.
-            usernames = []
-            org_slugs = []
+            # Grab usernames and org slugs from the JSON data, and record which users are members of
+            # which orgs.
+            found_user_org_memberships: dict[int, list[int]] = defaultdict(list)
+            found_org_slugs: dict[int, str] = dict()
+            found_usernames: dict[int, str] = dict()
             try:
                 for json_model in json.loads(json_data):
                     model_name = NormalizedModelName(json_model["model"])
                     if get_model(model_name) == Organization:
-                        org_slugs.append(json_model["fields"]["slug"])
-                        # TODO(getsentry/team-ospo#190): Validate slug using regex, so that we can
-                        # fail early on obviously invalid slugs. Also keeps the database `JSONField`
-                        # from ballooning on bad input.
+                        found_org_slugs[json_model["pk"]] = json_model["fields"]["slug"]
+                    if get_model(model_name) == OrganizationMember:
+                        if json_model["fields"]["user_id"] is not None:
+                            found_user_org_memberships[json_model["fields"]["user_id"]].append(
+                                json_model["fields"]["organization"]
+                            )
                     if get_model(model_name) == User:
-                        usernames.append(json_model["fields"]["username"])
+                        found_usernames[json_model["pk"]] = json_model["fields"]["username"]
                         # TODO(getsentry/team-ospo#190): Validate username using regex, so that we
                         # can fail early on obviously invalid usernames. Also keeps the database
                         # `JSONField` from ballooning on bad input.
@@ -279,43 +289,58 @@ def preprocessing_scan(uuid: str) -> None:
                     relocation, OrderedTask.PREPROCESSING_SCAN, ERR_PREPROCESSING_INVALID_JSON
                 )
 
+            # Discard `found_org_slugs` that were not explicitly requested by the user.
+            want_org_slugs = set(relocation.want_org_slugs)
+            found_org_slugs = {k: v for k, v in found_org_slugs.items() if v in want_org_slugs}
+            found_org_ids = set(found_org_slugs.keys())
+            for slug in want_org_slugs:
+                try:
+                    validate_sentry_slug(slug)
+                except ValidationError:
+                    return fail_relocation(
+                        relocation,
+                        OrderedTask.PREPROCESSING_SCAN,
+                        ERR_PREPROCESSING_INVALID_ORG_SLUG.substitute(slug=slug),
+                    )
+
+            # Discard users that are not members of at least one of the `found_org_slugs`.
+            want_usernames = {
+                v
+                for k, v in found_usernames.items()
+                if found_org_ids & set(found_user_org_memberships[k])
+            }
+
             # Ensure that the data is reasonable and within our set bounds before we start on the
             # next task.
-            if len(usernames) == 0:
+            missing_org_slugs = want_org_slugs - set(found_org_slugs.values())
+            if len(found_usernames) == 0:
                 return fail_relocation(
                     relocation,
                     OrderedTask.PREPROCESSING_SCAN,
                     ERR_PREPROCESSING_NO_USERS,
                 )
-            if len(usernames) > MAX_USERS_PER_RELOCATION:
-                return fail_relocation(
-                    relocation,
-                    OrderedTask.PREPROCESSING_SCAN,
-                    ERR_PREPROCESSING_TOO_MANY_USERS.substitute(count=len(usernames)),
-                )
-            if len(org_slugs) == 0:
+            if len(missing_org_slugs):
                 return fail_relocation(
                     relocation,
                     OrderedTask.PREPROCESSING_SCAN,
-                    ERR_PREPROCESSING_NO_ORGS,
+                    ERR_PREPROCESSING_MISSING_ORGS.substitute(
+                        orgs=",".join(sorted(missing_org_slugs))
+                    ),
                 )
-            if len(org_slugs) > MAX_ORGS_PER_RELOCATION:
+            if len(found_org_slugs) > MAX_ORGS_PER_RELOCATION:
                 return fail_relocation(
                     relocation,
                     OrderedTask.PREPROCESSING_SCAN,
-                    ERR_PREPROCESSING_TOO_MANY_ORGS.substitute(count=len(org_slugs)),
+                    ERR_PREPROCESSING_TOO_MANY_ORGS.substitute(count=len(found_org_slugs)),
                 )
-            missing_org_slugs = set(relocation.want_org_slugs) - set(org_slugs)
-            if len(missing_org_slugs):
+            if len(want_usernames) > MAX_USERS_PER_RELOCATION:
                 return fail_relocation(
                     relocation,
                     OrderedTask.PREPROCESSING_SCAN,
-                    ERR_PREPROCESSING_MISSING_ORGS.substitute(
-                        orgs=",".join(sorted(missing_org_slugs))
-                    ),
+                    ERR_PREPROCESSING_TOO_MANY_USERS.substitute(count=len(want_usernames)),
                 )
 
-            relocation.want_usernames = sorted(usernames)
+            relocation.want_usernames = sorted(want_usernames)
             relocation.save()
 
             # The user's import data looks basically okay - we can use this opportunity to send a

+ 36 - 6
tests/sentry/tasks/test_relocation.py

@@ -46,9 +46,9 @@ from sentry.tasks.relocation import (
     ERR_PREPROCESSING_DECRYPTION,
     ERR_PREPROCESSING_INTERNAL,
     ERR_PREPROCESSING_INVALID_JSON,
+    ERR_PREPROCESSING_INVALID_ORG_SLUG,
     ERR_PREPROCESSING_INVALID_TARBALL,
     ERR_PREPROCESSING_MISSING_ORGS,
-    ERR_PREPROCESSING_NO_ORGS,
     ERR_PREPROCESSING_NO_USERS,
     ERR_PREPROCESSING_TOO_MANY_ORGS,
     ERR_PREPROCESSING_TOO_MANY_USERS,
@@ -310,7 +310,6 @@ class PreprocessingScanTest(RelocationTaskTestCase):
         assert relocation.want_usernames == [
             "admin@example.com",
             "member@example.com",
-            "superadmin@example.com",
         ]
         assert relocation.latest_notified == Relocation.EmailKind.STARTED.value
 
@@ -339,7 +338,6 @@ class PreprocessingScanTest(RelocationTaskTestCase):
         assert relocation.want_usernames == [
             "admin@example.com",
             "member@example.com",
-            "superadmin@example.com",
         ]
         assert relocation.latest_notified == Relocation.EmailKind.STARTED.value
 
@@ -536,7 +534,7 @@ class PreprocessingScanTest(RelocationTaskTestCase):
         assert relocation.latest_notified == Relocation.EmailKind.FAILED.value
         assert relocation.failure_reason == ERR_PREPROCESSING_NO_USERS
 
-    @patch("sentry.tasks.relocation.MAX_USERS_PER_RELOCATION", 0)
+    @patch("sentry.tasks.relocation.MAX_USERS_PER_RELOCATION", 1)
     def test_fail_too_many_users(
         self,
         preprocessing_transfer_mock: Mock,
@@ -559,7 +557,7 @@ class PreprocessingScanTest(RelocationTaskTestCase):
         relocation = Relocation.objects.get(uuid=self.uuid)
         assert relocation.status == Relocation.Status.FAILURE.value
         assert relocation.latest_notified == Relocation.EmailKind.FAILED.value
-        assert relocation.failure_reason == ERR_PREPROCESSING_TOO_MANY_USERS.substitute(count=3)
+        assert relocation.failure_reason == ERR_PREPROCESSING_TOO_MANY_USERS.substitute(count=2)
 
     def test_fail_no_orgs(
         self,
@@ -585,7 +583,9 @@ class PreprocessingScanTest(RelocationTaskTestCase):
         relocation = Relocation.objects.get(uuid=self.uuid)
         assert relocation.status == Relocation.Status.FAILURE.value
         assert relocation.latest_notified == Relocation.EmailKind.FAILED.value
-        assert relocation.failure_reason == ERR_PREPROCESSING_NO_ORGS
+        assert relocation.failure_reason == ERR_PREPROCESSING_MISSING_ORGS.substitute(
+            orgs="testing"
+        )
 
     @patch("sentry.tasks.relocation.MAX_ORGS_PER_RELOCATION", 0)
     def test_fail_too_many_orgs(
@@ -642,6 +642,36 @@ class PreprocessingScanTest(RelocationTaskTestCase):
             orgs=",".join(orgs)
         )
 
+    def test_fail_invalid_org_slug(
+        self,
+        preprocessing_transfer_mock: Mock,
+        fake_message_builder: Mock,
+        fake_kms_client: FakeKeyManagementServiceClient,
+    ):
+        orgs = ["$$##"]
+        relocation = Relocation.objects.get(uuid=self.uuid)
+        relocation.want_org_slugs = orgs
+        relocation.save()
+        self.mock_message_builder(fake_message_builder)
+        self.mock_kms_client(fake_kms_client)
+
+        preprocessing_scan(self.uuid)
+
+        assert fake_message_builder.call_count == 1
+        assert fake_message_builder.call_args.kwargs["type"] == "relocation.failed"
+        fake_message_builder.return_value.send_async.assert_called_once_with(
+            to=[self.owner.email, self.superuser.email]
+        )
+
+        assert preprocessing_transfer_mock.call_count == 0
+
+        relocation = Relocation.objects.get(uuid=self.uuid)
+        assert relocation.status == Relocation.Status.FAILURE.value
+        assert relocation.latest_notified == Relocation.EmailKind.FAILED.value
+        assert relocation.failure_reason == ERR_PREPROCESSING_INVALID_ORG_SLUG.substitute(
+            slug="$$##"
+        )
+
 
 @patch("sentry.utils.relocation.MessageBuilder")
 @patch("sentry.tasks.relocation.preprocessing_baseline_config.apply_async")