Browse Source

feat(backup): Enable sanitization for User and Config scope models (#69162)

All models with the `Config` or `User` `RelocationScope` should now be
properly sanitized, with the exception of `*Option` models, which will
need more complex logic, to be added later.
Alex Zaslavsky 10 months ago
parent
commit
8d933cbcf9

+ 187 - 4
src/sentry/backup/sanitize.py

@@ -1,8 +1,12 @@
+import ipaddress
 from collections.abc import Callable
 from collections.abc import Callable
 from copy import deepcopy
 from copy import deepcopy
 from dataclasses import dataclass
 from dataclasses import dataclass
 from datetime import UTC, datetime, timedelta, timezone
 from datetime import UTC, datetime, timedelta, timezone
+from ipaddress import IPv4Address, IPv6Address, ip_address
 from random import choice, randint
 from random import choice, randint
+from urllib.parse import urlparse, urlunparse
+from uuid import UUID, uuid4
 
 
 import petname
 import petname
 from dateutil.parser import parse as parse_datetime
 from dateutil.parser import parse as parse_datetime
@@ -35,6 +39,17 @@ UPPER_CASE_NON_HEX = {
 LOWER_CASE_HEX = {c.lower() for c in UPPER_CASE_HEX}
 LOWER_CASE_HEX = {c.lower() for c in UPPER_CASE_HEX}
 LOWER_CASE_NON_HEX = {c.lower() for c in UPPER_CASE_NON_HEX}
 LOWER_CASE_NON_HEX = {c.lower() for c in UPPER_CASE_NON_HEX}
 
 
+MAX_IPV4 = (2**ipaddress.IPV4LENGTH) - 1
+MAX_IPV6 = (2**ipaddress.IPV6LENGTH) - 1
+
+
+def random_ipv4():
+    return str(ipaddress.IPv4Address(randint(0, MAX_IPV4)))
+
+
+def random_ipv6():
+    return str(ipaddress.IPv6Address(randint(0, MAX_IPV6)))
+
 
 
 class SanitizationError(Exception):
 class SanitizationError(Exception):
     """
     """
@@ -64,7 +79,8 @@ class UnrecognizedDatetimeError(SanitizationError):
 @dataclass
 @dataclass
 class SanitizableField:
 class SanitizableField:
     """
     """
-    A pairing a of a `NormalizedModelName` with a field in that model, specifying the target for a sanitization operation.
+    A pairing of a `NormalizedModelName` with a field in that model, specifying the target for a
+    sanitization operation.
     """
     """
 
 
     from sentry.backup.dependencies import NormalizedModelName
     from sentry.backup.dependencies import NormalizedModelName
@@ -100,7 +116,8 @@ def _set_field_value(json: JSONData, field: SanitizableField, value: JSONData) -
 
 
 def default_string_sanitizer(old: str) -> str:
 def default_string_sanitizer(old: str) -> str:
     """
     """
-    Default string randomizer. Looks at the characters present in the source string to create a new, random string from a roughly similar set of characters.
+    Default string randomizer. Looks at the characters present in the source string to create a new,
+    random string from a roughly similar set of characters.
     """
     """
 
 
     has_upper_case_hex = False
     has_upper_case_hex = False
@@ -136,7 +153,7 @@ def default_string_sanitizer(old: str) -> str:
     if has_digit:
     if has_digit:
         chars += "0123456789"
         chars += "0123456789"
 
 
-    return "".join([choice(list(chars)) for _ in range(3)])
+    return "".join([choice(list(chars)) for _ in range(len(old))])
 
 
 
 
 class Sanitizer:
 class Sanitizer:
@@ -174,7 +191,7 @@ class Sanitizer:
 
 
     def __init__(self, export: JSONData, datetime_offset: timedelta | None = None):
     def __init__(self, export: JSONData, datetime_offset: timedelta | None = None):
         self.json = export
         self.json = export
-        self.interned_strings = dict()
+        self.interned_strings = {"": ""}  # Always map empty string to itself.
         self.interned_datetimes = dict()
         self.interned_datetimes = dict()
 
 
         # Walk the data once, extracting any dates into a set.
         # Walk the data once, extracting any dates into a set.
@@ -234,6 +251,34 @@ class Sanitizer:
 
 
         return "@".join([self.map_string(p) for p in old.split("@")])
         return "@".join([self.map_string(p) for p in old.split("@")])
 
 
+    def map_ip(self, old: str) -> str:
+        """
+        Maps an IP with some randomly generated alternative. If the `old` IP has already been seen,
+        the already-generated value for that existing key will be used instead. If it has not, we'll
+        generate a new one. This ensures that all identical existing IPs are swapped with identical
+        replacements everywhere they occur.
+
+        If the `old` string is not a valid IP, it will be treated as a regular string.
+
+        If you wish to update an actual JSON model in-place with this newly generated IP, `set_ip()`
+        is the preferred method for doing so.
+        """
+
+        interned = self.interned_strings.get(old)
+        if interned is not None:
+            return interned
+
+        try:
+            old_ip = ip_address(old)
+        except ValueError:
+            return self.map_string(old)
+        else:
+            if isinstance(old_ip, IPv4Address):
+                self.interned_strings[old] = random_ipv4()
+            elif isinstance(old_ip, IPv6Address):
+                self.interned_strings[old] = random_ipv6()
+            return self.interned_strings[old]
+
     def map_name(self, old: str) -> str:
     def map_name(self, old: str) -> str:
         """
         """
         Maps a proper noun name with some randomly generated "petname" value (ex: "Hairy Tortoise").
         Maps a proper noun name with some randomly generated "petname" value (ex: "Hairy Tortoise").
@@ -285,6 +330,54 @@ class Sanitizer:
         self.interned_strings[old] = new
         self.interned_strings[old] = new
         return new
         return new
 
 
+    def map_url(self, old: str) -> str:
+        """
+        Map a URL in a manner that retains domain relationships - ie, all sanitized URLs from
+        domain `.foo` will now be from `.bar`. Scheme, subdomains, paths, etc will be retained in
+        kind (ie: if we have a path, we will sanitize it with a similar, interned one). This ensures
+        that all identical existing URLs are swapped with identical replacements everywhere they
+        occur.
+
+        If the `old` string is not a valid URL, it will be treated as a regular string.
+
+        If you wish to update an actual JSON model in-place with this newly generated URL,
+        `set_url()` is the preferred method for doing so.
+        """
+
+        url = urlparse(old)
+        hostname = url.hostname
+        if not hostname:
+            return self.map_string(old)
+
+        new_path = (
+            "" if not url.path else ".".join([self.map_string(p) for p in url.path.split(".")])
+        )
+        new_query = (
+            "" if not url.query else ".".join([self.map_string(q) for q in url.query.split(".")])
+        )
+        return urlunparse(
+            url._replace(
+                scheme=self.map_string(url.scheme) if url.scheme else "",
+                netloc=".".join([self.map_string(d) for d in hostname.split(".")]),
+                path=new_path,
+                query=new_query,
+                fragment=self.map_string(url.fragment) if url.fragment else "",
+            )
+        )
+
+    def map_uuid(self, old: str) -> str:
+        """
+        Maps a UUID. If the `old` UUID has already been seen, the already-generated value for that
+        existing key will be used instead. If it has not, we'll generate a new one. This ensures
+        that all identical existing UUIDs are swapped with identical replacements everywhere they
+        occur.
+
+        If you wish to update an actual JSON model in-place with this newly generated UUID,
+        `set_uuid()` is the preferred method for doing so.
+        """
+
+        return self.map_string(old, lambda _: str(uuid4()))
+
     def set_datetime(self, json: JSONData, field: SanitizableField) -> datetime | None:
     def set_datetime(self, json: JSONData, field: SanitizableField) -> datetime | None:
         """
         """
         Replaces a datetime by replacing it with a different, but still correctly ordered,
         Replaces a datetime by replacing it with a different, but still correctly ordered,
@@ -334,6 +427,34 @@ class Sanitizer:
 
 
         return _set_field_value(json, field, self.map_email(old))
         return _set_field_value(json, field, self.map_email(old))
 
 
+    def set_ip(
+        self,
+        json: JSONData,
+        field: SanitizableField,
+    ) -> str | None:
+        """
+        Replaces an IP with a randomly generated value. If the existing value of the IP has
+        already been seen, the already-generated value for that existing key will be used instead.
+        If it has not, we'll generate a new one. This ensures that all identical existing IPs are
+        swapped with identical replacements everywhere they occur.
+
+        This method updates the JSON in-place if the specified field is a non-null value, then
+        returns the newly generated replacement. If the specified field could not be found in the
+        supplied JSON model, `None` is returned instead.
+
+        If you wish to merely generate a string without updating the JSON in-place, consider using
+        `map_ip()` instead.
+        """
+
+        field.validate_json_model(json)
+        old = _get_field_value(json, field)
+        if old is None:
+            return None
+        if not isinstance(old, str):
+            raise TypeError("Existing value must be a string")
+
+        return _set_field_value(json, field, self.map_ip(old))
+
     def set_name(
     def set_name(
         self,
         self,
         json: JSONData,
         json: JSONData,
@@ -432,6 +553,68 @@ class Sanitizer:
 
 
         return _set_field_value(json, field, self.map_string(old, generate))
         return _set_field_value(json, field, self.map_string(old, generate))
 
 
+    def set_url(
+        self,
+        json: JSONData,
+        field: SanitizableField,
+    ) -> str | None:
+        """
+        Replace a URL in a manner that retains domain relationships - ie, all sanitized URLs from
+        domain `.foo` will now be from `.bar`. Scheme, subdomains, paths, etc will be retained in
+        kind (ie: if we have a path, we will sanitize it with a similar, interned one). This ensures
+        that all identical existing URLs are swapped with identical replacements everywhere they
+        occur.
+
+        If the `old` string is not a valid URL, it will be treated as a regular string.
+
+        This method updates the JSON in-place if the specified field is a non-null value, then
+        returns the newly generated replacement. If the specified field could not be found in the
+        supplied JSON model, `None` is returned instead.
+
+        If you wish to merely generate a string without updating the JSON in-place, consider using
+        `map_url()` instead.
+        """
+
+        field.validate_json_model(json)
+        old = _get_field_value(json, field)
+        if old is None:
+            return None
+        if not isinstance(old, str):
+            raise TypeError("Existing value must be a string")
+
+        return _set_field_value(json, field, self.map_url(old))
+
+    def set_uuid(
+        self,
+        json: JSONData,
+        field: SanitizableField,
+    ) -> str | None:
+        """
+        Replaces a UUID with a randomly generated value. If the existing value of the UUID has
+        already been seen, the already-generated value for that existing key will be used instead.
+        If it has not, we'll generate a new one. This ensures that all identical existing UUIDs are
+        swapped with identical replacements everywhere they occur.
+
+        This method updates the JSON in-place if the specified field is a non-null value, then
+        returns the newly generated replacement. If the specified field could not be found in the
+        supplied JSON model, `None` is returned instead.
+
+        If you wish to merely generate a string without updating the JSON in-place, consider using
+        `map_uuid()` instead.
+        """
+
+        field.validate_json_model(json)
+        old = _get_field_value(json, field)
+        if old is None:
+            return None
+        if not isinstance(old, str):
+            raise TypeError("Existing value must be a string")
+
+        # Will throw an error if this is not a valid UUID.
+        UUID(old)
+
+        return _set_field_value(json, field, self.map_uuid(old))
+
 
 
 def sanitize(export: JSONData, datetime_offset: timedelta | None = None) -> JSONData:
 def sanitize(export: JSONData, datetime_offset: timedelta | None = None) -> JSONData:
     """
     """

+ 27 - 0
src/sentry/db/models/base.py

@@ -18,6 +18,7 @@ from sentry.backup.dependencies import (
 from sentry.backup.helpers import ImportFlags
 from sentry.backup.helpers import ImportFlags
 from sentry.backup.sanitize import SanitizableField, Sanitizer
 from sentry.backup.sanitize import SanitizableField, Sanitizer
 from sentry.backup.scopes import ImportScope, RelocationScope
 from sentry.backup.scopes import ImportScope, RelocationScope
+from sentry.db.models.fields.uuid import UUIDField
 from sentry.silo.base import SiloLimit, SiloMode
 from sentry.silo.base import SiloLimit, SiloMode
 from sentry.utils.json import JSONData
 from sentry.utils.json import JSONData
 
 
@@ -211,6 +212,9 @@ class BaseModel(models.Model):
         fields = cls._meta.get_fields()
         fields = cls._meta.get_fields()
         field_names = [f.name for f in fields]
         field_names = [f.name for f in fields]
 
 
+        str_field_types = [models.CharField, models.TextField]
+        sensitive_words = ["password", "token", "secret"]
+
         # All `models.CharField` fields called "slug" and "name" can be auto-sanitized as strings.
         # All `models.CharField` fields called "slug" and "name" can be auto-sanitized as strings.
         if "name" in field_names and "slug" in field_names:
         if "name" in field_names and "slug" in field_names:
             sanitizer.set_name_and_slug_pair(
             sanitizer.set_name_and_slug_pair(
@@ -228,6 +232,29 @@ class BaseModel(models.Model):
             if isinstance(f, models.EmailField):
             if isinstance(f, models.EmailField):
                 sanitizer.set_email(json, SanitizableField(model_name, f.name))
                 sanitizer.set_email(json, SanitizableField(model_name, f.name))
 
 
+            # Auto-sanitize all IP Address fields.
+            if isinstance(f, models.IPAddressField) or isinstance(f, models.GenericIPAddressField):
+                sanitizer.set_ip(json, SanitizableField(model_name, f.name))
+
+            # Auto-sanitize all URL fields.
+            if isinstance(f, models.URLField) or f.name.endswith("url") or f.name.endswith("uri"):
+                sanitizer.set_url(json, SanitizableField(model_name, f.name))
+
+            # Auto-sanitize all UUID fields.
+            if (
+                isinstance(f, models.UUIDField)
+                or isinstance(f, UUIDField)
+                or f.name.endswith("guid")
+                or f.name.endswith("uuid")
+            ):
+                sanitizer.set_uuid(json, SanitizableField(model_name, f.name))
+
+            # Auto-sanitize all string fields that contain any sensitive words in their name.
+            is_str_field_type = next(filter(lambda t: isinstance(f, t), str_field_types), None)
+            contains_sensitive_word = next(filter(lambda w: w in f.name, sensitive_words), None)
+            if is_str_field_type and contains_sensitive_word:
+                sanitizer.set_string(json, SanitizableField(model_name, f.name))
+
         return None
         return None
 
 
     def normalize_before_relocation_import(
     def normalize_before_relocation_import(

+ 34 - 1
src/sentry/models/apitoken.py

@@ -11,8 +11,9 @@ from django.utils import timezone
 from django.utils.encoding import force_str
 from django.utils.encoding import force_str
 
 
 from sentry import options
 from sentry import options
-from sentry.backup.dependencies import ImportKind
+from sentry.backup.dependencies import ImportKind, NormalizedModelName, get_model_name
 from sentry.backup.helpers import ImportFlags
 from sentry.backup.helpers import ImportFlags
+from sentry.backup.sanitize import SanitizableField, Sanitizer
 from sentry.backup.scopes import ImportScope, RelocationScope
 from sentry.backup.scopes import ImportScope, RelocationScope
 from sentry.constants import SentryAppStatus
 from sentry.constants import SentryAppStatus
 from sentry.db.models import FlexibleForeignKey, control_silo_model, sane_repr
 from sentry.db.models import FlexibleForeignKey, control_silo_model, sane_repr
@@ -22,6 +23,7 @@ from sentry.models.apiscopes import HasApiScopes
 from sentry.models.outbox import OutboxCategory
 from sentry.models.outbox import OutboxCategory
 from sentry.types.region import find_all_region_names
 from sentry.types.region import find_all_region_names
 from sentry.types.token import AuthTokenType
 from sentry.types.token import AuthTokenType
+from sentry.utils.json import JSONData
 
 
 DEFAULT_EXPIRATION = timedelta(days=30)
 DEFAULT_EXPIRATION = timedelta(days=30)
 TOKEN_REDACTED = "***REDACTED***"
 TOKEN_REDACTED = "***REDACTED***"
@@ -318,6 +320,37 @@ class ApiToken(ReplicatedControlModel, HasApiScopes):
 
 
         return super().write_relocation_import(scope, flags)
         return super().write_relocation_import(scope, flags)
 
 
+    @classmethod
+    def sanitize_relocation_json(
+        cls, json: JSONData, sanitizer: Sanitizer, model_name: NormalizedModelName | None = None
+    ) -> None:
+        model_name = get_model_name(cls) if model_name is None else model_name
+        super().sanitize_relocation_json(json, sanitizer, model_name)
+
+        token = generate_token()
+        token_last_characters = token[-4:]
+        hashed_token = hashlib.sha256(token.encode()).hexdigest()
+        refresh_token = generate_token()
+        hashed_refresh_token = hashlib.sha256(refresh_token.encode()).hexdigest()
+
+        sanitizer.set_string(json, SanitizableField(model_name, "token"), lambda _: token)
+        sanitizer.set_string(
+            json,
+            SanitizableField(model_name, "token_last_characters"),
+            lambda _: token_last_characters,
+        )
+        sanitizer.set_string(
+            json, SanitizableField(model_name, "hashed_token"), lambda _: hashed_token
+        )
+        sanitizer.set_string(
+            json, SanitizableField(model_name, "refresh_token"), lambda _: refresh_token
+        )
+        sanitizer.set_string(
+            json,
+            SanitizableField(model_name, "hashed_refresh_token"),
+            lambda _: hashed_refresh_token,
+        )
+
     @property
     @property
     def organization_id(self) -> int | None:
     def organization_id(self) -> int | None:
         from sentry.models.integrations.sentry_app_installation import SentryAppInstallation
         from sentry.models.integrations.sentry_app_installation import SentryAppInstallation

+ 23 - 0
src/sentry/models/relay.py

@@ -3,9 +3,12 @@ from django.utils import timezone
 from django.utils.functional import cached_property
 from django.utils.functional import cached_property
 from sentry_relay.auth import PublicKey
 from sentry_relay.auth import PublicKey
 
 
+from sentry.backup.dependencies import NormalizedModelName, get_model_name
 from sentry.backup.mixins import OverwritableConfigMixin
 from sentry.backup.mixins import OverwritableConfigMixin
+from sentry.backup.sanitize import SanitizableField, Sanitizer
 from sentry.backup.scopes import RelocationScope
 from sentry.backup.scopes import RelocationScope
 from sentry.db.models import Model, region_silo_model
 from sentry.db.models import Model, region_silo_model
+from sentry.utils.json import JSONData
 
 
 
 
 @region_silo_model
 @region_silo_model
@@ -24,6 +27,16 @@ class RelayUsage(OverwritableConfigMixin, Model):
         app_label = "sentry"
         app_label = "sentry"
         db_table = "sentry_relayusage"
         db_table = "sentry_relayusage"
 
 
+    @classmethod
+    def sanitize_relocation_json(
+        cls, json: JSONData, sanitizer: Sanitizer, model_name: NormalizedModelName | None = None
+    ) -> None:
+        model_name = get_model_name(cls) if model_name is None else model_name
+        super().sanitize_relocation_json(json, sanitizer, model_name)
+
+        sanitizer.set_uuid(json, SanitizableField(model_name, "relay_id"))
+        sanitizer.set_string(json, SanitizableField(model_name, "public_key"))
+
 
 
 @region_silo_model
 @region_silo_model
 class Relay(OverwritableConfigMixin, Model):
 class Relay(OverwritableConfigMixin, Model):
@@ -66,3 +79,13 @@ class Relay(OverwritableConfigMixin, Model):
         Returns all the relays that are configured with one of the specified keys
         Returns all the relays that are configured with one of the specified keys
         """
         """
         return Relay.objects.filter(public_key__in=keys)
         return Relay.objects.filter(public_key__in=keys)
+
+    @classmethod
+    def sanitize_relocation_json(
+        cls, json: JSONData, sanitizer: Sanitizer, model_name: NormalizedModelName | None = None
+    ) -> None:
+        model_name = get_model_name(cls) if model_name is None else model_name
+        super().sanitize_relocation_json(json, sanitizer, model_name)
+
+        sanitizer.set_uuid(json, SanitizableField(model_name, "relay_id"))
+        sanitizer.set_string(json, SanitizableField(model_name, "public_key"))

+ 18 - 1
src/sentry/models/user.py

@@ -21,8 +21,14 @@ from django.utils.translation import gettext_lazy as _
 
 
 from bitfield import TypedClassBitField
 from bitfield import TypedClassBitField
 from sentry.auth.authenticators import available_authenticators
 from sentry.auth.authenticators import available_authenticators
-from sentry.backup.dependencies import ImportKind, PrimaryKeyMap
+from sentry.backup.dependencies import (
+    ImportKind,
+    NormalizedModelName,
+    PrimaryKeyMap,
+    get_model_name,
+)
 from sentry.backup.helpers import ImportFlags
 from sentry.backup.helpers import ImportFlags
+from sentry.backup.sanitize import SanitizableField, Sanitizer
 from sentry.backup.scopes import ImportScope, RelocationScope
 from sentry.backup.scopes import ImportScope, RelocationScope
 from sentry.db.models import (
 from sentry.db.models import (
     BaseManager,
     BaseManager,
@@ -44,6 +50,7 @@ from sentry.services.hybrid_cloud.user import RpcUser
 from sentry.types.integrations import EXTERNAL_PROVIDERS, ExternalProviders
 from sentry.types.integrations import EXTERNAL_PROVIDERS, ExternalProviders
 from sentry.types.region import find_all_region_names, find_regions_for_user
 from sentry.types.region import find_all_region_names, find_regions_for_user
 from sentry.utils.http import absolute_uri
 from sentry.utils.http import absolute_uri
+from sentry.utils.json import JSONData
 from sentry.utils.retries import TimedRetryPolicy
 from sentry.utils.retries import TimedRetryPolicy
 
 
 audit_logger = logging.getLogger("sentry.audit.user")
 audit_logger = logging.getLogger("sentry.audit.user")
@@ -504,6 +511,16 @@ class User(BaseModel, AbstractBaseUser):
             # Perform the remainder of the write while we're still holding the lock.
             # Perform the remainder of the write while we're still holding the lock.
             return do_write()
             return do_write()
 
 
+    @classmethod
+    def sanitize_relocation_json(
+        cls, json: JSONData, sanitizer: Sanitizer, model_name: NormalizedModelName | None = None
+    ) -> None:
+        model_name = get_model_name(cls) if model_name is None else model_name
+        super().sanitize_relocation_json(json, sanitizer, model_name)
+
+        sanitizer.set_string(json, SanitizableField(model_name, "username"))
+        sanitizer.set_string(json, SanitizableField(model_name, "session_nonce"))
+
     @classmethod
     @classmethod
     def handle_async_deletion(
     def handle_async_deletion(
         cls,
         cls,

+ 20 - 1
src/sentry/models/useremail.py

@@ -10,8 +10,14 @@ from django.db import models
 from django.utils import timezone
 from django.utils import timezone
 from django.utils.translation import gettext_lazy as _
 from django.utils.translation import gettext_lazy as _
 
 
-from sentry.backup.dependencies import ImportKind, PrimaryKeyMap, get_model_name
+from sentry.backup.dependencies import (
+    ImportKind,
+    NormalizedModelName,
+    PrimaryKeyMap,
+    get_model_name,
+)
 from sentry.backup.helpers import ImportFlags
 from sentry.backup.helpers import ImportFlags
+from sentry.backup.sanitize import SanitizableField, Sanitizer
 from sentry.backup.scopes import ImportScope, RelocationScope
 from sentry.backup.scopes import ImportScope, RelocationScope
 from sentry.db.models import BaseManager, FlexibleForeignKey, control_silo_model, sane_repr
 from sentry.db.models import BaseManager, FlexibleForeignKey, control_silo_model, sane_repr
 from sentry.db.models.outboxes import ControlOutboxProducingModel
 from sentry.db.models.outboxes import ControlOutboxProducingModel
@@ -19,6 +25,7 @@ from sentry.models.outbox import ControlOutboxBase, OutboxCategory
 from sentry.services.hybrid_cloud.organization.model import RpcOrganization
 from sentry.services.hybrid_cloud.organization.model import RpcOrganization
 from sentry.services.hybrid_cloud.user.model import RpcUser
 from sentry.services.hybrid_cloud.user.model import RpcUser
 from sentry.types.region import find_regions_for_user
 from sentry.types.region import find_regions_for_user
+from sentry.utils.json import JSONData
 from sentry.utils.security import get_secure_token
 from sentry.utils.security import get_secure_token
 
 
 if TYPE_CHECKING:
 if TYPE_CHECKING:
@@ -142,3 +149,15 @@ class UserEmail(ControlOutboxProducingModel):
         # `--merge_users=true` case is handled in the `normalize_before_relocation_import()` method
         # `--merge_users=true` case is handled in the `normalize_before_relocation_import()` method
         # above).
         # above).
         return (useremail.pk, ImportKind.Inserted)
         return (useremail.pk, ImportKind.Inserted)
+
+    @classmethod
+    def sanitize_relocation_json(
+        cls, json: JSONData, sanitizer: Sanitizer, model_name: NormalizedModelName | None = None
+    ) -> None:
+        model_name = get_model_name(cls) if model_name is None else model_name
+        super().sanitize_relocation_json(json, sanitizer, model_name)
+
+        validation_hash = get_secure_token()
+        sanitizer.set_string(
+            json, SanitizableField(model_name, "validation_hash"), lambda _: validation_hash
+        )

+ 60 - 13
tests/sentry/backup/snapshots/SanitizationExhaustiveTests/test_clean_pks.pysnap

@@ -1,5 +1,5 @@
 ---
 ---
-created: '2024-04-25T18:22:28.906727+00:00'
+created: '2024-05-02T21:07:58.578052+00:00'
 creator: sentry
 creator: sentry
 source: tests/sentry/backup/test_sanitize.py
 source: tests/sentry/backup/test_sanitize.py
 ---
 ---
@@ -68,18 +68,22 @@ source: tests/sentry/backup/test_sanitize.py
   ordinal: 1
   ordinal: 1
   sanitized_fields:
   sanitized_fields:
   - date_added
   - date_added
+  - secret_key
 - model_name: sentry.projectkey
 - model_name: sentry.projectkey
   ordinal: 2
   ordinal: 2
   sanitized_fields:
   sanitized_fields:
   - date_added
   - date_added
+  - secret_key
 - model_name: sentry.projectkey
 - model_name: sentry.projectkey
   ordinal: 3
   ordinal: 3
   sanitized_fields:
   sanitized_fields:
   - date_added
   - date_added
+  - secret_key
 - model_name: sentry.projectkey
 - model_name: sentry.projectkey
   ordinal: 4
   ordinal: 4
   sanitized_fields:
   sanitized_fields:
   - date_added
   - date_added
+  - secret_key
 - model_name: sentry.projectoption
 - model_name: sentry.projectoption
   ordinal: 1
   ordinal: 1
   sanitized_fields: []
   sanitized_fields: []
@@ -127,17 +131,22 @@ source: tests/sentry/backup/test_sanitize.py
   - date_added
   - date_added
 - model_name: sentry.relay
 - model_name: sentry.relay
   ordinal: 1
   ordinal: 1
-  sanitized_fields: []
+  sanitized_fields:
+  - public_key
+  - relay_id
 - model_name: sentry.relayusage
 - model_name: sentry.relayusage
   ordinal: 1
   ordinal: 1
   sanitized_fields:
   sanitized_fields:
   - first_seen
   - first_seen
   - last_seen
   - last_seen
+  - public_key
+  - relay_id
 - model_name: sentry.repository
 - model_name: sentry.repository
   ordinal: 1
   ordinal: 1
   sanitized_fields:
   sanitized_fields:
   - date_added
   - date_added
   - name
   - name
+  - url
 - model_name: sentry.team
 - model_name: sentry.team
   ordinal: 1
   ordinal: 1
   sanitized_fields:
   sanitized_fields:
@@ -151,7 +160,8 @@ source: tests/sentry/backup/test_sanitize.py
   - email
   - email
   - last_active
   - last_active
   - last_password_change
   - last_password_change
-  - name
+  - password
+  - username
 - model_name: sentry.user
 - model_name: sentry.user
   ordinal: 2
   ordinal: 2
   sanitized_fields:
   sanitized_fields:
@@ -159,7 +169,8 @@ source: tests/sentry/backup/test_sanitize.py
   - email
   - email
   - last_active
   - last_active
   - last_password_change
   - last_password_change
-  - name
+  - password
+  - username
 - model_name: sentry.user
 - model_name: sentry.user
   ordinal: 3
   ordinal: 3
   sanitized_fields:
   sanitized_fields:
@@ -167,7 +178,8 @@ source: tests/sentry/backup/test_sanitize.py
   - email
   - email
   - last_active
   - last_active
   - last_password_change
   - last_password_change
-  - name
+  - password
+  - username
 - model_name: sentry.user
 - model_name: sentry.user
   ordinal: 4
   ordinal: 4
   sanitized_fields:
   sanitized_fields:
@@ -175,14 +187,14 @@ source: tests/sentry/backup/test_sanitize.py
   - email
   - email
   - last_active
   - last_active
   - last_password_change
   - last_password_change
-  - name
+  - password
+  - username
 - model_name: sentry.user
 - model_name: sentry.user
   ordinal: 5
   ordinal: 5
   sanitized_fields:
   sanitized_fields:
   - date_joined
   - date_joined
-  - email
   - last_active
   - last_active
-  - name
+  - username
 - model_name: sentry.user
 - model_name: sentry.user
   ordinal: 6
   ordinal: 6
   sanitized_fields:
   sanitized_fields:
@@ -190,16 +202,19 @@ source: tests/sentry/backup/test_sanitize.py
   - email
   - email
   - last_active
   - last_active
   - last_password_change
   - last_password_change
-  - name
+  - password
+  - username
 - model_name: sentry.userip
 - model_name: sentry.userip
   ordinal: 1
   ordinal: 1
   sanitized_fields:
   sanitized_fields:
   - first_seen
   - first_seen
+  - ip_address
   - last_seen
   - last_seen
 - model_name: sentry.userip
 - model_name: sentry.userip
   ordinal: 2
   ordinal: 2
   sanitized_fields:
   sanitized_fields:
   - first_seen
   - first_seen
+  - ip_address
   - last_seen
   - last_seen
 - model_name: sentry.useroption
 - model_name: sentry.useroption
   ordinal: 1
   ordinal: 1
@@ -246,6 +261,8 @@ source: tests/sentry/backup/test_sanitize.py
   sanitized_fields:
   sanitized_fields:
   - date_added
   - date_added
   - name
   - name
+  - token_hashed
+  - token_last_characters
 - model_name: sentry.organizationmember
 - model_name: sentry.organizationmember
   ordinal: 1
   ordinal: 1
   sanitized_fields:
   sanitized_fields:
@@ -261,7 +278,7 @@ source: tests/sentry/backup/test_sanitize.py
   ordinal: 1
   ordinal: 1
   sanitized_fields:
   sanitized_fields:
   - date_added
   - date_added
-  - name
+  - guid
   - slug
   - slug
 - model_name: sentry.environment
 - model_name: sentry.environment
   ordinal: 1
   ordinal: 1
@@ -292,7 +309,6 @@ source: tests/sentry/backup/test_sanitize.py
   ordinal: 5
   ordinal: 5
   sanitized_fields:
   sanitized_fields:
   - date_added
   - date_added
-  - email
 - model_name: sentry.email
 - model_name: sentry.email
   ordinal: 6
   ordinal: 6
   sanitized_fields:
   sanitized_fields:
@@ -341,6 +357,7 @@ source: tests/sentry/backup/test_sanitize.py
 - model_name: sentry.apiapplication
 - model_name: sentry.apiapplication
   ordinal: 1
   ordinal: 1
   sanitized_fields:
   sanitized_fields:
+  - client_secret
   - date_added
   - date_added
   - name
   - name
 - model_name: sentry.useremail
 - model_name: sentry.useremail
@@ -348,31 +365,36 @@ source: tests/sentry/backup/test_sanitize.py
   sanitized_fields:
   sanitized_fields:
   - date_hash_added
   - date_hash_added
   - email
   - email
+  - validation_hash
 - model_name: sentry.useremail
 - model_name: sentry.useremail
   ordinal: 2
   ordinal: 2
   sanitized_fields:
   sanitized_fields:
   - date_hash_added
   - date_hash_added
   - email
   - email
+  - validation_hash
 - model_name: sentry.useremail
 - model_name: sentry.useremail
   ordinal: 3
   ordinal: 3
   sanitized_fields:
   sanitized_fields:
   - date_hash_added
   - date_hash_added
   - email
   - email
+  - validation_hash
 - model_name: sentry.useremail
 - model_name: sentry.useremail
   ordinal: 4
   ordinal: 4
   sanitized_fields:
   sanitized_fields:
   - date_hash_added
   - date_hash_added
   - email
   - email
+  - validation_hash
 - model_name: sentry.useremail
 - model_name: sentry.useremail
   ordinal: 5
   ordinal: 5
   sanitized_fields:
   sanitized_fields:
   - date_hash_added
   - date_hash_added
-  - email
+  - validation_hash
 - model_name: sentry.useremail
 - model_name: sentry.useremail
   ordinal: 6
   ordinal: 6
   sanitized_fields:
   sanitized_fields:
   - date_hash_added
   - date_hash_added
   - email
   - email
+  - validation_hash
 - model_name: sentry.snubaquery
 - model_name: sentry.snubaquery
   ordinal: 1
   ordinal: 1
   sanitized_fields:
   sanitized_fields:
@@ -392,6 +414,8 @@ source: tests/sentry/backup/test_sanitize.py
   - date_updated
   - date_updated
   - name
   - name
   - slug
   - slug
+  - uuid
+  - webhook_url
 - model_name: sentry.rule
 - model_name: sentry.rule
   ordinal: 1
   ordinal: 1
   sanitized_fields:
   sanitized_fields:
@@ -451,20 +475,35 @@ source: tests/sentry/backup/test_sanitize.py
   sanitized_fields:
   sanitized_fields:
   - date_added
   - date_added
   - expires_at
   - expires_at
+  - hashed_refresh_token
+  - hashed_token
+  - refresh_token
+  - token
+  - token_last_characters
 - model_name: sentry.apitoken
 - model_name: sentry.apitoken
   ordinal: 2
   ordinal: 2
   sanitized_fields:
   sanitized_fields:
   - date_added
   - date_added
+  - hashed_refresh_token
+  - hashed_token
   - name
   - name
+  - refresh_token
+  - token
+  - token_last_characters
 - model_name: sentry.apitoken
 - model_name: sentry.apitoken
   ordinal: 3
   ordinal: 3
   sanitized_fields:
   sanitized_fields:
   - date_added
   - date_added
+  - hashed_token
   - name
   - name
+  - token
+  - token_last_characters
+  - token_type
 - model_name: sentry.apigrant
 - model_name: sentry.apigrant
   ordinal: 1
   ordinal: 1
   sanitized_fields:
   sanitized_fields:
   - expires_at
   - expires_at
+  - redirect_uri
 - model_name: sentry.apiauthorization
 - model_name: sentry.apiauthorization
   ordinal: 1
   ordinal: 1
   sanitized_fields:
   sanitized_fields:
@@ -505,9 +544,11 @@ source: tests/sentry/backup/test_sanitize.py
   sanitized_fields:
   sanitized_fields:
   - date_added
   - date_added
   - date_updated
   - date_updated
+  - uuid
 - model_name: sentry.sentryappcomponent
 - model_name: sentry.sentryappcomponent
   ordinal: 1
   ordinal: 1
-  sanitized_fields: []
+  sanitized_fields:
+  - uuid
 - model_name: sentry.rulesnooze
 - model_name: sentry.rulesnooze
   ordinal: 1
   ordinal: 1
   sanitized_fields:
   sanitized_fields:
@@ -572,10 +613,16 @@ source: tests/sentry/backup/test_sanitize.py
   ordinal: 1
   ordinal: 1
   sanitized_fields:
   sanitized_fields:
   - date_added
   - date_added
+  - guid
+  - secret
+  - url
 - model_name: sentry.servicehook
 - model_name: sentry.servicehook
   ordinal: 2
   ordinal: 2
   sanitized_fields:
   sanitized_fields:
   - date_added
   - date_added
+  - guid
+  - secret
+  - url
 - model_name: sentry.incident
 - model_name: sentry.incident
   ordinal: 1
   ordinal: 1
   sanitized_fields:
   sanitized_fields:

+ 14 - 4
tests/sentry/backup/snapshots/SanitizationIntegrationTests/test_fresh_install.pysnap

@@ -1,5 +1,5 @@
 ---
 ---
-created: '2024-04-17T17:41:21.647153+00:00'
+created: '2024-05-02T21:07:54.051381+00:00'
 creator: sentry
 creator: sentry
 source: tests/sentry/backup/test_sanitize.py
 source: tests/sentry/backup/test_sanitize.py
 ---
 ---
@@ -45,7 +45,8 @@ source: tests/sentry/backup/test_sanitize.py
   - email
   - email
   - last_active
   - last_active
   - last_password_change
   - last_password_change
-  - name
+  - password
+  - username
 - model_name: sentry.user
 - model_name: sentry.user
   ordinal: 2
   ordinal: 2
   sanitized_fields:
   sanitized_fields:
@@ -53,15 +54,20 @@ source: tests/sentry/backup/test_sanitize.py
   - email
   - email
   - last_active
   - last_active
   - last_password_change
   - last_password_change
-  - name
+  - password
+  - username
 - model_name: sentry.relayusage
 - model_name: sentry.relayusage
   ordinal: 1
   ordinal: 1
   sanitized_fields:
   sanitized_fields:
   - first_seen
   - first_seen
   - last_seen
   - last_seen
+  - public_key
+  - relay_id
 - model_name: sentry.relay
 - model_name: sentry.relay
   ordinal: 1
   ordinal: 1
-  sanitized_fields: []
+  sanitized_fields:
+  - public_key
+  - relay_id
 - model_name: sentry.authenticator
 - model_name: sentry.authenticator
   ordinal: 1
   ordinal: 1
   sanitized_fields:
   sanitized_fields:
@@ -75,11 +81,13 @@ source: tests/sentry/backup/test_sanitize.py
   sanitized_fields:
   sanitized_fields:
   - date_hash_added
   - date_hash_added
   - email
   - email
+  - validation_hash
 - model_name: sentry.useremail
 - model_name: sentry.useremail
   ordinal: 2
   ordinal: 2
   sanitized_fields:
   sanitized_fields:
   - date_hash_added
   - date_hash_added
   - email
   - email
+  - validation_hash
 - model_name: sentry.userrole
 - model_name: sentry.userrole
   ordinal: 1
   ordinal: 1
   sanitized_fields:
   sanitized_fields:
@@ -115,6 +123,7 @@ source: tests/sentry/backup/test_sanitize.py
   ordinal: 1
   ordinal: 1
   sanitized_fields:
   sanitized_fields:
   - date_added
   - date_added
+  - secret_key
 - model_name: sentry.rule
 - model_name: sentry.rule
   ordinal: 1
   ordinal: 1
   sanitized_fields:
   sanitized_fields:
@@ -143,5 +152,6 @@ source: tests/sentry/backup/test_sanitize.py
 - model_name: sentry.apiapplication
 - model_name: sentry.apiapplication
   ordinal: 1
   ordinal: 1
   sanitized_fields:
   sanitized_fields:
+  - client_secret
   - date_added
   - date_added
   - name
   - name

+ 53 - 1
tests/sentry/backup/test_sanitize.py

@@ -24,6 +24,7 @@ from sentry.backup.sanitize import (
 from sentry.backup.scopes import RelocationScope
 from sentry.backup.scopes import RelocationScope
 from sentry.db.models.base import DefaultFieldsModel
 from sentry.db.models.base import DefaultFieldsModel
 from sentry.db.models.fields.slug import SentrySlugField
 from sentry.db.models.fields.slug import SentrySlugField
+from sentry.db.models.fields.uuid import UUIDField
 from sentry.testutils.cases import TestCase
 from sentry.testutils.cases import TestCase
 from sentry.testutils.factories import get_fixture_path
 from sentry.testutils.factories import get_fixture_path
 from sentry.testutils.helpers.backups import BackupTestCase
 from sentry.testutils.helpers.backups import BackupTestCase
@@ -37,6 +38,10 @@ FAKE_NAME = "Fake Name"
 FAKE_NICKNAME = "Fake Nickname"
 FAKE_NICKNAME = "Fake Nickname"
 FAKE_SLUG = "fake-slug"
 FAKE_SLUG = "fake-slug"
 FAKE_TEXT = "This is some text."
 FAKE_TEXT = "This is some text."
+FAKE_IP_V4 = "8.8.8.8"
+FAKE_IP_V6 = "9c72:8448:90c4:4e5e:a946:c1f5:71a6:4cc2"
+FAKE_URL = "https://sub.domain.example.com/some/path?a=b&c=d#foo"
+FAKE_UUID = "6b79316f-cd5c-42fa-ad45-20ce0b1f0725"
 
 
 CURR_DATE = datetime.now()
 CURR_DATE = datetime.now()
 CURR_YEAR = CURR_DATE.year
 CURR_YEAR = CURR_DATE.year
@@ -55,6 +60,10 @@ class FakeSanitizableModel(DefaultFieldsModel):
     slug = SentrySlugField(null=True)
     slug = SentrySlugField(null=True)
     nickname = models.CharField(null=True, max_length=32)
     nickname = models.CharField(null=True, max_length=32)
     text = SentrySlugField(null=True, max_length=128)
     text = SentrySlugField(null=True, max_length=128)
+    ip_v4 = models.GenericIPAddressField(null=True)
+    ip_v6 = models.GenericIPAddressField(null=True)
+    url = models.URLField(null=True)
+    uuid = UUIDField(null=True)
 
 
     class Meta:
     class Meta:
         app_label = "test"
         app_label = "test"
@@ -65,9 +74,10 @@ class FakeSanitizableModel(DefaultFieldsModel):
         cls, json: JSONData, sanitizer: Sanitizer, model_name: NormalizedModelName | None = None
         cls, json: JSONData, sanitizer: Sanitizer, model_name: NormalizedModelName | None = None
     ) -> None:
     ) -> None:
         model_name = get_model_name(cls) if model_name is None else model_name
         model_name = get_model_name(cls) if model_name is None else model_name
+        super().sanitize_relocation_json(json, sanitizer, model_name)
+
         sanitizer.set_name(json, SanitizableField(model_name, "nickname"))
         sanitizer.set_name(json, SanitizableField(model_name, "nickname"))
         sanitizer.set_string(json, SanitizableField(model_name, "text"))
         sanitizer.set_string(json, SanitizableField(model_name, "text"))
-        return super().sanitize_relocation_json(json, sanitizer, model_name)
 
 
 
 
 @patch("sentry.backup.dependencies.get_model", Mock(return_value=FakeSanitizableModel))
 @patch("sentry.backup.dependencies.get_model", Mock(return_value=FakeSanitizableModel))
@@ -92,6 +102,10 @@ class SanitizationUnitTests(TestCase):
             slug=FAKE_SLUG,
             slug=FAKE_SLUG,
             nickname=FAKE_NICKNAME,
             nickname=FAKE_NICKNAME,
             text=FAKE_TEXT,
             text=FAKE_TEXT,
+            ip_v4=FAKE_IP_V4,
+            ip_v6=FAKE_IP_V6,
+            url=FAKE_URL,
+            uuid=FAKE_UUID,
         )
         )
         faked = self.serialize_to_json_data([model, model])
         faked = self.serialize_to_json_data([model, model])
         sanitized = sanitize(faked, DELTA_YEAR)
         sanitized = sanitize(faked, DELTA_YEAR)
@@ -108,6 +122,15 @@ class SanitizationUnitTests(TestCase):
         assert isinstance(s0["nickname"], str)
         assert isinstance(s0["nickname"], str)
         assert isinstance(s0["text"], str)
         assert isinstance(s0["text"], str)
 
 
+        assert isinstance(s0["ip_v4"], str)
+        assert s0["ip_v4"].count(".") == 3
+
+        assert isinstance(s0["ip_v6"], str)
+        assert s0["ip_v6"].count(":") == 7
+
+        assert isinstance(s0["url"], str)
+        assert isinstance(s0["uuid"], str)
+
         # Confirm sanitization.
         # Confirm sanitization.
         assert parse_datetime(f0["date_added"]) < s0["date_added"]
         assert parse_datetime(f0["date_added"]) < s0["date_added"]
         assert parse_datetime(f0["date_updated"]) < s0["date_updated"]
         assert parse_datetime(f0["date_updated"]) < s0["date_updated"]
@@ -116,6 +139,10 @@ class SanitizationUnitTests(TestCase):
         assert f0["slug"] != s0["slug"]
         assert f0["slug"] != s0["slug"]
         assert f0["nickname"] != s0["nickname"]
         assert f0["nickname"] != s0["nickname"]
         assert f0["text"] != s0["text"]
         assert f0["text"] != s0["text"]
+        assert f0["ip_v4"] != s0["ip_v4"]
+        assert f0["ip_v6"] != s0["ip_v6"]
+        assert f0["url"] != s0["url"]
+        assert f0["uuid"] != s0["uuid"]
 
 
         # Identical source values remain equal after sanitization.
         # Identical source values remain equal after sanitization.
         assert s0["date_added"] == s1["date_added"]
         assert s0["date_added"] == s1["date_added"]
@@ -125,6 +152,10 @@ class SanitizationUnitTests(TestCase):
         assert s0["slug"] == s1["slug"]
         assert s0["slug"] == s1["slug"]
         assert s0["nickname"] == s1["nickname"]
         assert s0["nickname"] == s1["nickname"]
         assert s0["text"] == s1["text"]
         assert s0["text"] == s1["text"]
+        assert s0["ip_v4"] == s1["ip_v4"]
+        assert s0["ip_v6"] == s1["ip_v6"]
+        assert s0["url"] == s1["url"]
+        assert s0["uuid"] == s1["uuid"]
 
 
     def test_good_all_sanitizers_unset_fields(self):
     def test_good_all_sanitizers_unset_fields(self):
         model = FakeSanitizableModel(
         model = FakeSanitizableModel(
@@ -134,6 +165,10 @@ class SanitizationUnitTests(TestCase):
             nickname=None,
             nickname=None,
             slug=None,
             slug=None,
             text=None,
             text=None,
+            ip_v4=None,
+            ip_v6=None,
+            url=None,
+            uuid=None,
         )
         )
         faked = self.serialize_to_json_data([model])
         faked = self.serialize_to_json_data([model])
         sanitized = sanitize(faked, DELTA_YEAR)
         sanitized = sanitize(faked, DELTA_YEAR)
@@ -146,12 +181,21 @@ class SanitizationUnitTests(TestCase):
         assert s["slug"] is None
         assert s["slug"] is None
         assert s["nickname"] is None
         assert s["nickname"] is None
         assert s["text"] is None
         assert s["text"] is None
+        assert s["ip_v4"] is None
+        assert s["ip_v6"] is None
+        assert s["url"] is None
+        assert s["uuid"] is None
+
         assert s["date_updated"] == f["date_updated"]
         assert s["date_updated"] == f["date_updated"]
         assert s["email"] == f["email"]
         assert s["email"] == f["email"]
         assert s["name"] == f["name"]
         assert s["name"] == f["name"]
         assert s["slug"] == f["slug"]
         assert s["slug"] == f["slug"]
         assert s["nickname"] == f["nickname"]
         assert s["nickname"] == f["nickname"]
         assert s["text"] == f["text"]
         assert s["text"] == f["text"]
+        assert s["ip_v4"] == f["ip_v4"]
+        assert s["ip_v6"] == f["ip_v6"]
+        assert s["url"] == f["url"]
+        assert s["uuid"] == f["uuid"]
 
 
     def test_good_date_all_sanitizers_no_delta(self):
     def test_good_date_all_sanitizers_no_delta(self):
         faked = self.serialize_to_json_data(
         faked = self.serialize_to_json_data(
@@ -164,6 +208,10 @@ class SanitizationUnitTests(TestCase):
                     slug=FAKE_SLUG,
                     slug=FAKE_SLUG,
                     nickname=FAKE_NICKNAME,
                     nickname=FAKE_NICKNAME,
                     text=FAKE_TEXT,
                     text=FAKE_TEXT,
+                    ip_v4=FAKE_IP_V4,
+                    ip_v6=FAKE_IP_V6,
+                    url=FAKE_URL,
+                    uuid=FAKE_UUID,
                 )
                 )
             ]
             ]
         )
         )
@@ -177,6 +225,10 @@ class SanitizationUnitTests(TestCase):
         assert f["slug"] != s["slug"]
         assert f["slug"] != s["slug"]
         assert f["nickname"] != s["nickname"]
         assert f["nickname"] != s["nickname"]
         assert f["text"] != s["text"]
         assert f["text"] != s["text"]
+        assert f["ip_v4"] != s["ip_v4"]
+        assert f["ip_v6"] != s["ip_v6"]
+        assert f["url"] != s["url"]
+        assert f["uuid"] != s["uuid"]
         assert s["date_added"] < s["date_updated"]
         assert s["date_added"] < s["date_updated"]
 
 
     def test_good_dates_preserve_ordering(self):
     def test_good_dates_preserve_ordering(self):