Browse Source

feat(hybrid): Introduce region function contracts (#39253)

Set up a type system for region objects and the set of all regions, with
placeholders for backend operations not implemented yet.
Ryan Skonnord 2 years ago
parent
commit
a7d5d84276

+ 1 - 0
mypy.ini

@@ -108,6 +108,7 @@ files = src/sentry/analytics/,
         src/sentry/tasks/symbolication.py,
         src/sentry/tasks/update_user_reports.py,
         src/sentry/testutils/silo.py,
+        src/sentry/types/region.py,
         src/sentry/unmerge.py,
         src/sentry/utils/appleconnect/,
         src/sentry/utils/hashlib.py,

+ 3 - 3
src/sentry/conf/server.py

@@ -10,7 +10,7 @@ import socket
 import sys
 import tempfile
 from datetime import datetime, timedelta
-from typing import Mapping
+from typing import Iterable
 from urllib.parse import urlparse
 
 import sentry
@@ -2787,7 +2787,6 @@ MAX_QUERY_SUBSCRIPTIONS_PER_ORG = 1000
 MAX_REDIS_SNOWFLAKE_RETRY_COUNTER = 5
 
 SNOWFLAKE_VERSION_ID = 1
-SNOWFLAKE_REGION_ID = 0
 SENTRY_SNOWFLAKE_EPOCH_START = datetime(2022, 8, 8, 0, 0).timestamp()
 SENTRY_USE_SNOWFLAKE = False
 
@@ -2819,4 +2818,5 @@ DISALLOWED_CUSTOMER_DOMAINS = []
 
 SENTRY_PERFORMANCE_ISSUES_RATE_LIMITER_OPTIONS = {}
 
-SENTRY_REGION_CONFIG: Mapping[str, Region] = {}
+SENTRY_REGION = os.environ.get("SENTRY_REGION", None)
+SENTRY_REGION_CONFIG: Iterable[Region] = ()

+ 3 - 1
src/sentry/testutils/cases.py

@@ -140,6 +140,7 @@ from .helpers import (
     override_options,
     parse_queries,
 )
+from .silo import exempt_from_silo_limits
 from .skips import requires_snuba
 
 DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
@@ -239,7 +240,8 @@ class BaseTestCase(Fixtures, Exam):
         user.backend = settings.AUTHENTICATION_BACKENDS[0]
 
         request = self.make_request()
-        login(request, user)
+        with exempt_from_silo_limits():
+            login(request, user)
         request.user = user
 
         if organization_ids is None:

+ 24 - 0
src/sentry/testutils/region.py

@@ -0,0 +1,24 @@
+from contextlib import contextmanager
+from typing import Iterable
+from unittest import mock
+
+from sentry.types.region import Region, _RegionMapping
+
+
+@contextmanager
+def override_regions(regions: Iterable[Region]):
+    """Override the global set of existing regions.
+
+    The overriding value takes the place of the `SENTRY_REGION_CONFIG` setting and
+    changes the behavior of the module-level functions in `sentry.types.region`. This
+    is preferable to overriding the `SENTRY_REGION_CONFIG` setting value directly
+    because the region mapping may already be cached.
+    """
+
+    mapping = _RegionMapping(regions)
+
+    def override() -> _RegionMapping:
+        return mapping
+
+    with mock.patch("sentry.types.region._load_global_regions", new=override):
+        yield

+ 129 - 3
src/sentry/types/region.py

@@ -1,8 +1,134 @@
+from __future__ import annotations
+
+import functools
 from dataclasses import dataclass
+from enum import Enum
+from typing import TYPE_CHECKING, Iterable
+
+from sentry.silo import SiloMode
+
+if TYPE_CHECKING:
+    from sentry.models import Organization
+
+
+class RegionCategory(Enum):
+    MULTI_TENANT = "MULTI_TENANT"
+    SINGLE_TENANT = "SINGLE_TENANT"
 
 
-@dataclass
+@dataclass(frozen=True, eq=True)
 class Region:
+    """A region of the Sentry platform, hosted by a region silo."""
+
     name: str
-    subdomain: str
-    is_private: bool = False
+    """The region's unique identifier."""
+
+    id: int
+    """The region's unique numeric representation.
+
+    This number is used for composing "snowflake" IDs, and must fit inside the
+    maximum bit length specified by our snowflake ID schema.
+    """
+
+    address: str
+    """The address of the region's silo.
+
+    Represent a region's hostname or subdomain in a production environment
+    (e.g., "eu.sentry.io"), and addresses such as "localhost:8001" in a dev
+    environment.
+
+    (This attribute is a placeholder. Please update this docstring when its
+    contract becomes more stable.)
+    """
+
+    category: RegionCategory
+    """The region's category."""
+
+    def __post_init__(self) -> None:
+        from sentry.utils.snowflake import NULL_REGION_ID, REGION_ID
+
+        REGION_ID.validate(self.id)
+        if self.id == NULL_REGION_ID:
+            raise ValueError(f"Region ID {NULL_REGION_ID} is reserved for non-multi-region systems")
+
+    def to_url(self, path: str) -> str:
+        """Resolve a path into a URL on this region's silo.
+
+        (This method is a placeholder. See the `address` attribute.)
+        """
+        return self.address + path
+
+
+class RegionResolutionError(Exception):
+    """Indicate that a region's identity could not be resolved."""
+
+
+class RegionContextError(Exception):
+    """Indicate that the server is not in a state to resolve a region."""
+
+
+class _RegionMapping:
+    """The set of all regions in this Sentry platform instance."""
+
+    def __init__(self, regions: Iterable[Region]) -> None:
+        self.regions = frozenset(regions)
+        self.by_name = {r.name: r for r in self.regions}
+        self.by_id = {r.id: r for r in self.regions}
+
+
+@functools.lru_cache(maxsize=1)
+def _load_global_regions() -> _RegionMapping:
+    from django.conf import settings
+
+    # For now, assume that all region configs can be taken in through Django
+    # settings. We may investigate other ways of delivering those configs in
+    # production.
+    return _RegionMapping(settings.SENTRY_REGION_CONFIG)
+
+
+def get_region_by_name(name: str) -> Region:
+    """Look up a region by name."""
+    try:
+        return _load_global_regions().by_name[name]
+    except KeyError:
+        raise RegionResolutionError(f"No region with name: {name!r}")
+
+
+def get_region_by_id(id: int) -> Region:
+    """Look up a region by numeric ID."""
+    try:
+        return _load_global_regions().by_id[id]
+    except KeyError:
+        raise RegionResolutionError(f"No region with numeric ID: {id}")
+
+
+def get_region_for_organization(organization: Organization) -> Region:
+    """Resolve an organization to the region where its data is stored.
+
+    Raises RegionContextError if this Sentry platform instance is configured to
+    run only in monolith mode.
+    """
+    mapping = _load_global_regions()
+
+    if not mapping.regions:
+        raise RegionContextError("No regions are configured")
+
+    # Backend representation to be determined. If you are working on code
+    # that depends on this method, you can mock it out in unit tests or
+    # temporarily hard-code a placeholder.
+    raise NotImplementedError
+
+
+def get_local_region() -> Region | None:
+    """Get the region in which this server instance is running.
+
+    Raises RegionContextError if this server instance is not a region silo.
+    """
+    from django.conf import settings
+
+    if SiloMode.get_current_mode() != SiloMode.REGION:
+        raise RegionContextError("Not a region silo")
+
+    if not settings.SENTRY_REGION:
+        raise Exception("SENTRY_REGION must be set when server is in REGION silo mode")
+    return get_region_by_name(settings.SENTRY_REGION)

+ 11 - 4
src/sentry/utils/snowflake.py

@@ -7,6 +7,7 @@ from django.db import IntegrityError, transaction
 from rest_framework import status
 from rest_framework.exceptions import APIException
 
+from sentry.types.region import RegionContextError, get_local_region
 from sentry.utils import redis
 
 _TTL = timedelta(minutes=5)
@@ -60,6 +61,8 @@ assert ID_VALIDATOR.length == 53
 MAX_AVAILABLE_REGION_SEQUENCES = 1 << REGION_SEQUENCE.length
 assert MAX_AVAILABLE_REGION_SEQUENCES > 0
 
+NULL_REGION_ID = 0
+
 
 def msb_0_ordering(value, width):
     """
@@ -72,10 +75,14 @@ def msb_0_ordering(value, width):
 
 
 def generate_snowflake_id(redis_key: str) -> int:
-    segment_values = {
-        VERSION_ID: msb_0_ordering(settings.SNOWFLAKE_VERSION_ID, VERSION_ID.length),
-        REGION_ID: settings.SNOWFLAKE_REGION_ID,
-    }
+    segment_values = {}
+
+    segment_values[VERSION_ID] = msb_0_ordering(settings.SNOWFLAKE_VERSION_ID, VERSION_ID.length)
+
+    try:
+        segment_values[REGION_ID] = get_local_region().id
+    except RegionContextError:  # expected if running in monolith mode
+        segment_values[REGION_ID] = NULL_REGION_ID
 
     current_time = datetime.now().timestamp()
     # supports up to 130 years

+ 0 - 0
tests/sentry/types/__init__.py


+ 53 - 0
tests/sentry/types/test_region.py

@@ -0,0 +1,53 @@
+import pytest
+from django.test import override_settings
+
+from sentry.silo import SiloMode
+from sentry.testutils import TestCase
+from sentry.testutils.region import override_regions
+from sentry.types.region import (
+    Region,
+    RegionCategory,
+    RegionContextError,
+    RegionResolutionError,
+    get_local_region,
+    get_region_by_id,
+    get_region_by_name,
+    get_region_for_organization,
+)
+
+
+class RegionMappingTest(TestCase):
+    def test_region_mapping(self):
+        regions = [
+            Region("north_america", 1, "na.sentry.io", RegionCategory.MULTI_TENANT),
+            Region("europe", 2, "eu.sentry.io", RegionCategory.MULTI_TENANT),
+            Region("acme-single-tenant", 3, "acme.my.sentry.io", RegionCategory.SINGLE_TENANT),
+        ]
+        with override_regions(regions):
+            assert get_region_by_id(1) == regions[0]
+            assert get_region_by_name("europe") == regions[1]
+
+            with pytest.raises(RegionResolutionError):
+                get_region_by_id(4)
+            with pytest.raises(RegionResolutionError):
+                get_region_by_name("nowhere")
+
+    def test_get_for_organization(self):
+        with override_regions(()):
+            org = self.create_organization()
+            with pytest.raises(RegionContextError):
+                get_region_for_organization(org)
+
+    def test_get_local_region(self):
+        regions = [
+            Region("north_america", 1, "na.sentry.io", RegionCategory.MULTI_TENANT),
+            Region("europe", 2, "eu.sentry.io", RegionCategory.MULTI_TENANT),
+        ]
+
+        with override_regions(regions):
+            with override_settings(SILO_MODE=SiloMode.REGION, SENTRY_REGION="north_america"):
+                assert get_local_region() == regions[0]
+
+            with override_settings(SILO_MODE=SiloMode.MONOLITH):
+                with pytest.raises(RegionContextError):
+                    get_local_region()

+ 28 - 0
tests/sentry/utils/test_snowflake.py

@@ -2,12 +2,18 @@ from datetime import datetime
 
 import pytest
 from django.conf import settings
+from django.test import override_settings
 from freezegun import freeze_time
 
+from sentry.silo import SiloMode
 from sentry.testutils import TestCase
+from sentry.testutils.region import override_regions
+from sentry.types.region import Region, RegionCategory
+from sentry.utils import snowflake
 from sentry.utils.snowflake import (
     _TTL,
     MAX_AVAILABLE_REGION_SEQUENCES,
+    SnowflakeBitSegment,
     generate_snowflake_id,
     get_redis_cluster,
 )
@@ -57,3 +63,25 @@ class SnowflakeUtilsTest(TestCase):
             generate_snowflake_id("test_redis_key")
 
         assert str(context.value) == "No available ID"
+
+    @freeze_time(CURRENT_TIME)
+    def test_generate_correct_ids_with_region_id(self):
+        regions = [
+            Region("test-region-1", 1, "localhost:8001", RegionCategory.MULTI_TENANT),
+            Region("test-region-2", 2, "localhost:8002", RegionCategory.MULTI_TENANT),
+        ]
+        with override_regions(regions):
+
+            with override_settings(SILO_MODE=SiloMode.REGION, SENTRY_REGION="test-region-1"):
+                snowflake1 = generate_snowflake_id("test_redis_key")
+            with override_settings(SILO_MODE=SiloMode.REGION, SENTRY_REGION="test-region-2"):
+                snowflake2 = generate_snowflake_id("test_redis_key")
+
+            def recover_segment_value(segment: SnowflakeBitSegment, value: int) -> int:
+                for s in reversed(snowflake.BIT_SEGMENT_SCHEMA):
+                    if s == segment:
+                        return value & ((1 << s.length) - 1)
+                    value >>= s.length
+
+            assert recover_segment_value(snowflake.REGION_ID, snowflake1) == regions[0].id
+            assert recover_segment_value(snowflake.REGION_ID, snowflake2) == regions[1].id