Browse Source

feat(ds): Send boosted releases to relay as part of ProjectConfig [TET-497] (#40415)

Sends boosted releases to Relay as dynamic sampling rules, each with a start
and end time range, as part of the project config.
Continuation of #40403

Co-authored-by: getsantry[bot] <66042841+getsantry[bot]@users.noreply.github.com>
Ahmed Etefy 2 years ago
parent
commit
dba4bbf6a6

+ 1 - 0
mypy.ini

@@ -44,6 +44,7 @@ files = fixtures/mypy-stubs,
         src/sentry/db/models/query.py,
         src/sentry/db/models/utils.py,
         src/sentry/digests/,
+        src/sentry/dynamic_sampling/,
         src/sentry/eventstream/base.py,
         src/sentry/eventstream/snuba.py,
         src/sentry/eventstream/kafka/consumer_strategy.py,

+ 0 - 32
src/sentry/dynamic_sampling/__init__.py

@@ -1,32 +0,0 @@
-from typing import List
-
-import sentry_sdk
-
-from sentry import quotas
-from sentry.dynamic_sampling.feature_multiplexer import DynamicSamplingFeatureMultiplexer
-from sentry.dynamic_sampling.utils import BaseRule, generate_environment_rule, generate_uniform_rule
-from sentry.models import Project
-
-
-def generate_rules(project: Project) -> List[BaseRule]:
-    """
-    This function handles generate rules logic or fallback empty list of rules
-    """
-    rules = []
-
-    sample_rate = quotas.get_blended_sample_rate(project)
-
-    if sample_rate is None:
-        try:
-            raise Exception("get_blended_sample_rate returns none")
-        except Exception:
-            sentry_sdk.capture_exception()
-    else:
-        boost_environments = DynamicSamplingFeatureMultiplexer.get_user_bias_by_id(
-            "boostEnvironments", project.get_option("sentry:dynamic_sampling_biases", None)
-        )
-        if boost_environments["active"] and sample_rate < 1.0:
-            rules.append(generate_environment_rule())
-        rules.append(generate_uniform_rule(sample_rate))
-
-    return rules

+ 5 - 7
src/sentry/dynamic_sampling/feature_multiplexer.py

@@ -60,13 +60,11 @@ class DynamicSamplingFeatureMultiplexer:
                 returned_biases.append(bias)
         return returned_biases
 
+    @classmethod
+    def get_enabled_user_biases(cls, user_set_biases: Optional[List[Bias]]) -> Set[str]:
+        users_biases = cls.get_user_biases(user_set_biases)
+        return {bias["id"] for bias in users_biases if bias["active"]}
+
     @staticmethod
     def get_supported_biases_ids() -> Set[str]:
         return {bias["id"] for bias in DEFAULT_BIASES}
-
-    @classmethod
-    def get_user_bias_by_id(cls, bias_id: str, user_set_biases: Optional[List[Bias]]) -> Bias:
-        for bias in cls.get_user_biases(user_set_biases):
-            if bias["id"] == bias_id:
-                return bias
-        raise ValueError(f"{bias_id} is not in supported biases")

+ 9 - 8
src/sentry/dynamic_sampling/latest_release_booster.py

@@ -1,39 +1,40 @@
 from datetime import datetime
+from typing import Any, List, Tuple
 
 from django.conf import settings
 from pytz import UTC
 
+from sentry.dynamic_sampling.utils import BOOSTED_RELEASES_LIMIT
 from sentry.utils import redis
 
 BOOSTED_RELEASE_TIMEOUT = 60 * 60
 ONE_DAY_TIMEOUT_MS = 60 * 60 * 24 * 1000
-BOOSTED_RELEASES_LIMIT = 10
 
 
 class TooManyBoostedReleasesException(Exception):
     pass
 
 
-def get_redis_client_for_ds():
+def get_redis_client_for_ds() -> Any:
     cluster_key = getattr(settings, "SENTRY_DYNAMIC_SAMPLING_RULES_REDIS_CLUSTER", "default")
     return redis.redis_clusters.get(cluster_key)
 
 
-def generate_cache_key_for_observed_release(project_id, release_id):
+def generate_cache_key_for_observed_release(project_id: int, release_id: int) -> str:
     """
     Generates a cache key for releases that had a transaction observed in the last 24 hours
     """
     return f"ds::p:{project_id}:r:{release_id}"
 
 
-def generate_cache_key_for_boosted_release(project_id):
+def generate_cache_key_for_boosted_release(project_id: int) -> str:
     """
     Generates a cache key for the boosted releases for a given project.
     """
     return f"ds::p:{project_id}:boosted_releases"
 
 
-def observe_release(project_id, release_id):
+def observe_release(project_id: int, release_id: int) -> bool:
     """
     Checks if release was observed in the last 24 hours, and resets the cache timeout. If the release was observed,
     returns True otherwise returns False.
@@ -49,10 +50,10 @@ def observe_release(project_id, release_id):
     #  versions these two operations can be done in a single call.
     release_observed = redis_client.getset(name=cache_key, value=1)
     redis_client.pexpire(cache_key, ONE_DAY_TIMEOUT_MS)
-    return release_observed == "1"
+    return release_observed == "1"  # type: ignore
 
 
-def get_boosted_releases(project_id):
+def get_boosted_releases(project_id: int) -> List[Tuple[int, float]]:
     """
     Function that returns the releases that should be boosted for a given project, and excludes expired releases.
     """
@@ -75,7 +76,7 @@ def get_boosted_releases(project_id):
     return boosted_releases
 
 
-def add_boosted_release(project_id, release_id):
+def add_boosted_release(project_id: int, release_id: int) -> None:
     """
     Function that adds a release to the list of active boosted releases for a given project.
     """

+ 135 - 0
src/sentry/dynamic_sampling/rules_generator.py

@@ -0,0 +1,135 @@
+from datetime import datetime
+from typing import List, Optional, Union, cast
+
+import sentry_sdk
+from pytz import UTC
+
+from sentry import quotas
+from sentry.dynamic_sampling.feature_multiplexer import DynamicSamplingFeatureMultiplexer
+from sentry.dynamic_sampling.latest_release_booster import (
+    BOOSTED_RELEASE_TIMEOUT,
+    get_boosted_releases,
+)
+from sentry.dynamic_sampling.utils import (
+    BOOSTED_RELEASES_LIMIT,
+    RELEASE_BOOST_FACTOR,
+    RESERVED_IDS,
+    BaseRule,
+    ReleaseRule,
+    RuleType,
+)
+from sentry.models import Project, Release
+
+
+def generate_uniform_rule(sample_rate: Optional[float]) -> BaseRule:
+    return {
+        "sampleRate": sample_rate,
+        "type": "trace",
+        "active": True,
+        "condition": {
+            "op": "and",
+            "inner": [],
+        },
+        "id": RESERVED_IDS[RuleType.UNIFORM_RULE],
+    }
+
+
+def generate_environment_rule() -> BaseRule:
+    return {
+        "sampleRate": 1,
+        "type": "trace",
+        "condition": {
+            "op": "or",
+            "inner": [
+                {
+                    "op": "glob",
+                    "name": "trace.environment",
+                    "value": ["*dev*", "*test*"],
+                    "options": {"ignoreCase": True},
+                }
+            ],
+        },
+        "active": True,
+        "id": RESERVED_IDS[RuleType.BOOST_ENVIRONMENTS_RULE],
+    }
+
+
+def generate_boost_release_rules(project_id: int, sample_rate: float) -> List[ReleaseRule]:
+    boosted_release_in_cache = get_boosted_releases(project_id)
+    if not boosted_release_in_cache:
+        return []
+
+    # Capped to the latest BOOSTED_RELEASES_LIMIT releases
+    boosted_releases_objs = Release.objects.filter(
+        id__in=[r[0] for r in boosted_release_in_cache[-BOOSTED_RELEASES_LIMIT:]]
+    )
+    boosted_releases_dict = {release.id: release.version for release in boosted_releases_objs}
+
+    boosted_release_versions = []
+    for (release_id, timestamp) in boosted_release_in_cache:
+        if release_id not in boosted_releases_dict:
+            continue
+        boosted_release_versions.append((boosted_releases_dict[release_id], timestamp))
+
+    boosted_sample_rate = min(1.0, sample_rate * RELEASE_BOOST_FACTOR)
+    return cast(
+        List[ReleaseRule],
+        [
+            {
+                "sampleRate": boosted_sample_rate,
+                "type": "trace",
+                "active": True,
+                "condition": {
+                    "op": "and",
+                    "inner": [
+                        {
+                            "op": "glob",
+                            "name": "trace.release",
+                            "value": [release_version],
+                        }
+                    ],
+                },
+                "id": RESERVED_IDS[RuleType.BOOST_LATEST_RELEASES_RULE] + idx,
+                "timeRange": {
+                    "start": str(datetime.utcfromtimestamp(timestamp).replace(tzinfo=UTC)),
+                    "end": str(
+                        datetime.utcfromtimestamp(timestamp + BOOSTED_RELEASE_TIMEOUT).replace(
+                            tzinfo=UTC
+                        )
+                    ),
+                },
+            }
+            for idx, (release_version, timestamp) in enumerate(boosted_release_versions)
+        ],
+    )
+
+
+def generate_rules(project: Project) -> List[Union[BaseRule, ReleaseRule]]:
+    """
+    Generates the dynamic sampling rules for a project, falling back to an empty list of rules
+    """
+    rules: List[Union[BaseRule, ReleaseRule]] = []
+
+    sample_rate = quotas.get_blended_sample_rate(project)
+
+    if sample_rate is None:
+        try:
+            raise Exception("get_blended_sample_rate returns none")
+        except Exception:
+            sentry_sdk.capture_exception()
+    else:
+        if sample_rate < 1.0:
+
+            enabled_biases = DynamicSamplingFeatureMultiplexer.get_enabled_user_biases(
+                project.get_option("sentry:dynamic_sampling_biases", None)
+            )
+            # Latest releases
+            if RuleType.BOOST_LATEST_RELEASES_RULE.value in enabled_biases:
+                rules += generate_boost_release_rules(project.id, sample_rate)
+
+            # Environments boost
+            if RuleType.BOOST_ENVIRONMENTS_RULE.value in enabled_biases:
+                rules.append(generate_environment_rule())
+        rules.append(generate_uniform_rule(sample_rate))
+
+    return rules

+ 26 - 35
src/sentry/dynamic_sampling/utils.py

@@ -1,6 +1,8 @@
+from enum import Enum
 from typing import Dict, List, Optional, TypedDict
 
-UNIFORM_RULE_RESERVED_ID = 0
+BOOSTED_RELEASES_LIMIT = 10
+RELEASE_BOOST_FACTOR = 5
 
 
 class Bias(TypedDict):
@@ -10,14 +12,27 @@ class Bias(TypedDict):
 
 # These represent the biases that are applied to user by default as part of the adaptive dynamic sampling experience.
 # These can be overridden by the project details endpoint
+class RuleType(Enum):
+    UNIFORM_RULE = "uniformRule"
+    BOOST_ENVIRONMENTS_RULE = "boostEnvironments"
+    BOOST_LATEST_RELEASES_RULE = "boostLatestRelease"
+    IGNORE_HEALTHCHECKS_RULE = "ignoreHealthChecks"
+
+
 DEFAULT_BIASES: List[Bias] = [
-    {"id": "boostEnvironments", "active": True},
+    {"id": RuleType.BOOST_ENVIRONMENTS_RULE.value, "active": True},
     {
-        "id": "boostLatestRelease",
+        "id": RuleType.BOOST_LATEST_RELEASES_RULE.value,
         "active": True,
     },
-    {"id": "ignoreHealthChecks", "active": True},
+    {"id": RuleType.IGNORE_HEALTHCHECKS_RULE.value, "active": True},
 ]
+RESERVED_IDS = {
+    RuleType.UNIFORM_RULE: 1000,
+    RuleType.BOOST_ENVIRONMENTS_RULE: 1001,
+    RuleType.IGNORE_HEALTHCHECKS_RULE: 1002,
+    RuleType.BOOST_LATEST_RELEASES_RULE: 1500,
+}
 
 
 class Inner(TypedDict):
@@ -40,34 +55,10 @@ class BaseRule(TypedDict):
     id: int
 
 
-def generate_uniform_rule(sample_rate: Optional[float]) -> BaseRule:
-    return {
-        "sampleRate": sample_rate,
-        "type": "trace",
-        "active": True,
-        "condition": {
-            "op": "and",
-            "inner": [],
-        },
-        "id": UNIFORM_RULE_RESERVED_ID,
-    }
-
-
-def generate_environment_rule() -> BaseRule:
-    return {
-        "sampleRate": 1,
-        "type": "trace",
-        "condition": {
-            "op": "or",
-            "inner": [
-                {
-                    "op": "glob",
-                    "name": "trace.environment",
-                    "value": ["*dev*", "*test*"],
-                    "options": {"ignoreCase": True},
-                }
-            ],
-        },
-        "active": True,
-        "id": 1,
-    }
+class TimeRange(TypedDict):
+    start: str
+    end: str
+
+
+class ReleaseRule(BaseRule):
+    timeRange: Optional[TimeRange]

+ 1 - 1
src/sentry/event_manager.py

@@ -39,7 +39,7 @@ from sentry.constants import (
     DataCategory,
 )
 from sentry.culprit import generate_culprit
-from sentry.dynamic_sampling import DynamicSamplingFeatureMultiplexer
+from sentry.dynamic_sampling.feature_multiplexer import DynamicSamplingFeatureMultiplexer
 from sentry.dynamic_sampling.latest_release_booster import (
     TooManyBoostedReleasesException,
     add_boosted_release,

+ 1 - 1
src/sentry/relay/config/__init__.py

@@ -20,8 +20,8 @@ from sentry_sdk import Hub, capture_exception
 from sentry import features, killswitches, quotas, utils
 from sentry.constants import ObjectStatus
 from sentry.datascrubbing import get_datascrubbing_settings, get_pii_config
-from sentry.dynamic_sampling import generate_rules
 from sentry.dynamic_sampling.feature_multiplexer import DynamicSamplingFeatureMultiplexer
+from sentry.dynamic_sampling.rules_generator import generate_rules
 from sentry.grouping.api import get_grouping_config_dict_for_project
 from sentry.ingest.inbound_filters import (
     FilterStatKeys,

+ 149 - 15
tests/sentry/dynamic_sampling/test_generate_rules.py

@@ -1,10 +1,18 @@
+import time
+from unittest import mock
 from unittest.mock import MagicMock, patch
 
-from sentry.dynamic_sampling import generate_rules
+from freezegun import freeze_time
+from sentry_relay.processing import validate_sampling_configuration
 
+from sentry.dynamic_sampling.latest_release_booster import get_redis_client_for_ds
+from sentry.dynamic_sampling.rules_generator import generate_rules
+from sentry.testutils import TestCase
+from sentry.utils import json
 
-@patch("sentry.dynamic_sampling.sentry_sdk")
-@patch("sentry.dynamic_sampling.quotas.get_blended_sample_rate")
+
+@patch("sentry.dynamic_sampling.rules_generator.sentry_sdk")
+@patch("sentry.dynamic_sampling.rules_generator.quotas.get_blended_sample_rate")
 def test_generate_rules_capture_exception(get_blended_sample_rate, sentry_sdk):
     get_blended_sample_rate.return_value = None
     # since we mock get_blended_sample_rate function
@@ -18,11 +26,13 @@ def test_generate_rules_capture_exception(get_blended_sample_rate, sentry_sdk):
 
 
 @patch(
-    "sentry.dynamic_sampling.feature_multiplexer.DynamicSamplingFeatureMultiplexer.get_user_bias_by_id"
+    "sentry.dynamic_sampling.feature_multiplexer.DynamicSamplingFeatureMultiplexer.get_enabled_user_biases"
 )
-@patch("sentry.dynamic_sampling.quotas.get_blended_sample_rate")
-def test_generate_rules_return_uniform_rules_with_rate(get_blended_sample_rate, get_user_bias):
-    get_user_bias.return_value = {"id": "boostEnvironments", "active": False}
+@patch("sentry.dynamic_sampling.rules_generator.quotas.get_blended_sample_rate")
+def test_generate_rules_return_uniform_rules_with_rate(
+    get_blended_sample_rate, get_enabled_user_biases
+):
+    get_enabled_user_biases.return_value = {"id": "boostEnvironments", "active": False}
     get_blended_sample_rate.return_value = 0.1
     # since we mock get_blended_sample_rate function
     # no need to create real project in DB
@@ -31,18 +41,18 @@ def test_generate_rules_return_uniform_rules_with_rate(get_blended_sample_rate,
         {
             "active": True,
             "condition": {"inner": [], "op": "and"},
-            "id": 0,
+            "id": 1000,
             "sampleRate": 0.1,
             "type": "trace",
         }
     ]
     get_blended_sample_rate.assert_called_with(fake_project)
-    get_user_bias.assert_called_with(
-        "boostEnvironments", fake_project.get_option("sentry:dynamic_sampling_biases", None)
+    get_enabled_user_biases.assert_called_with(
+        fake_project.get_option("sentry:dynamic_sampling_biases", None)
     )
 
 
-@patch("sentry.dynamic_sampling.quotas.get_blended_sample_rate")
+@patch("sentry.dynamic_sampling.rules_generator.quotas.get_blended_sample_rate")
 def test_generate_rules_return_uniform_rules_and_env_rule(get_blended_sample_rate):
     get_blended_sample_rate.return_value = 0.1
     # since we mock get_blended_sample_rate function
@@ -64,12 +74,12 @@ def test_generate_rules_return_uniform_rules_and_env_rule(get_blended_sample_rat
                 ],
             },
             "active": True,
-            "id": 1,
+            "id": 1001,
         },
         {
             "active": True,
             "condition": {"inner": [], "op": "and"},
-            "id": 0,
+            "id": 1000,
             "sampleRate": 0.1,
             "type": "trace",
         },
@@ -77,7 +87,7 @@ def test_generate_rules_return_uniform_rules_and_env_rule(get_blended_sample_rat
     get_blended_sample_rate.assert_called_with(fake_project)
 
 
-@patch("sentry.dynamic_sampling.quotas.get_blended_sample_rate")
+@patch("sentry.dynamic_sampling.rules_generator.quotas.get_blended_sample_rate")
 def test_generate_rules_return_uniform_rule_with_100_rate_and_without_env_rule(
     get_blended_sample_rate,
 ):
@@ -89,9 +99,133 @@ def test_generate_rules_return_uniform_rule_with_100_rate_and_without_env_rule(
         {
             "active": True,
             "condition": {"inner": [], "op": "and"},
-            "id": 0,
+            "id": 1000,
             "sampleRate": 1.0,
             "type": "trace",
         },
     ]
     get_blended_sample_rate.assert_called_with(fake_project)
+
+
+class LatestReleaseTest(TestCase):
+    def setUp(self):
+        self.project.update_option(
+            "sentry:dynamic_sampling_biases", [{"id": "boostEnvironments", "active": False}]
+        )
+        self.redis_client = get_redis_client_for_ds()
+
+    @freeze_time("2022-10-21 18:50:25.000000+00:00")
+    @patch("sentry.dynamic_sampling.rules_generator.quotas.get_blended_sample_rate")
+    def test_generate_rules_return_uniform_rules_and_latest_release_rule(
+        self, get_blended_sample_rate
+    ):
+        get_blended_sample_rate.return_value = 0.1
+        # since we mock get_blended_sample_rate function
+        # no need to create real project in DB
+        ts = time.time()
+
+        self.redis_client.hset(f"ds::p:{self.project.id}:boosted_releases", self.release.id, ts)
+
+        expected = [
+            {
+                "sampleRate": 0.5,
+                "type": "trace",
+                "active": True,
+                "condition": {
+                    "op": "and",
+                    "inner": [{"op": "glob", "name": "trace.release", "value": ["foo-1.0"]}],
+                },
+                "id": 1500,
+                "timeRange": {
+                    "start": "2022-10-21 18:50:25+00:00",
+                    "end": "2022-10-21 19:50:25+00:00",
+                },
+            },
+            {
+                "active": True,
+                "condition": {"inner": [], "op": "and"},
+                "id": 1000,
+                "sampleRate": 0.1,
+                "type": "trace",
+            },
+        ]
+        assert generate_rules(self.project) == expected
+        config_str = json.dumps({"rules": expected})
+        validate_sampling_configuration(config_str)
+
+    @patch("sentry.dynamic_sampling.rules_generator.quotas.get_blended_sample_rate")
+    def test_generate_rules_return_uniform_rule_with_100_rate_and_without_latest_release_rule(
+        self,
+        get_blended_sample_rate,
+    ):
+        get_blended_sample_rate.return_value = 1.0
+        assert generate_rules(self.project) == [
+            {
+                "active": True,
+                "condition": {"inner": [], "op": "and"},
+                "id": 1000,
+                "sampleRate": 1.0,
+                "type": "trace",
+            },
+        ]
+
+    @patch("sentry.dynamic_sampling.rules_generator.quotas.get_blended_sample_rate")
+    def test_generate_rules_return_uniform_rule_with_non_existent_releases(
+        self,
+        get_blended_sample_rate,
+    ):
+        get_blended_sample_rate.return_value = 1.0
+        self.redis_client.hset(f"ds::p:{self.project.id}:boosted_releases", 34345, time.time())
+        assert generate_rules(self.project) == [
+            {
+                "active": True,
+                "condition": {"inner": [], "op": "and"},
+                "id": 1000,
+                "sampleRate": 1.0,
+                "type": "trace",
+            },
+        ]
+
+    @freeze_time("2022-10-21 18:50:25.000000+00:00")
+    @patch("sentry.dynamic_sampling.rules_generator.quotas.get_blended_sample_rate")
+    @mock.patch("sentry.dynamic_sampling.rules_generator.BOOSTED_RELEASES_LIMIT", 1)
+    def test_generate_rules_return_uniform_rule_with_more_releases_than_the_limit(
+        self,
+        get_blended_sample_rate,
+    ):
+        get_blended_sample_rate.return_value = 0.1
+        release_2 = self.create_release(self.project, version="foo-2.0")
+
+        self.redis_client.hset(
+            f"ds::p:{self.project.id}:boosted_releases", self.release.id, time.time()
+        )
+        self.redis_client.hset(
+            f"ds::p:{self.project.id}:boosted_releases", release_2.id, time.time()
+        )
+
+        expected = [
+            {
+                "sampleRate": 0.5,
+                "type": "trace",
+                "active": True,
+                "condition": {
+                    "op": "and",
+                    "inner": [{"op": "glob", "name": "trace.release", "value": ["foo-2.0"]}],
+                },
+                "id": 1500,
+                "timeRange": {
+                    "start": "2022-10-21 18:50:25+00:00",
+                    "end": "2022-10-21 19:50:25+00:00",
+                },
+            },
+            {
+                "active": True,
+                "condition": {"inner": [], "op": "and"},
+                "id": 1000,
+                "sampleRate": 0.1,
+                "type": "trace",
+            },
+        ]
+        assert generate_rules(self.project) == expected
+        config_str = json.dumps({"rules": expected})
+        validate_sampling_configuration(config_str)

+ 3 - 3
tests/sentry/dynamic_sampling/test_utils.py

@@ -1,4 +1,4 @@
-from sentry.dynamic_sampling.utils import generate_environment_rule, generate_uniform_rule
+from sentry.dynamic_sampling.rules_generator import generate_environment_rule, generate_uniform_rule
 
 
 def test_generate_uniform_rule_return_rate():
@@ -6,7 +6,7 @@ def test_generate_uniform_rule_return_rate():
     assert generate_uniform_rule(sample_rate) == {
         "active": True,
         "condition": {"inner": [], "op": "and"},
-        "id": 0,
+        "id": 1000,
         "sampleRate": sample_rate,
         "type": "trace",
     }
@@ -14,7 +14,7 @@ def test_generate_uniform_rule_return_rate():
 
 def test_generate_environment_rule():
     bias_env_rule = generate_environment_rule()
-    assert bias_env_rule["id"] == 1
+    assert bias_env_rule["id"] == 1001
     assert bias_env_rule["condition"]["inner"][0] == {
         "op": "glob",
         "name": "trace.environment",

Some files were not shown because too many files changed in this diff