Browse Source

feat(dynamic-sampling): Implement dynamic factor function and new factor-based rules [TET-702] (#44500)

Riccardo Busetti 2 years ago
parent
commit
7f5822a900

+ 0 - 1
src/sentry/dynamic_sampling/rules/base.py

@@ -39,7 +39,6 @@ def _get_rules_of_enabled_biases(
         ):
             rules += bias.get_rules(BiasParams(project, base_sample_rate))
 
-    # We want to log only rules v2, to avoid confusion and duplication.
     log_rules(project.organization.id, project.id, rules)
 
     return rules

+ 7 - 4
src/sentry/dynamic_sampling/rules/biases/boost_key_transactions_bias.py

@@ -9,10 +9,11 @@ from sentry.dynamic_sampling.rules.biases.base import (
 )
 from sentry.dynamic_sampling.rules.helpers.key_transactions import get_key_transactions
 from sentry.dynamic_sampling.rules.utils import (
-    KEY_TRANSACTION_BOOST_FACTOR,
+    KEY_TRANSACTIONS_BOOST_FACTOR,
     RESERVED_IDS,
     PolymorphicRule,
     RuleType,
+    apply_dynamic_factor,
 )
 
 
@@ -20,7 +21,9 @@ class BoostKeyTransactionsDataProvider(BiasDataProvider):
     def get_bias_data(self, bias_params: BiasParams) -> BiasData:
         return {
             "id": RESERVED_IDS[RuleType.BOOST_KEY_TRANSACTIONS_RULE],
-            "sampleRate": min(1.0, bias_params.base_sample_rate * KEY_TRANSACTION_BOOST_FACTOR),
+            "factor": apply_dynamic_factor(
+                bias_params.base_sample_rate, KEY_TRANSACTIONS_BOOST_FACTOR
+            ),
             "keyTransactions": get_key_transactions(bias_params.project),
         }
 
@@ -33,8 +36,8 @@ class BoostKeyTransactionsRulesGenerator(BiasRulesGenerator):
         return [
             {
                 "samplingValue": {
-                    "type": "sampleRate",
-                    "value": bias_data["sampleRate"],
+                    "type": "factor",
+                    "value": bias_data["factor"],
                 },
                 "type": "transaction",
                 "condition": {

+ 10 - 8
src/sentry/dynamic_sampling/rules/biases/boost_latest_releases_bias.py

@@ -12,10 +12,12 @@ from sentry.dynamic_sampling.rules.biases.base import (
 )
 from sentry.dynamic_sampling.rules.helpers.latest_releases import ProjectBoostedReleases
 from sentry.dynamic_sampling.rules.utils import (
-    RELEASE_BOOST_FACTOR,
+    LATEST_RELEASES_BOOST_DECAYED_FACTOR,
+    LATEST_RELEASES_BOOST_FACTOR,
     RESERVED_IDS,
     PolymorphicRule,
     RuleType,
+    apply_dynamic_factor,
 )
 
 
@@ -23,8 +25,10 @@ class BoostLatestReleasesDataProvider(BiasDataProvider):
     def get_bias_data(self, bias_params: BiasParams) -> BiasData:
         return {
             "id": RESERVED_IDS[RuleType.BOOST_LATEST_RELEASES_RULE],
-            "baseSampleRate": bias_params.base_sample_rate,
-            "sampleRate": min(1.0, bias_params.base_sample_rate * RELEASE_BOOST_FACTOR),
+            "factor": apply_dynamic_factor(
+                bias_params.base_sample_rate, LATEST_RELEASES_BOOST_FACTOR
+            ),
+            "decayedFactor": LATEST_RELEASES_BOOST_DECAYED_FACTOR,
             "boostedReleases": ProjectBoostedReleases(
                 bias_params.project.id
             ).get_extended_boosted_releases(),
@@ -40,8 +44,8 @@ class BoostLatestReleasesRulesGenerator(BiasRulesGenerator):
             [
                 {
                     "samplingValue": {
-                        "type": "sampleRate",
-                        "value": bias_data["sampleRate"],
+                        "type": "factor",
+                        "value": bias_data["factor"],
                     },
                     "type": "trace",
                     "active": True,
@@ -75,11 +79,9 @@ class BoostLatestReleasesRulesGenerator(BiasRulesGenerator):
                             ).replace(tzinfo=UTC)
                         ),
                     },
-                    # We want to use the linear decaying function for latest release boosting, with the goal
-                    # of interpolating the adoption growth with the reduction in sample rate.
                     "decayingFn": {
                         "type": "linear",
-                        "decayedValue": bias_data["baseSampleRate"],
+                        "decayedValue": bias_data["decayedFactor"],
                     },
                 }
                 for idx, boosted_release in enumerate(boosted_releases)

+ 2 - 2
src/sentry/dynamic_sampling/rules/biases/ignore_health_checks_bias.py

@@ -8,7 +8,7 @@ from sentry.dynamic_sampling.rules.biases.base import (
     BiasRulesGenerator,
 )
 from sentry.dynamic_sampling.rules.utils import (
-    HEALTH_CHECK_DROPPING_FACTOR,
+    IGNORE_HEALTH_CHECKS_FACTOR,
     RESERVED_IDS,
     PolymorphicRule,
     RuleType,
@@ -31,7 +31,7 @@ class IgnoreHealthChecksDataProvider(BiasDataProvider):
     def get_bias_data(self, bias_params: BiasParams) -> BiasData:
         return {
             "id": RESERVED_IDS[RuleType.IGNORE_HEALTH_CHECKS_RULE],
-            "sampleRate": bias_params.base_sample_rate / HEALTH_CHECK_DROPPING_FACTOR,
+            "sampleRate": bias_params.base_sample_rate / IGNORE_HEALTH_CHECKS_FACTOR,
             "healthCheckGlobs": HEALTH_CHECK_GLOBS,
         }
 

+ 1 - 1
src/sentry/dynamic_sampling/rules/combine.py

@@ -13,8 +13,8 @@ from sentry.dynamic_sampling.rules.utils import RuleType
 def get_relay_biases_combinator() -> BiasesCombinator:
     default_combinator = OrderedBiasesCombinator()
 
-    default_combinator.add(RuleType.BOOST_KEY_TRANSACTIONS_RULE, BoostKeyTransactionsBias())
     default_combinator.add(RuleType.IGNORE_HEALTH_CHECKS_RULE, IgnoreHealthChecksBias())
+    default_combinator.add(RuleType.BOOST_KEY_TRANSACTIONS_RULE, BoostKeyTransactionsBias())
 
     default_combinator.add(RuleType.BOOST_ENVIRONMENTS_RULE, BoostEnvironmentsBias())
     default_combinator.add(RuleType.BOOST_LATEST_RELEASES_RULE, BoostLatestReleasesBias())

+ 22 - 3
src/sentry/dynamic_sampling/rules/utils.py

@@ -6,9 +6,12 @@ from sentry.utils import json
 BOOSTED_RELEASES_LIMIT = 10
 BOOSTED_KEY_TRANSACTION_LIMIT = 10
 
-RELEASE_BOOST_FACTOR = 5
-KEY_TRANSACTION_BOOST_FACTOR = 5
-HEALTH_CHECK_DROPPING_FACTOR = 5
+KEY_TRANSACTIONS_BOOST_FACTOR = 1.5
+
+LATEST_RELEASES_BOOST_FACTOR = 1.5
+LATEST_RELEASES_BOOST_DECAYED_FACTOR = 1.0
+
+IGNORE_HEALTH_CHECKS_FACTOR = 5
 
 
 class ActivatableBias(TypedDict):
@@ -161,3 +164,19 @@ def get_enabled_user_biases(user_set_biases: Optional[List[ActivatableBias]]) ->
 
 def get_supported_biases_ids() -> Set[str]:
     return {bias["id"] for bias in DEFAULT_BIASES}
+
+
+def apply_dynamic_factor(base_sample_rate: float, x: float) -> float:
+    """
+    This function known as dynamic factor function is used during the rules generation in order to determine the factor
+    for each rule based on the base_sample_rate of the project.
+
+    The high-level idea is that we want to reduce the factor the bigger the base_sample_rate becomes, this is done
+    because multiplication will exceed 1 very quickly in case we don't reduce the factor.
+    """
+    if base_sample_rate <= 0.0 or base_sample_rate > 1.0:
+        raise Exception(
+            "The dynamic factor function requires a sample rate in the interval [0.x, 1.0] with x > 0."
+        )
+
+    return float(x / x**base_sample_rate)

+ 3 - 0
src/sentry/relay/config/__init__.py

@@ -162,7 +162,10 @@ def get_dynamic_sampling_config(project: Project) -> Optional[Mapping[str, Any]]
     if features.has("organizations:dynamic-sampling", project.organization) and options.get(
         "dynamic-sampling:enabled-biases"
     ):
+        # For compatibility reasons we want to return an empty list of old rules. This has been done in order to make
+        # old Relays use empty configs which will result in them forwarding sampling decisions to upstream Relays.
         return {"rules": [], "rulesV2": generate_rules(project)}
+
     return None
 
 

+ 3 - 3
tests/sentry/dynamic_sampling/rules/biases/test_boost_key_transactions_bias.py

@@ -13,12 +13,12 @@ from sentry.dynamic_sampling.rules.biases.boost_key_transactions_bias import (
 )
 def test_generate_bias_rules_v2(data_provider, default_project):
     rule_id = 1002
-    sample_rate = 0.5
+    factor = 1.5
     key_transactions = ["/foo", "/bar"]
 
     data_provider.get_bias_data.return_value = {
         "id": rule_id,
-        "sampleRate": sample_rate,
+        "factor": factor,
         "keyTransactions": key_transactions,
     }
 
@@ -38,7 +38,7 @@ def test_generate_bias_rules_v2(data_provider, default_project):
                 "op": "or",
             },
             "id": rule_id,
-            "samplingValue": {"type": "sampleRate", "value": sample_rate},
+            "samplingValue": {"type": "factor", "value": factor},
             "type": "transaction",
         }
     ]

+ 8 - 8
tests/sentry/dynamic_sampling/rules/biases/test_boost_latest_releases_bias.py

@@ -22,8 +22,8 @@ MOCK_DATETIME = ONE_DAY_AGO.replace(hour=10, minute=0, second=0, microsecond=0)
 def test_generate_bias_rules_v2(data_provider, default_project):
     now = timezone.now()
 
-    base_sample_rate = 0.1
-    sample_rate = 0.5
+    factor = 1.5
+    decayed_factor = 1.0
     platform = "python"
 
     default_project.update(platform=platform)
@@ -49,8 +49,8 @@ def test_generate_bias_rules_v2(data_provider, default_project):
 
     data_provider.get_bias_data.return_value = {
         "id": 1000,
-        "baseSampleRate": base_sample_rate,
-        "sampleRate": sample_rate,
+        "factor": factor,
+        "decayedFactor": decayed_factor,
         "boostedReleases": boosted_releases,
     }
 
@@ -66,12 +66,12 @@ def test_generate_bias_rules_v2(data_provider, default_project):
                 "op": "and",
             },
             "id": 1000,
-            "samplingValue": {"type": "sampleRate", "value": sample_rate},
+            "samplingValue": {"type": "factor", "value": factor},
             "timeRange": {
                 "end": (now + timedelta(seconds=LATEST_RELEASE_TTAS[platform])).isoformat(" "),
                 "start": now.isoformat(" "),
             },
-            "decayingFn": {"type": "linear", "decayedValue": base_sample_rate},
+            "decayingFn": {"type": "linear", "decayedValue": decayed_factor},
             "type": "trace",
         },
         {
@@ -84,12 +84,12 @@ def test_generate_bias_rules_v2(data_provider, default_project):
                 "op": "and",
             },
             "id": 1001,
-            "samplingValue": {"type": "sampleRate", "value": sample_rate},
+            "samplingValue": {"type": "factor", "value": factor},
             "timeRange": {
                 "end": (now + timedelta(seconds=LATEST_RELEASE_TTAS[platform])).isoformat(" "),
                 "start": now.isoformat(" "),
             },
-            "decayingFn": {"type": "linear", "decayedValue": base_sample_rate},
+            "decayingFn": {"type": "linear", "decayedValue": decayed_factor},
             "type": "trace",
         },
     ]

+ 36 - 24
tests/sentry/dynamic_sampling/test_generate_rules.py

@@ -13,6 +13,11 @@ from sentry.dynamic_sampling import (
     generate_rules,
     get_redis_client_for_ds,
 )
+from sentry.dynamic_sampling.rules.utils import (
+    KEY_TRANSACTIONS_BOOST_FACTOR,
+    LATEST_RELEASES_BOOST_DECAYED_FACTOR,
+    LATEST_RELEASES_BOOST_FACTOR,
+)
 from sentry.models import ProjectTeam
 from sentry.testutils.factories import Factories
 from sentry.utils import json
@@ -166,13 +171,13 @@ def test_generate_rules_return_uniform_rules_and_env_rule(get_blended_sample_rat
 
 
 @pytest.mark.django_db
+@patch("sentry.dynamic_sampling.rules.biases.boost_key_transactions_bias.apply_dynamic_factor")
 @patch("sentry.dynamic_sampling.rules.base.quotas.get_blended_sample_rate")
 def test_generate_rules_return_uniform_rules_and_key_transaction_rule(
-    get_blended_sample_rate, default_project, default_team
+    get_blended_sample_rate, apply_dynamic_factor, default_project, default_team
 ):
     get_blended_sample_rate.return_value = 0.1
-    # since we mock get_blended_sample_rate function
-    # no need to create real project in DB
+    apply_dynamic_factor.return_value = KEY_TRANSACTIONS_BOOST_FACTOR
 
     default_project.update_option(
         "sentry:dynamic_sampling_biases",
@@ -205,7 +210,7 @@ def test_generate_rules_return_uniform_rules_and_key_transaction_rule(
                 "op": "or",
             },
             "id": 1003,
-            "samplingValue": {"type": "sampleRate", "value": 0.5},
+            "samplingValue": {"type": "factor", "value": KEY_TRANSACTIONS_BOOST_FACTOR},
             "type": "transaction",
         },
         {
@@ -223,13 +228,13 @@ def test_generate_rules_return_uniform_rules_and_key_transaction_rule(
 
 
 @pytest.mark.django_db
+@patch("sentry.dynamic_sampling.rules.biases.boost_key_transactions_bias.apply_dynamic_factor")
 @patch("sentry.dynamic_sampling.rules.base.quotas.get_blended_sample_rate")
 def test_generate_rules_return_uniform_rules_and_key_transaction_rule_with_dups(
-    get_blended_sample_rate, default_project, default_team
+    get_blended_sample_rate, apply_dynamic_factor, default_project, default_team
 ):
     get_blended_sample_rate.return_value = 0.1
-    # since we mock get_blended_sample_rate function
-    # no need to create real project in DB
+    apply_dynamic_factor.return_value = KEY_TRANSACTIONS_BOOST_FACTOR
 
     default_project.update_option(
         "sentry:dynamic_sampling_biases",
@@ -271,7 +276,7 @@ def test_generate_rules_return_uniform_rules_and_key_transaction_rule_with_dups(
                 "op": "or",
             },
             "id": 1003,
-            "samplingValue": {"type": "sampleRate", "value": 0.5},
+            "samplingValue": {"type": "factor", "value": KEY_TRANSACTIONS_BOOST_FACTOR},
             "type": "transaction",
         },
         {
@@ -289,13 +294,13 @@ def test_generate_rules_return_uniform_rules_and_key_transaction_rule_with_dups(
 
 
 @pytest.mark.django_db
+@patch("sentry.dynamic_sampling.rules.biases.boost_key_transactions_bias.apply_dynamic_factor")
 @patch("sentry.dynamic_sampling.rules.base.quotas.get_blended_sample_rate")
 def test_generate_rules_return_uniform_rules_and_key_transaction_rule_with_many_records(
-    get_blended_sample_rate, default_project, default_team
+    get_blended_sample_rate, apply_dynamic_factor, default_project, default_team
 ):
     get_blended_sample_rate.return_value = 0.1
-    # since we mock get_blended_sample_rate function
-    # no need to create real project in DB
+    apply_dynamic_factor.return_value = KEY_TRANSACTIONS_BOOST_FACTOR
 
     default_project.update_option(
         "sentry:dynamic_sampling_biases",
@@ -331,7 +336,7 @@ def test_generate_rules_return_uniform_rules_and_key_transaction_rule_with_many_
                 "op": "or",
             },
             "id": 1003,
-            "samplingValue": {"type": "sampleRate", "value": 0.5},
+            "samplingValue": {"type": "factor", "value": KEY_TRANSACTIONS_BOOST_FACTOR},
             "type": "transaction",
         },
         {
@@ -372,6 +377,7 @@ def test_generate_rules_return_uniform_rule_with_100_rate_and_without_env_rule(
 
 
 @freeze_time("2022-10-21 18:50:25+00:00")
+@patch("sentry.dynamic_sampling.rules.biases.boost_latest_releases_bias.apply_dynamic_factor")
 @patch("sentry.dynamic_sampling.rules.base.quotas.get_blended_sample_rate")
 @pytest.mark.django_db
 @pytest.mark.parametrize(
@@ -386,6 +392,7 @@ def test_generate_rules_return_uniform_rule_with_100_rate_and_without_env_rule(
 )
 def test_generate_rules_with_different_project_platforms(
     get_blended_sample_rate,
+    apply_dynamic_factor,
     version,
     platform,
     end,
@@ -393,6 +400,7 @@ def test_generate_rules_with_different_project_platforms(
     latest_release_only,
 ):
     get_blended_sample_rate.return_value = 0.1
+    apply_dynamic_factor.return_value = LATEST_RELEASES_BOOST_FACTOR
 
     redis_client = get_redis_client_for_ds()
 
@@ -408,7 +416,7 @@ def test_generate_rules_with_different_project_platforms(
 
     assert generate_rules(default_project) == [
         {
-            "samplingValue": {"type": "sampleRate", "value": 0.5},
+            "samplingValue": {"type": "factor", "value": LATEST_RELEASES_BOOST_FACTOR},
             "type": "trace",
             "active": True,
             "condition": {
@@ -427,7 +435,7 @@ def test_generate_rules_with_different_project_platforms(
                 "start": "2022-10-21 18:50:25+00:00",
                 "end": end,
             },
-            "decayingFn": {"type": "linear", "decayedValue": 0.1},
+            "decayingFn": {"type": "linear", "decayedValue": LATEST_RELEASES_BOOST_DECAYED_FACTOR},
         },
         {
             "active": True,
@@ -444,11 +452,13 @@ def test_generate_rules_with_different_project_platforms(
 
 @pytest.mark.django_db
 @freeze_time("2022-10-21 18:50:25+00:00")
+@patch("sentry.dynamic_sampling.rules.biases.boost_latest_releases_bias.apply_dynamic_factor")
 @patch("sentry.dynamic_sampling.rules.base.quotas.get_blended_sample_rate")
 def test_generate_rules_return_uniform_rules_and_latest_release_rule(
-    get_blended_sample_rate, default_project, latest_release_only
+    get_blended_sample_rate, apply_dynamic_factor, default_project, latest_release_only
 ):
     get_blended_sample_rate.return_value = 0.1
+    apply_dynamic_factor.return_value = LATEST_RELEASES_BOOST_FACTOR
 
     redis_client = get_redis_client_for_ds()
 
@@ -468,7 +478,7 @@ def test_generate_rules_return_uniform_rules_and_latest_release_rule(
 
     assert generate_rules(default_project) == [
         {
-            "samplingValue": {"type": "sampleRate", "value": 0.5},
+            "samplingValue": {"type": "factor", "value": LATEST_RELEASES_BOOST_FACTOR},
             "type": "trace",
             "active": True,
             "condition": {
@@ -480,10 +490,10 @@ def test_generate_rules_return_uniform_rules_and_latest_release_rule(
             },
             "id": 1500,
             "timeRange": {"start": "2022-10-21 18:50:25+00:00", "end": "2022-10-21 20:03:03+00:00"},
-            "decayingFn": {"type": "linear", "decayedValue": 0.1},
+            "decayingFn": {"type": "linear", "decayedValue": LATEST_RELEASES_BOOST_DECAYED_FACTOR},
         },
         {
-            "samplingValue": {"type": "sampleRate", "value": 0.5},
+            "samplingValue": {"type": "factor", "value": LATEST_RELEASES_BOOST_FACTOR},
             "type": "trace",
             "active": True,
             "condition": {
@@ -495,10 +505,10 @@ def test_generate_rules_return_uniform_rules_and_latest_release_rule(
             },
             "id": 1501,
             "timeRange": {"start": "2022-10-21 18:50:25+00:00", "end": "2022-10-21 20:03:03+00:00"},
-            "decayingFn": {"type": "linear", "decayedValue": 0.1},
+            "decayingFn": {"type": "linear", "decayedValue": LATEST_RELEASES_BOOST_DECAYED_FACTOR},
         },
         {
-            "samplingValue": {"type": "sampleRate", "value": 0.5},
+            "samplingValue": {"type": "factor", "value": LATEST_RELEASES_BOOST_FACTOR},
             "type": "trace",
             "active": True,
             "condition": {
@@ -510,7 +520,7 @@ def test_generate_rules_return_uniform_rules_and_latest_release_rule(
             },
             "id": 1502,
             "timeRange": {"start": "2022-10-21 18:50:25+00:00", "end": "2022-10-21 20:03:03+00:00"},
-            "decayingFn": {"type": "linear", "decayedValue": 0.1},
+            "decayingFn": {"type": "linear", "decayedValue": LATEST_RELEASES_BOOST_DECAYED_FACTOR},
         },
         {
             "active": True,
@@ -527,11 +537,13 @@ def test_generate_rules_return_uniform_rules_and_latest_release_rule(
 
 @pytest.mark.django_db
 @freeze_time("2022-10-21 18:50:25+00:00")
+@patch("sentry.dynamic_sampling.rules.biases.boost_latest_releases_bias.apply_dynamic_factor")
 @patch("sentry.dynamic_sampling.rules.base.quotas.get_blended_sample_rate")
 def test_generate_rules_does_not_return_rule_with_deleted_release(
-    get_blended_sample_rate, default_project, latest_release_only
+    get_blended_sample_rate, apply_dynamic_factor, default_project, latest_release_only
 ):
     get_blended_sample_rate.return_value = 0.1
+    apply_dynamic_factor.return_value = LATEST_RELEASES_BOOST_FACTOR
 
     redis_client = get_redis_client_for_ds()
 
@@ -554,7 +566,7 @@ def test_generate_rules_does_not_return_rule_with_deleted_release(
 
     assert generate_rules(default_project) == [
         {
-            "samplingValue": {"type": "sampleRate", "value": 0.5},
+            "samplingValue": {"type": "factor", "value": LATEST_RELEASES_BOOST_FACTOR},
             "type": "trace",
             "active": True,
             "condition": {
@@ -566,7 +578,7 @@ def test_generate_rules_does_not_return_rule_with_deleted_release(
             },
             "id": 1500,
             "timeRange": {"start": "2022-10-21 18:50:25+00:00", "end": "2022-10-21 20:03:03+00:00"},
-            "decayingFn": {"type": "linear", "decayedValue": 0.1},
+            "decayingFn": {"type": "linear", "decayedValue": LATEST_RELEASES_BOOST_DECAYED_FACTOR},
         },
         {
             "active": True,

Some files were not shown because too many files changed in this diff