Browse Source

ref(metrics) Stop merging rules with the same condition but different projects (#57369)

Radu Woinaroski 1 year ago
parent
commit
53b8ba862b

+ 4 - 2
src/sentry/api/endpoints/custom_rules.py

@@ -172,10 +172,12 @@ class CustomRulesEndpoint(OrganizationEndpoint):
         except ValueError as e:
             return Response({"query": ["Could not convert to rule", str(e)]}, status=400)
 
-        rule = CustomDynamicSamplingRule.get_rule_for_org(condition, organization.id)
+        rule = CustomDynamicSamplingRule.get_rule_for_org(
+            condition, organization.id, requested_projects_ids
+        )
 
         if rule is None:
-            return Response(status=204)  # no rule found, nothing to reutrn
+            return Response(status=204)  # no rule found, nothing to return
 
         # we have a rule, check to see if the projects match
 

+ 14 - 21
src/sentry/models/dynamicsampling.py

@@ -28,13 +28,15 @@ class TooManyRules(ValueError):
     pass
 
 
-def get_condition_hash(condition: Any) -> str:
+def get_rule_hash(condition: Any, project_ids: Sequence[int]) -> str:
     """
     Returns the hash of the rule based on the condition and projects
     """
     condition_string = to_order_independent_string(condition)
+    project_string = to_order_independent_string(list(project_ids))
+    rule_string = f"{condition_string}-{project_string}"
     # make it a bit shorter
-    return hashlib.sha1(condition_string.encode("utf-8")).hexdigest()
+    return hashlib.sha1(rule_string.encode("utf-8")).hexdigest()
 
 
 def to_order_independent_string(val: Any) -> str:
@@ -129,7 +131,9 @@ class CustomDynamicSamplingRule(Model):
 
     @staticmethod
     def get_rule_for_org(
-        condition: Any, organization_id: int
+        condition: Any,
+        organization_id: int,
+        project_ids: Sequence[int],
     ) -> Optional["CustomDynamicSamplingRule"]:
         """
         Returns an active rule for the given condition and organization if it exists otherwise None
@@ -137,10 +141,10 @@ class CustomDynamicSamplingRule(Model):
         Note: There should not be more than one active rule for a given condition and organization
         This function doesn't verify this condition, it just returns the first one.
         """
-        condition_hash = get_condition_hash(condition)
+        rule_hash = get_rule_hash(condition, project_ids)
         rules = CustomDynamicSamplingRule.objects.filter(
             organization_id=organization_id,
-            condition_hash=condition_hash,
+            condition_hash=rule_hash,
             is_active=True,
             end_date__gt=timezone.now(),
         )[:1]
@@ -162,7 +166,9 @@ class CustomDynamicSamplingRule(Model):
 
         with transaction.atomic(router.db_for_write(CustomDynamicSamplingRule)):
             # check if rule already exists for this organization
-            existing_rule = CustomDynamicSamplingRule.get_rule_for_org(condition, organization_id)
+            existing_rule = CustomDynamicSamplingRule.get_rule_for_org(
+                condition, organization_id, project_ids
+            )
 
             if existing_rule is not None:
                 # we already have an active rule for this condition and this organization
@@ -171,25 +177,12 @@ class CustomDynamicSamplingRule(Model):
                 existing_rule.num_samples = max(num_samples, existing_rule.num_samples)
                 existing_rule.sample_rate = max(sample_rate, existing_rule.sample_rate)
 
-                if not existing_rule.is_org_level:
-                    # for project rules we need to add the projects,org rules already include everything
-                    if len(project_ids) == 0:
-                        # the new rule is an org rule promote current rule to org rule and remove all
-                        # relations to individual projects
-                        existing_rule.is_org_level = True
-                        existing_rule.projects.clear()
-                    else:
-                        # add the new projects to the rule, if not already there
-                        for project_id in project_ids:
-                            project = Project.objects.get_from_cache(id=project_id)
-                            existing_rule.projects.add(project)
-
                 # for org rules we don't need to do anything with the projects
                 existing_rule.save()
                 return existing_rule
             else:
                 # create a new rule
-                condition_hash = get_condition_hash(condition)
+                rule_hash = get_rule_hash(condition, project_ids)
                 is_org_level = len(project_ids) == 0
                 condition_str = json.dumps(condition)
                 rule = CustomDynamicSamplingRule.objects.create(
@@ -199,7 +192,7 @@ class CustomDynamicSamplingRule(Model):
                     start_date=start,
                     end_date=end,
                     num_samples=num_samples,
-                    condition_hash=condition_hash,
+                    condition_hash=rule_hash,
                     is_active=True,
                     is_org_level=is_org_level,
                 )

+ 8 - 14
tests/sentry/api/endpoints/test_custom_rules.py

@@ -92,7 +92,7 @@ class CustomRulesGetEndpoint(APITestCase):
                 self.organization.slug,
                 qs_params={
                     "query": "environment:prod",
-                    "project": [self.known_projects[1].id],
+                    "project": [proj.id for proj in self.known_projects[1:3]],
                 },
             )
 
@@ -127,7 +127,7 @@ class CustomRulesGetEndpoint(APITestCase):
 
     def test_finds_org_condition(self):
         """
-        A request for either any project of org will find an org rule ( if condition matches)
+        A request for org will find an org rule ( if condition matches)
         """
 
         # finds projects in the org rule
@@ -136,7 +136,7 @@ class CustomRulesGetEndpoint(APITestCase):
                 self.organization.slug,
                 qs_params={
                     "query": "environment:dev",
-                    "project": [self.known_projects[1].id],
+                    "project": [],
                 },
             )
         assert resp.status_code == 200
@@ -177,7 +177,7 @@ class CustomRulesGetEndpoint(APITestCase):
                 self.organization.slug,
                 qs_params={
                     "query": "environment:prod",
-                    "project": [self.known_projects[1].id],
+                    "project": [project.id for project in self.known_projects[1:3]],
                 },
             )
         assert resp.status_code == 200
@@ -212,7 +212,6 @@ class CustomRulesEndpoint(APITestCase):
             "query": "event.type:transaction",
             "projects": [self.project.id],
             "period": "1h",
-            "overrideExisting": True,
         }
         with Feature({"organizations:investigation-bias": True}):
             resp = self.get_response(self.organization.slug, raw_data=request_data)
@@ -241,7 +240,6 @@ class CustomRulesEndpoint(APITestCase):
             "query": "",
             "projects": [self.project.id],
             "period": "1h",
-            "overrideExisting": True,
         }
         with Feature({"organizations:investigation-bias": True}):
             resp = self.get_response(self.organization.slug, raw_data=request_data)
@@ -267,16 +265,15 @@ class CustomRulesEndpoint(APITestCase):
 
     def test_updates_existing(self):
         """
-        Test that the endpoint updates an existing rule if the same rule condition is given
+        Test that the endpoint updates an existing rule if the same rule condition and projects is given
 
         The rule id should be the same
-        The period and the projects should be updated
+        The period should be updated
         """
         request_data = {
             "query": "event.type:transaction",
             "projects": [self.project.id],
             "period": "1h",
-            "overrideExisting": True,
         }
 
         # create rule
@@ -294,9 +291,8 @@ class CustomRulesEndpoint(APITestCase):
 
         request_data = {
             "query": "event.type:transaction",
-            "projects": [self.second_project.id],
+            "projects": [self.project.id],
             "period": "2h",
-            "overrideExisting": True,
         }
 
         # update existing rule
@@ -311,8 +307,7 @@ class CustomRulesEndpoint(APITestCase):
         assert end_date - start_date >= timedelta(hours=2)
 
         projects = data["projects"]
-        assert self.project.id in projects
-        assert self.second_project.id in projects
+        assert projects == [self.project.id]
 
         new_rule_id = data["ruleId"]
         assert rule_id == new_rule_id
@@ -325,7 +320,6 @@ class CustomRulesEndpoint(APITestCase):
             "query": "event.type:transaction",
             "projects": [self.project.id],
             "period": "1h",
-            "overrideExisting": True,
         }
         with Feature({"organizations:investigation-bias": False}):
             resp = self.get_response(self.organization.slug, raw_data=request_data)

+ 47 - 5
tests/sentry/models/test_dynamicsampling.py

@@ -38,7 +38,7 @@ class TestCustomDynamicSamplingRuleProject(TestCase):
             condition=condition,
             start=timezone.now() + timedelta(minutes=1),
             end=end2,
-            project_ids=[self.second_project.id],
+            project_ids=[self.project.id],
             organization_id=self.organization.id,
             num_samples=100,
             sample_rate=0.5,
@@ -47,9 +47,8 @@ class TestCustomDynamicSamplingRuleProject(TestCase):
         assert rule.id == updated_rule.id
         projects = updated_rule.projects.all()
 
-        assert len(projects) == 2
+        assert len(projects) == 1
         assert self.project in projects
-        assert self.second_project in projects
 
         assert updated_rule.end_date >= end1
         assert updated_rule.end_date >= end2
@@ -148,7 +147,9 @@ class TestCustomDynamicSamplingRuleProject(TestCase):
         condition = {"op": "equals", "name": "environment", "value": "prod"}
 
         # check empty result
-        rule = CustomDynamicSamplingRule.get_rule_for_org(condition, self.organization.id)
+        rule = CustomDynamicSamplingRule.get_rule_for_org(
+            condition, self.organization.id, [self.project.id]
+        )
         assert rule is None
 
         new_rule = CustomDynamicSamplingRule.update_or_create(
@@ -161,7 +162,9 @@ class TestCustomDynamicSamplingRuleProject(TestCase):
             sample_rate=0.5,
         )
 
-        rule = CustomDynamicSamplingRule.get_rule_for_org(condition, self.organization.id)
+        rule = CustomDynamicSamplingRule.get_rule_for_org(
+            condition, self.organization.id, [self.project.id]
+        )
         assert rule == new_rule
 
     def test_get_project_rules(self):
@@ -223,3 +226,42 @@ class TestCustomDynamicSamplingRuleProject(TestCase):
         assert len(rules) == 2
         assert valid_project_rule in rules
         assert valid_org_rule in rules
+
+    def test_separate_projects_create_different_rules(self):
+        """
+        Tests that same condition for different projects create different rules
+        """
+        condition = {"op": "equals", "name": "environment", "value": "prod"}
+
+        end1 = timezone.now() + timedelta(hours=1)
+
+        rule = CustomDynamicSamplingRule.update_or_create(
+            condition=condition,
+            start=timezone.now(),
+            end=end1,
+            project_ids=[self.project.id],
+            organization_id=self.organization.id,
+            num_samples=100,
+            sample_rate=0.5,
+        )
+
+        end2 = timezone.now() + timedelta(hours=1)
+        second_rule = CustomDynamicSamplingRule.update_or_create(
+            condition=condition,
+            start=timezone.now() + timedelta(minutes=1),
+            end=end2,
+            project_ids=[self.second_project.id],
+            organization_id=self.organization.id,
+            num_samples=100,
+            sample_rate=0.5,
+        )
+
+        assert rule.id != second_rule.id
+
+        first_projects = rule.projects.all()
+        assert len(first_projects) == 1
+        assert self.project == first_projects[0]
+
+        second_projects = second_rule.projects.all()
+        assert len(second_projects) == 1
+        assert self.second_project == second_projects[0]