Browse Source

feat(sampling): Implement denylist query param in project tags API [TET-113] (#35138)

* (feat): Implement denylist query param in project tags api

Add new optional query param `onlySamplingTags` to
`/api/0/projects/<organization>/<project>/tags/`.
So we can filter out tags using sentry.constants.DS_DENYLIST.

fixes TET-113

Co-authored-by: Ahmed Etefy <ahmed.etefy12@gmail.com>
Andrii Soldatenko 2 years ago
parent
commit
517ee963e0

+ 1 - 0
.gitignore

@@ -43,6 +43,7 @@ tests/fixtures/integrations/*/data/**
 /test_cli/
 Gemfile.lock
 .idea/
+.vim/
 *.iml
 .pytest_cache/
 .vscode/tags

+ 11 - 9
src/sentry/api/endpoints/project_tags.py

@@ -4,7 +4,7 @@ from rest_framework.response import Response
 from sentry import tagstore
 from sentry.api.base import EnvironmentMixin
 from sentry.api.bases.project import ProjectEndpoint
-from sentry.constants import PROTECTED_TAG_KEYS
+from sentry.constants import DS_DENYLIST, PROTECTED_TAG_KEYS
 from sentry.models import Environment
 
 
@@ -15,15 +15,17 @@ class ProjectTagsEndpoint(ProjectEndpoint, EnvironmentMixin):
         except Environment.DoesNotExist:
             tag_keys = []
         else:
+            kwargs = dict(
+                # We might be able to stop including these values, but this
+                # is a pretty old endpoint, so concerned about breaking
+                # existing api consumers.
+                include_values_seen=True,
+            )
+            if request.GET.get("onlySamplingTags") == "1":
+                kwargs.update(denylist=DS_DENYLIST)
+
             tag_keys = sorted(
-                tagstore.get_tag_keys(
-                    project.id,
-                    environment_id,
-                    # We might be able to stop including these values, but this
-                    # is a pretty old endpoint, so concerned about breaking
-                    # existing api consumers.
-                    include_values_seen=True,
-                ),
+                tagstore.get_tag_keys(project.id, environment_id, **kwargs),
                 key=lambda x: x.key,
             )
 

+ 43 - 0
src/sentry/constants.py

@@ -580,3 +580,46 @@ DataCategory = sentry_relay.DataCategory
 
 CRASH_RATE_ALERT_SESSION_COUNT_ALIAS = "_total_count"
 CRASH_RATE_ALERT_AGGREGATE_ALIAS = "_crash_rate_alert_aggregate"
+
+# Dynamic sampling denylist composed manually from
+# 1. `src/sentry/event_manager.py:save`. We have function
+# `_derive_interface_tags_many(jobs)` which iterates across all interfaces
+# and execute `iter_tags`, so i've searched usage of `iter_tags`.
+# 2. `src/sentry/event_manager.py:_pull_out_data` we have `set_tag`.
+# 3. `src/sentry/event_manager.py:_get_event_user_many` we have `set_tag`.
+# 4. `src/sentry/event_manager.py:_get_or_create_release_many` we have `set_tag`.
+# 5. `src/sentry/interfaces/exception.py:Mechanism` we have `iter_tags`.
+# 6. `src/sentry/plugins/sentry_urls/models.py:UrlsPlugin`.
+# 7. `sentry/src/sentry/plugins/sentry_interface_types/models.py`.
+# 8. `src/sentry/plugins/sentry_useragents/models.py:UserAgentPlugin`.
+# Note:
+# should be sorted alphabetically so that it is easy to maintain in future
+# if you update this list please add explanation or source of it
+DS_DENYLIST = frozenset(
+    [
+        "app.device",
+        "browser",
+        "browser.name",
+        "device",
+        "device.family",
+        "environment",
+        "gpu.name",
+        "gpu.vendor",
+        "handled",
+        "interface_type",
+        "level",
+        "logger",
+        "mechanism",
+        "monitor.id",
+        "os",
+        "os.name",
+        "os.rooted",
+        "runtime",
+        "runtime.name",
+        "sentry:dist",
+        "sentry:release",
+        "sentry:user",
+        "transaction",
+        "url",
+    ]
+)

+ 17 - 2
src/sentry/tagstore/snuba/backend.py

@@ -175,6 +175,7 @@ class SnubaTagStorage(TagStorage):
         keys=None,
         include_values_seen=True,
         include_transactions=False,
+        denylist=None,
         **kwargs,
     ):
         return self.__get_tag_keys_for_projects(
@@ -187,6 +188,7 @@ class SnubaTagStorage(TagStorage):
             keys,
             include_values_seen=include_values_seen,
             include_transactions=include_transactions,
+            denylist=denylist,
         )
 
     def __get_tag_keys_for_projects(
@@ -201,6 +203,7 @@ class SnubaTagStorage(TagStorage):
         include_values_seen=True,
         use_cache=False,
         include_transactions=False,
+        denylist=None,
         **kwargs,
     ):
         """Query snuba for tag keys based on projects
@@ -288,7 +291,12 @@ class SnubaTagStorage(TagStorage):
             ctor = functools.partial(GroupTagKey, group_id=group_id)
 
         results = set()
+
         for key, data in result.items():
+            # Ignore key (skip interation) if it's in denylist
+            if denylist is not None and key in denylist:
+                continue
+
             params = {"key": key}
             if include_values_seen:
                 params["values_seen"] = data["values_seen"]
@@ -336,10 +344,17 @@ class SnubaTagStorage(TagStorage):
         return self.__get_tag_key_and_top_values(project_id, None, environment_id, key, **kwargs)
 
     def get_tag_keys(
-        self, project_id, environment_id, status=TagKeyStatus.VISIBLE, include_values_seen=False
+        self,
+        project_id,
+        environment_id,
+        status=TagKeyStatus.VISIBLE,
+        include_values_seen=False,
+        denylist=None,
     ):
         assert status is TagKeyStatus.VISIBLE
-        return self.__get_tag_keys(project_id, None, environment_id and [environment_id])
+        return self.__get_tag_keys(
+            project_id, None, environment_id and [environment_id], denylist=denylist
+        )
 
     def get_tag_keys_for_projects(
         self,

+ 41 - 0
tests/snuba/api/endpoints/test_project_tags.py

@@ -1,3 +1,4 @@
+from sentry.constants import DS_DENYLIST
 from sentry.testutils import APITestCase, SnubaTestCase
 from sentry.testutils.helpers.datetime import before_now, iso_format
 
@@ -31,3 +32,43 @@ class ProjectTagsTest(APITestCase, SnubaTestCase):
         assert data["foo"]["uniqueValues"] == 1
         assert data["bar"]["canDelete"]
         assert data["bar"]["uniqueValues"] == 2
+
+    def test_simple_remove_tags_in_denylist(self):
+        self.store_event(
+            data={
+                # "browser" and "sentry:dist" are in denylist sentry.constants.DS_DENYLIST
+                "tags": {"browser": "chrome", "bar": "rab", "sentry:dist": "test_dist"},
+                "timestamp": iso_format(before_now(minutes=1)),
+            },
+            project_id=self.project.id,
+        )
+        self.store_event(
+            data={"tags": {"bar": "rab2"}, "timestamp": iso_format(before_now(minutes=1))},
+            project_id=self.project.id,
+        )
+
+        response = self.get_success_response(
+            self.project.organization.slug, self.project.slug, onlySamplingTags=1
+        )
+
+        data = {v["key"]: v for v in response.data}
+        assert len(data) == 1
+
+        assert data["bar"]["canDelete"]
+        assert data["bar"]["uniqueValues"] == 2
+
+    def test_simple_remove_tags_in_denylist_remove_all_tags(self):
+        self.store_event(
+            data={
+                "tags": {deny_tag: "value_{deny_tag}" for deny_tag in DS_DENYLIST},
+                "timestamp": iso_format(before_now(minutes=1)),
+            },
+            project_id=self.project.id,
+        )
+        response = self.get_success_response(
+            self.project.organization.slug, self.project.slug, onlySamplingTags=1
+        )
+
+        data = {v["key"]: v for v in response.data}
+        assert len(data) == 0
+        assert data == {}

+ 25 - 0
tests/snuba/tagstore/test_tagstore_backend.py

@@ -212,6 +212,31 @@ class TagStorageTest(TestCase, SnubaTestCase):
         }
         assert set(keys) == expected_keys
 
+    def test_get_tag_keys_removed_from_denylist(self):
+        denylist_keys = frozenset(["browser", "sentry:release"])
+        expected_keys = {
+            "baz",
+            "environment",
+            "foo",
+            "sentry:user",
+            "level",
+        }
+        keys = {
+            k.key: k
+            for k in self.ts.get_tag_keys(
+                project_id=self.proj1.id, environment_id=self.proj1env1.id, denylist=denylist_keys
+            )
+        }
+        assert set(keys) == expected_keys
+        keys = {
+            k.key: k
+            for k in self.ts.get_tag_keys(
+                project_id=self.proj1.id, environment_id=self.proj1env1.id
+            )
+        }
+        expected_keys |= {"browser", "sentry:release"}
+        assert set(keys) == expected_keys
+
     def test_get_group_tag_key(self):
         with pytest.raises(GroupTagKeyNotFound):
             self.ts.get_group_tag_key(