Browse Source

fix(tagstore): Always pass a dataset in tagstore (#24978)

- Most of these are group-specific, so I'm using Dataset.Events for
  those
- get_tag_keys_for_projects is used across the app for autocomplete
  and for getting a list of columns.

* ref: use include_transactions to be more consistent

- Defaults to false, but it defaults to true for the organization_tags
  endpoint, since the frontend caches this response and it gets re-used
  between issues and discover
William Mak 3 years ago
parent
commit
aee9db28f9

+ 2 - 0
src/sentry/api/endpoints/organization_tags.py

@@ -19,6 +19,8 @@ class OrganizationTagsEndpoint(OrganizationEventsEndpointBase):
                 filter_params["start"],
                 filter_params["start"],
                 filter_params["end"],
                 filter_params["end"],
                 use_cache=request.GET.get("use_cache", "0") == "1",
                 use_cache=request.GET.get("use_cache", "0") == "1",
+                # Defaults to True, because the frontend caches these tags globally
+                include_transactions=request.GET.get("include_transactions", "1") == "1",
             )
             )
 
 
         return Response(serialize(results, request.user))
         return Response(serialize(results, request.user))

+ 35 - 8
src/sentry/tagstore/snuba/backend.py

@@ -5,7 +5,6 @@ from pytz import UTC
 
 
 from django.core.cache import cache
 from django.core.cache import cache
 
 
-from sentry import options
 from sentry.api.event_search import FIELD_ALIASES, PROJECT_ALIAS, USER_DISPLAY_ALIAS
 from sentry.api.event_search import FIELD_ALIASES, PROJECT_ALIAS, USER_DISPLAY_ALIAS
 from sentry.models import Project, ReleaseProjectEnvironment
 from sentry.models import Project, ReleaseProjectEnvironment
 from sentry.api.utils import default_start_end_dates
 from sentry.api.utils import default_start_end_dates
@@ -67,6 +66,7 @@ class SnubaTagStorage(TagStorage):
         aggregations = [["uniq", tag, "values_seen"], ["count()", "", "count"]]
         aggregations = [["uniq", tag, "values_seen"], ["count()", "", "count"]]
 
 
         result = snuba.query(
         result = snuba.query(
+            dataset=Dataset.Events,
             conditions=conditions,
             conditions=conditions,
             filter_keys=filters,
             filter_keys=filters,
             aggregations=aggregations,
             aggregations=aggregations,
@@ -84,7 +84,6 @@ class SnubaTagStorage(TagStorage):
     def __get_tag_key_and_top_values(
     def __get_tag_key_and_top_values(
         self, project_id, group_id, environment_id, key, limit=3, raise_on_empty=True, **kwargs
         self, project_id, group_id, environment_id, key, limit=3, raise_on_empty=True, **kwargs
     ):
     ):
-
         tag = f"tags[{key}]"
         tag = f"tags[{key}]"
         filters = {"project_id": get_project_list(project_id)}
         filters = {"project_id": get_project_list(project_id)}
         if environment_id:
         if environment_id:
@@ -104,6 +103,7 @@ class SnubaTagStorage(TagStorage):
         ]
         ]
 
 
         result, totals = snuba.query(
         result, totals = snuba.query(
+            dataset=Dataset.Events,
             start=kwargs.get("start"),
             start=kwargs.get("start"),
             end=kwargs.get("end"),
             end=kwargs.get("end"),
             groupby=[tag],
             groupby=[tag],
@@ -152,6 +152,7 @@ class SnubaTagStorage(TagStorage):
         limit=1000,
         limit=1000,
         keys=None,
         keys=None,
         include_values_seen=True,
         include_values_seen=True,
+        include_transactions=False,
         **kwargs,
         **kwargs,
     ):
     ):
         return self.__get_tag_keys_for_projects(
         return self.__get_tag_keys_for_projects(
@@ -163,6 +164,7 @@ class SnubaTagStorage(TagStorage):
             limit,
             limit,
             keys,
             keys,
             include_values_seen=include_values_seen,
             include_values_seen=include_values_seen,
+            include_transactions=include_transactions,
         )
         )
 
 
     def __get_tag_keys_for_projects(
     def __get_tag_keys_for_projects(
@@ -176,6 +178,7 @@ class SnubaTagStorage(TagStorage):
         keys=None,
         keys=None,
         include_values_seen=True,
         include_values_seen=True,
         use_cache=False,
         use_cache=False,
+        include_transactions=False,
         **kwargs,
         **kwargs,
     ):
     ):
         """Query snuba for tag keys based on projects
         """Query snuba for tag keys based on projects
@@ -215,18 +218,18 @@ class SnubaTagStorage(TagStorage):
         should_cache = use_cache and group_id is None
         should_cache = use_cache and group_id is None
         result = None
         result = None
 
 
+        dataset = Dataset.Events
+        if include_transactions:
+            dataset = Dataset.Discover
+
         if should_cache:
         if should_cache:
             filtering_strings = [f"{key}={value}" for key, value in filters.items()]
             filtering_strings = [f"{key}={value}" for key, value in filters.items()]
+            filtering_strings.append(f"dataset={dataset.name}")
             cache_key = "tagstore.__get_tag_keys:{}".format(
             cache_key = "tagstore.__get_tag_keys:{}".format(
                 md5_text(*filtering_strings).hexdigest()
                 md5_text(*filtering_strings).hexdigest()
             )
             )
             key_hash = hash(cache_key)
             key_hash = hash(cache_key)
-            should_cache = (key_hash % 1000) / 1000.0 <= options.get(
-                "snuba.tagstore.cache-tagkeys-rate"
-            )
 
 
-        # If we want to continue attempting to cache after checking against the cache rate
-        if should_cache:
             # Needs to happen before creating the cache suffix otherwise rounding will cause different durations
             # Needs to happen before creating the cache suffix otherwise rounding will cause different durations
             duration = (end - start).total_seconds()
             duration = (end - start).total_seconds()
             # Cause there's rounding to create this cache suffix, we want to update the query end so results match
             # Cause there's rounding to create this cache suffix, we want to update the query end so results match
@@ -240,6 +243,7 @@ class SnubaTagStorage(TagStorage):
 
 
         if result is None:
         if result is None:
             result = snuba.query(
             result = snuba.query(
+                dataset=dataset,
                 start=start,
                 start=start,
                 end=end,
                 end=end,
                 groupby=["tags_key"],
                 groupby=["tags_key"],
@@ -289,6 +293,7 @@ class SnubaTagStorage(TagStorage):
         ]
         ]
 
 
         data = snuba.query(
         data = snuba.query(
+            dataset=Dataset.Events,
             conditions=conditions,
             conditions=conditions,
             filter_keys=filters,
             filter_keys=filters,
             aggregations=aggregations,
             aggregations=aggregations,
@@ -314,7 +319,14 @@ class SnubaTagStorage(TagStorage):
         return self.__get_tag_keys(project_id, None, environment_id and [environment_id])
         return self.__get_tag_keys(project_id, None, environment_id and [environment_id])
 
 
     def get_tag_keys_for_projects(
     def get_tag_keys_for_projects(
-        self, projects, environments, start, end, status=TagKeyStatus.VISIBLE, use_cache=False
+        self,
+        projects,
+        environments,
+        start,
+        end,
+        status=TagKeyStatus.VISIBLE,
+        use_cache=False,
+        include_transactions=False,
     ):
     ):
         MAX_UNSAMPLED_PROJECTS = 50
         MAX_UNSAMPLED_PROJECTS = 50
         # We want to disable FINAL in the snuba query to reduce load.
         # We want to disable FINAL in the snuba query to reduce load.
@@ -333,6 +345,7 @@ class SnubaTagStorage(TagStorage):
             end,
             end,
             include_values_seen=False,
             include_values_seen=False,
             use_cache=use_cache,
             use_cache=use_cache,
+            include_transactions=include_transactions,
             **optimize_kwargs,
             **optimize_kwargs,
         )
         )
 
 
@@ -353,10 +366,12 @@ class SnubaTagStorage(TagStorage):
     def get_group_tag_keys(
     def get_group_tag_keys(
         self, project_id, group_id, environment_ids, limit=None, keys=None, **kwargs
         self, project_id, group_id, environment_ids, limit=None, keys=None, **kwargs
     ):
     ):
+        """ Get tag keys for a specific group """
         return self.__get_tag_keys(
         return self.__get_tag_keys(
             project_id,
             project_id,
             group_id,
             group_id,
             environment_ids,
             environment_ids,
+            dataset=Dataset.Events,
             limit=limit,
             limit=limit,
             keys=keys,
             keys=keys,
             include_values_seen=False,
             include_values_seen=False,
@@ -387,6 +402,7 @@ class SnubaTagStorage(TagStorage):
         ]
         ]
 
 
         result = snuba.query(
         result = snuba.query(
+            dataset=Dataset.Events,
             groupby=["group_id"],
             groupby=["group_id"],
             conditions=conditions,
             conditions=conditions,
             filter_keys=filters,
             filter_keys=filters,
@@ -414,6 +430,7 @@ class SnubaTagStorage(TagStorage):
         ]
         ]
 
 
         result = snuba.query(
         result = snuba.query(
+            dataset=Dataset.Events,
             start=start,
             start=start,
             end=end,
             end=end,
             groupby=["group_id"],
             groupby=["group_id"],
@@ -434,6 +451,7 @@ class SnubaTagStorage(TagStorage):
         aggregations = [["count()", "", "count"]]
         aggregations = [["count()", "", "count"]]
 
 
         return snuba.query(
         return snuba.query(
+            dataset=Dataset.Events,
             conditions=conditions,
             conditions=conditions,
             filter_keys=filters,
             filter_keys=filters,
             aggregations=aggregations,
             aggregations=aggregations,
@@ -482,6 +500,7 @@ class SnubaTagStorage(TagStorage):
         ]
         ]
 
 
         values_by_key = snuba.query(
         values_by_key = snuba.query(
+            dataset=Dataset.Events,
             start=kwargs.get("start"),
             start=kwargs.get("start"),
             end=kwargs.get("end"),
             end=kwargs.get("end"),
             groupby=["tags_key", "tags_value"],
             groupby=["tags_key", "tags_value"],
@@ -524,6 +543,7 @@ class SnubaTagStorage(TagStorage):
         orderby = "seen" if first else "-seen"
         orderby = "seen" if first else "-seen"
 
 
         result = snuba.query(
         result = snuba.query(
+            dataset=Dataset.Events,
             groupby=["tags[sentry:release]"],
             groupby=["tags[sentry:release]"],
             conditions=conditions,
             conditions=conditions,
             filter_keys=filters,
             filter_keys=filters,
@@ -560,6 +580,7 @@ class SnubaTagStorage(TagStorage):
         ]
         ]
         start = self.get_min_start_date(organization_id, project_ids, environment_id, versions)
         start = self.get_min_start_date(organization_id, project_ids, environment_id, versions)
         result = snuba.query(
         result = snuba.query(
+            dataset=Dataset.Events,
             start=start,
             start=start,
             groupby=["project_id", col],
             groupby=["project_id", col],
             conditions=conditions,
             conditions=conditions,
@@ -600,6 +621,7 @@ class SnubaTagStorage(TagStorage):
         aggregations = [["max", SEEN_COLUMN, "last_seen"]]
         aggregations = [["max", SEEN_COLUMN, "last_seen"]]
 
 
         result = snuba.query(
         result = snuba.query(
+            dataset=Dataset.Events,
             groupby=["group_id"],
             groupby=["group_id"],
             conditions=conditions,
             conditions=conditions,
             filter_keys=filters,
             filter_keys=filters,
@@ -611,6 +633,7 @@ class SnubaTagStorage(TagStorage):
         return set(result.keys())
         return set(result.keys())
 
 
     def get_group_tag_values_for_users(self, event_users, limit=100):
     def get_group_tag_values_for_users(self, event_users, limit=100):
+        """ While not specific to a group_id, this is currently only used in issues, so the Events dataset is used """
         filters = {"project_id": [eu.project_id for eu in event_users]}
         filters = {"project_id": [eu.project_id for eu in event_users]}
         conditions = [
         conditions = [
             ["tags[sentry:user]", "IN", [_f for _f in [eu.tag_value for eu in event_users] if _f]]
             ["tags[sentry:user]", "IN", [_f for _f in [eu.tag_value for eu in event_users] if _f]]
@@ -622,6 +645,7 @@ class SnubaTagStorage(TagStorage):
         ]
         ]
 
 
         result = snuba.query(
         result = snuba.query(
+            dataset=Dataset.Events,
             groupby=["group_id", "user_id"],
             groupby=["group_id", "user_id"],
             conditions=conditions,
             conditions=conditions,
             filter_keys=filters,
             filter_keys=filters,
@@ -648,6 +672,7 @@ class SnubaTagStorage(TagStorage):
         aggregations = [["uniq", "tags[sentry:user]", "count"]]
         aggregations = [["uniq", "tags[sentry:user]", "count"]]
 
 
         result = snuba.query(
         result = snuba.query(
+            dataset=Dataset.Events,
             start=start,
             start=start,
             end=end,
             end=end,
             groupby=["group_id"],
             groupby=["group_id"],
@@ -862,6 +887,7 @@ class SnubaTagStorage(TagStorage):
         if environment_ids:
         if environment_ids:
             filters["environment"] = environment_ids
             filters["environment"] = environment_ids
         results = snuba.query(
         results = snuba.query(
+            dataset=Dataset.Events,
             groupby=["tags_value"],
             groupby=["tags_value"],
             filter_keys=filters,
             filter_keys=filters,
             aggregations=[
             aggregations=[
@@ -926,6 +952,7 @@ class SnubaTagStorage(TagStorage):
             conditions.append([f"tags[{tag_name}]", operator, tag_val])
             conditions.append([f"tags[{tag_name}]", operator, tag_val])
 
 
         result = snuba.raw_query(
         result = snuba.raw_query(
+            dataset=Dataset.Events,
             start=start,
             start=start,
             end=end,
             end=end,
             selected_columns=["event_id"],
             selected_columns=["event_id"],