Browse Source

refs(api): Stop returning unique values seen from most tag endpoints.

Based on https://github.com/getsentry/sentry/pull/12115. This will give us a substantial performance
improvement when fetching tags. The `uniqueValues` value isn't used at all by our frontend, and so
it should be fine to just remove it completely.

There are some old endpoints where someone might have an integration with us that this breaks.
`ProjectTagsEndpoint` isn't a concern for performance, so I've left it as is for now.
`GroupTagsEndpoint` could potentially cause issues, but is likely worthwhile to take the risk for
the performance gain. `GroupDetailsEndpoint` is in a similar spot to `GroupTagsEndpoint`, probably
worth the risk.

If we want to be more cautious, we could parameterize the endpoints and have FE pass in a value so
that we stop including this value.

Fixes SEN-235
Dan Fuller 6 years ago
parent
commit
91bc370ac4

+ 4 - 0
src/sentry/api/endpoints/project_tags.py

@@ -20,6 +20,10 @@ class ProjectTagsEndpoint(ProjectEndpoint, EnvironmentMixin):
                 tagstore.get_tag_keys(
                     project.id,
                     environment_id,
+                    # We might be able to stop including these values, but this
+                    # is a pretty old endpoint, so concerned about breaking
+                    # existing api consumers.
+                    include_values_seen=True,
                 ),
                 key=lambda x: x.key)
 

+ 5 - 2
src/sentry/tagstore/base.py

@@ -216,9 +216,12 @@ class TagStorage(Service):
         """
         raise NotImplementedError
 
-    def get_tag_keys(self, project_id, environment_id, status=TagKeyStatus.VISIBLE):
+    def get_tag_keys(
+        self, project_id, environment_id, status=TagKeyStatus.VISIBLE,
+        include_values_seen=False,
+    ):
         """
-        >>> get_tag_key(1, 2)
+        >>> get_tag_keys(1, 2)
         """
         raise NotImplementedError
 

+ 4 - 1
src/sentry/tagstore/legacy/backend.py

@@ -299,7 +299,10 @@ class LegacyTagStorage(TagStorage):
 
         return transformers[models.TagKey](instance)
 
-    def get_tag_keys(self, project_id, environment_id, status=TagKeyStatus.VISIBLE):
+    def get_tag_keys(
+        self, project_id, environment_id, status=TagKeyStatus.VISIBLE,
+        include_values_seen=False,
+    ):
         qs = models.TagKey.objects.filter(project_id=project_id)
 
         if status is not None:

+ 35 - 18
src/sentry/tagstore/snuba/backend.py

@@ -151,7 +151,10 @@ class SnubaTagStorage(TagStorage):
                 top_values=top_values
             )
 
-    def __get_tag_keys(self, project_id, group_id, environment_ids, limit=1000, keys=None):
+    def __get_tag_keys(
+        self, project_id, group_id, environment_ids, limit=1000, keys=None,
+        include_values_seen=True,
+    ):
         start, end = self.get_time_range()
         return self.__get_tag_keys_for_projects(
             [project_id],
@@ -161,11 +164,12 @@ class SnubaTagStorage(TagStorage):
             end,
             limit,
             keys,
+            include_values_seen=include_values_seen,
         )
 
     def __get_tag_keys_for_projects(
-            self, projects, group_id, environments, start, end, limit=1000,
-            keys=None, **kwargs
+        self, projects, group_id, environments, start, end, limit=1000,
+        keys=None, include_values_seen=True, **kwargs
     ):
         filters = {
             'project_id': projects,
@@ -177,17 +181,17 @@ class SnubaTagStorage(TagStorage):
         if keys is not None:
             filters['tags_key'] = keys
         aggregations = [
-            ['uniq', 'tags_value', 'values_seen'],
             ['count()', '', 'count']
         ]
+
+        if include_values_seen:
+            aggregations.append(['uniq', 'tags_value', 'values_seen'])
         conditions = [['tags_key', 'NOT IN', self.EXCLUDE_TAG_KEYS]]
 
-        # TODO should this be sorted by count() descending, rather than the
-        # number of unique values
         result = snuba.query(
             start, end, ['tags_key'], conditions, filters, aggregations,
-            limit=limit, orderby='-values_seen',
-            referrer='tagstore.__get_tag_keys', **kwargs
+            limit=limit, orderby='-count', referrer='tagstore.__get_tag_keys',
+            **kwargs
         )
 
         if group_id is None:
@@ -195,13 +199,19 @@ class SnubaTagStorage(TagStorage):
         else:
             ctor = functools.partial(GroupTagKey, group_id=group_id)
 
-        return set([
-            ctor(
-                key=key,
-                values_seen=data['values_seen'],
-                count=data['count'],
-            ) for key, data in six.iteritems(result) if data['values_seen']
-        ])
+        results = set()
+        for key, data in six.iteritems(result):
+            params = {'key': key}
+            if include_values_seen:
+                params['values_seen'] = data['values_seen']
+                params['count'] = data['count']
+            else:
+                # If only one aggregate is requested then data is just that raw
+                # aggregate value, rather than a dictionary of
+                # key:aggregate_value pairs
+                params['count'] = data
+            results.add(ctor(**params))
+        return results
 
     def __get_tag_value(self, project_id, group_id, environment_id, key, value):
         start, end = self.get_time_range()
@@ -238,7 +248,10 @@ class SnubaTagStorage(TagStorage):
         assert status is TagKeyStatus.VISIBLE
         return self.__get_tag_key_and_top_values(project_id, None, environment_id, key)
 
-    def get_tag_keys(self, project_id, environment_id, status=TagKeyStatus.VISIBLE):
+    def get_tag_keys(
+        self, project_id, environment_id, status=TagKeyStatus.VISIBLE,
+        include_values_seen=False,
+    ):
         assert status is TagKeyStatus.VISIBLE
         return self.__get_tag_keys(project_id, None, environment_id and [environment_id])
 
@@ -254,7 +267,8 @@ class SnubaTagStorage(TagStorage):
         if len(projects) <= MAX_UNSAMPLED_PROJECTS:
             optimize_kwargs['sample'] = 1
         return self.__get_tag_keys_for_projects(
-            projects, None, environments, start, end, **optimize_kwargs
+            projects, None, environments, start, end, include_values_seen=False,
+            **optimize_kwargs
         )
 
     def get_tag_value(self, project_id, environment_id, key, value):
@@ -270,7 +284,10 @@ class SnubaTagStorage(TagStorage):
             project_id, group_id, environment_id, key, limit=TOP_VALUES_DEFAULT_LIMIT)
 
     def get_group_tag_keys(self, project_id, group_id, environment_ids, limit=None, keys=None):
-        return self.__get_tag_keys(project_id, group_id, environment_ids, limit=limit, keys=keys)
+        return self.__get_tag_keys(
+            project_id, group_id, environment_ids, limit=limit, keys=keys,
+            include_values_seen=False,
+        )
 
     def get_group_tag_value(self, project_id, group_id, environment_id, key, value):
         return self.__get_tag_value(project_id, group_id, environment_id, key, value)

+ 8 - 7
src/sentry/tagstore/types.py

@@ -1,5 +1,7 @@
 from __future__ import absolute_import
 
+from sentry.api.serializers import Serializer, register, serialize
+
 import six
 
 from sentry.search.utils import convert_user_tag_to_query
@@ -31,7 +33,8 @@ class TagType(object):
 class TagKey(TagType):
     __slots__ = ['key', 'values_seen', 'status']
 
-    def __init__(self, key, values_seen, status=TagKeyStatus.VISIBLE, count=None, top_values=None):
+    def __init__(self, key, values_seen=None, status=TagKeyStatus.VISIBLE,
+                 count=None, top_values=None):
         self.key = key
         self.values_seen = values_seen
         self.status = status
@@ -58,7 +61,7 @@ class TagValue(TagType):
 class GroupTagKey(TagType):
     __slots__ = ['group_id', 'key', 'values_seen']
 
-    def __init__(self, group_id, key, values_seen, count=None, top_values=None):
+    def __init__(self, group_id, key, values_seen=None, count=None, top_values=None):
         self.group_id = group_id
         self.key = key
         self.values_seen = values_seen
@@ -78,9 +81,6 @@ class GroupTagValue(TagType):
         self.last_seen = last_seen
 
 
-from sentry.api.serializers import Serializer, register, serialize
-
-
 @register(GroupTagKey)
 @register(TagKey)
 class TagKeySerializer(Serializer):
@@ -89,9 +89,10 @@ class TagKeySerializer(Serializer):
 
         output = {
             'key': tagstore.get_standardized_key(obj.key),
-            'name': tagstore.get_tag_key_label(obj.key),
-            'uniqueValues': obj.values_seen,
+            'name': tagstore.get_tag_key_label(obj.key)
         }
+        if obj.values_seen is not None:
+            output['uniqueValues'] = obj.values_seen
         if obj.count is not None:
             output['totalValues'] = obj.count
         if obj.top_values is not None:

+ 4 - 1
src/sentry/tagstore/v2/backend.py

@@ -441,7 +441,10 @@ class V2TagStorage(TagStorage):
 
         return transformers[models.TagKey](instance)
 
-    def get_tag_keys(self, project_id, environment_id, status=TagKeyStatus.VISIBLE):
+    def get_tag_keys(
+        self, project_id, environment_id, status=TagKeyStatus.VISIBLE,
+        include_values_seen=False,
+    ):
         qs = models.TagKey.objects.filter(
             project_id=project_id,
         )

+ 0 - 1
tests/sentry/api/endpoints/test_group_tagkey_details.py

@@ -48,5 +48,4 @@ class GroupTagDetailsTest(APITestCase):
         response = self.client.get(url, format='json')
         assert response.status_code == 200, response.content
         assert response.data['key'] == six.text_type(tagkey.key)
-        assert response.data['uniqueValues'] == 1
         assert response.data['totalValues'] == 3

+ 0 - 36
tests/sentry/api/endpoints/test_project_tags.py

@@ -1,36 +0,0 @@
-from __future__ import absolute_import
-
-from django.core.urlresolvers import reverse
-
-from sentry import tagstore
-from sentry.testutils import APITestCase
-
-
-class ProjectTagsTest(APITestCase):
-    def test_simple(self):
-        project = self.create_project()
-
-        for key in ('foo', 'bar', 'environment'):
-            tagstore.create_tag_key(
-                project_id=project.id,
-                environment_id=None,
-                key=key,
-            )
-
-        self.login_as(user=self.user)
-
-        url = reverse(
-            'sentry-api-0-project-tags',
-            kwargs={
-                'organization_slug': project.organization.slug,
-                'project_slug': project.slug,
-            }
-        )
-        response = self.client.get(url, format='json')
-        assert response.status_code == 200, response.content
-        data = {v['key']: v for v in response.data}
-        assert len(data) == 3
-
-        data['foo']['canDelete'] is True
-        data['bar']['canDelete'] is True
-        data['environment']['canDelete'] is False

+ 2 - 5
tests/snuba/api/endpoints/test_group_tags.py

@@ -37,8 +37,5 @@ class GroupTagsTest(APITestCase, SnubaTestCase):
             format='json'
         )
         assert response.status_code == 200
-        assert set([
-            (tag['key'], tag['uniqueValues']) for tag in response.data
-        ]) >= set([
-            ('biz', 1), ('environment', 2), ('foo', 1)
-        ])
+        assert set([tag['key'] for tag in response.data
+                    ]) >= set(['biz', 'environment', 'foo'])

+ 5 - 3
tests/snuba/api/endpoints/test_organization_tags.py

@@ -45,9 +45,11 @@ class OrganizationTagsTest(APITestCase, SnubaTestCase):
 
         response = self.client.get(url, format='json')
         assert response.status_code == 200, response.content
-        assert response.data == [
-            {'uniqueValues': 2, 'name': 'Fruit', 'key': 'fruit', 'totalValues': 3},
-            {'uniqueValues': 1, 'name': 'Some Tag', 'key': 'some_tag', 'totalValues': 1},
+        data = response.data
+        data.sort(key=lambda val: val['totalValues'], reverse=True)
+        assert data == [
+            {'name': 'Fruit', 'key': 'fruit', 'totalValues': 3},
+            {'name': 'Some Tag', 'key': 'some_tag', 'totalValues': 1},
         ]
 
     def test_no_projects(self):

Some files were not shown because too many files changed in this diff