Browse Source

feat(tagstore): Refactor and replace queryset methods and implement them for Snuba (#8753)

Brett Hoerner 6 years ago
parent
commit
c269e6cbb4

+ 7 - 2
src/sentry/api/base.py

@@ -202,8 +202,11 @@ class Endpoint(APIView):
         return Response(context, **kwargs)
 
     def paginate(
-        self, request, on_results=None, paginator_cls=Paginator, default_per_page=100, **kwargs
+        self, request, on_results=None, paginator=None,
+        paginator_cls=Paginator, default_per_page=100, **paginator_kwargs
     ):
+        assert (paginator and not paginator_kwargs) or (paginator_cls and paginator_kwargs)
+
         per_page = int(request.GET.get('per_page', default_per_page))
         input_cursor = request.GET.get('cursor')
         if input_cursor:
@@ -213,7 +216,9 @@ class Endpoint(APIView):
 
         assert per_page <= max(100, default_per_page)
 
-        paginator = paginator_cls(**kwargs)
+        if not paginator:
+            paginator = paginator_cls(**paginator_kwargs)
+
         cursor_result = paginator.get_result(
             limit=per_page,
             cursor=input_cursor,

+ 6 - 26
src/sentry/api/endpoints/group_tagkey_values.py

@@ -4,11 +4,9 @@ from sentry import tagstore
 from sentry.api.base import DocSection, EnvironmentMixin
 from sentry.api.bases.group import GroupEndpoint
 from sentry.api.exceptions import ResourceDoesNotExist
-from sentry.api.paginator import DateTimePaginator, Paginator
 from sentry.api.serializers import serialize
 from sentry.api.serializers.models.tagvalue import UserTagValueSerializer
 from sentry.models import Group, Environment
-from sentry.tagstore.types import GroupTagValue
 from sentry.utils.apidocs import scenario
 
 
@@ -51,43 +49,25 @@ class GroupTagKeyValuesEndpoint(GroupEndpoint, EnvironmentMixin):
         except tagstore.TagKeyNotFound:
             raise ResourceDoesNotExist
 
-        queryset = tagstore.get_group_tag_value_qs(
-            group.project_id, group.id, environment_id, lookup_key)
-
         sort = request.GET.get('sort')
         if sort == 'date':
             order_by = '-last_seen'
-            paginator_cls = DateTimePaginator
         elif sort == 'age':
             order_by = '-first_seen'
-            paginator_cls = DateTimePaginator
         else:
             order_by = '-id'
-            paginator_cls = Paginator
 
         if key == 'user':
             serializer_cls = UserTagValueSerializer(group.project_id)
         else:
             serializer_cls = None
 
+        paginator = tagstore.get_group_tag_value_paginator(
+            group.project_id, group.id, environment_id, lookup_key, order_by=order_by
+        )
+
         return self.paginate(
             request=request,
-            queryset=queryset,
-            order_by=order_by,
-            paginator_cls=paginator_cls,
-            on_results=lambda results: serialize(
-                map(  # XXX: This is a pretty big abstraction leak
-                    lambda instance: GroupTagValue(
-                        group_id=instance.group_id,
-                        key=instance.key,
-                        value=instance.value,
-                        times_seen=instance.times_seen,
-                        last_seen=instance.last_seen,
-                        first_seen=instance.first_seen,
-                    ),
-                    results,
-                ),
-                request.user,
-                serializer_cls,
-            ),
+            paginator=paginator,
+            on_results=lambda results: serialize(results, request.user, serializer_cls),
         )

+ 4 - 19
src/sentry/api/endpoints/project_tagkey_values.py

@@ -4,10 +4,8 @@ from sentry import tagstore
 from sentry.api.base import DocSection, EnvironmentMixin
 from sentry.api.bases.project import ProjectEndpoint
 from sentry.api.exceptions import ResourceDoesNotExist
-from sentry.api.paginator import DateTimePaginator
 from sentry.api.serializers import serialize
 from sentry.models import Environment
-from sentry.tagstore.types import TagValue
 
 
 class ProjectTagKeyValuesEndpoint(ProjectEndpoint, EnvironmentMixin):
@@ -40,29 +38,16 @@ class ProjectTagKeyValuesEndpoint(ProjectEndpoint, EnvironmentMixin):
         except tagstore.TagKeyNotFound:
             raise ResourceDoesNotExist
 
-        queryset = tagstore.get_tag_value_qs(
+        paginator = tagstore.get_tag_value_paginator(
             project.id,
             environment_id,
             tagkey.key,
             query=request.GET.get('query'),
+            order_by='-last_seen',
         )
 
         return self.paginate(
             request=request,
-            queryset=queryset,
-            order_by='-last_seen',
-            paginator_cls=DateTimePaginator,
-            on_results=lambda results: serialize(
-                map(  # XXX: This is a pretty big abstraction leak
-                    lambda instance: TagValue(
-                        key=instance.key,
-                        value=instance.value,
-                        times_seen=instance.times_seen,
-                        first_seen=instance.first_seen,
-                        last_seen=instance.last_seen,
-                    ),
-                    results,
-                ),
-                request.user
-            ),
+            paginator=paginator,
+            on_results=lambda results: serialize(results, request.user),
         )

+ 15 - 4
src/sentry/api/paginator.py

@@ -26,7 +26,7 @@ MAX_HITS_LIMIT = 1000
 
 
 class BasePaginator(object):
-    def __init__(self, queryset, order_by=None, max_limit=MAX_LIMIT):
+    def __init__(self, queryset, order_by=None, max_limit=MAX_LIMIT, on_results=None):
         if order_by:
             if order_by.startswith('-'):
                 self.key, self.desc = order_by[1:], True
@@ -37,6 +37,7 @@ class BasePaginator(object):
             self.desc = False
         self.queryset = queryset
         self.max_limit = max_limit
+        self.on_results = on_results
 
     def _is_asc(self, is_prev):
         return (self.desc and is_prev) or not (self.desc or is_prev)
@@ -145,6 +146,7 @@ class BasePaginator(object):
             cursor=cursor,
             is_desc=self.desc,
             key=self.get_item_key,
+            on_results=self.on_results,
         )
 
     def count_hits(self, max_hits):
@@ -219,8 +221,12 @@ class OffsetPaginator(BasePaginator):
         next_cursor = Cursor(limit, page + 1, False, len(results) > limit)
         prev_cursor = Cursor(limit, page - 1, True, page > 0)
 
+        results = list(results[:limit])
+        if self.on_results:
+            results = self.on_results(results)
+
         return CursorResult(
-            results=results[:limit],
+            results=results,
             next=next_cursor,
             prev=prev_cursor,
         )
@@ -254,7 +260,7 @@ def reverse_bisect_left(a, x, lo=0, hi=None):
 
 
 class SequencePaginator(object):
-    def __init__(self, data, reverse=False, max_limit=MAX_LIMIT):
+    def __init__(self, data, reverse=False, max_limit=MAX_LIMIT, on_results=None):
         self.scores, self.values = map(
             list,
             zip(*sorted(data, reverse=reverse)),
@@ -265,6 +271,7 @@ class SequencePaginator(object):
             self.scores,
         )
         self.max_limit = max_limit
+        self.on_results = on_results
 
     def get_result(self, limit, cursor=None, count_hits=False):
         limit = min(limit, self.max_limit)
@@ -312,8 +319,12 @@ class SequencePaginator(object):
             prev_cursor = Cursor(cursor.value, cursor.offset, True, False)
             next_cursor = Cursor(cursor.value, cursor.offset, False, False)
 
+        results = self.values[lo:hi]
+        if self.on_results:
+            results = self.on_results(results)
+
         return CursorResult(
-            self.values[lo:hi],
+            results,
             prev=prev_cursor,
             next=next_cursor,
             hits=min(len(self.scores), MAX_HITS_LIMIT) if count_hits else None,

+ 18 - 3
src/sentry/tagstore/base.py

@@ -56,7 +56,10 @@ class TagStorage(Service):
 
         'get_group_tag_keys_and_top_values',
 
-        'get_tag_value_qs',
+        'get_tag_value_paginator',
+        'get_group_tag_value_paginator',
+        'get_group_tag_value_iter',
+
         'get_group_tag_value_qs',
         'get_event_tag_qs',
     ])
@@ -294,9 +297,21 @@ class TagStorage(Service):
         """
         raise NotImplementedError
 
-    def get_tag_value_qs(self, project_id, environment_id, key, query=None):
+    def get_tag_value_paginator(self, project_id, environment_id, key, query=None):
+        """
+        >>> get_tag_value_paginator(1, 2, 'environment', query='prod')
+        """
+        raise NotImplementedError
+
+    def get_group_tag_value_iter(self, project_id, group_id, environment_id, key, callbacks=()):
+        """
+        >>> get_group_tag_value_iter(1, 2, 3, 'environment')
+        """
+        raise NotImplementedError
+
+    def get_group_tag_value_paginator(self, project_id, group_id, environment_id, key):
         """
-        >>> get_tag_value_qs(1, 2, 'environment', query='prod')
+        >>> get_group_tag_value_paginator(1, 2, 3, 'environment')
         """
         raise NotImplementedError
 

+ 38 - 3
src/sentry/tagstore/legacy/backend.py

@@ -635,7 +635,7 @@ class LegacyTagStorage(TagStorage):
 
         # ANY matches should come last since they're the least specific and
         # will provide the largest range of matches
-        tag_lookups = sorted(six.iteritems(tags), key=lambda (k, v): v == ANY)
+        tag_lookups = sorted(six.iteritems(tags), key=lambda k_v: k_v[1] == ANY)
 
         # get initial matches to start the filter
         matches = candidates or []
@@ -684,7 +684,10 @@ class LegacyTagStorage(TagStorage):
                 ).count(),
             )
 
-    def get_tag_value_qs(self, project_id, environment_id, key, query=None):
+    def get_tag_value_paginator(self, project_id, environment_id, key, query=None,
+            order_by='-last_seen'):
+        from sentry.api.paginator import DateTimePaginator
+
         queryset = models.TagValue.objects.filter(
             project_id=project_id,
             key=key,
@@ -693,7 +696,39 @@ class LegacyTagStorage(TagStorage):
         if query:
             queryset = queryset.filter(value__contains=query)
 
-        return queryset
+        return DateTimePaginator(
+            queryset=queryset,
+            order_by=order_by,
+            on_results=lambda results: map(transformers[models.TagValue], results)
+        )
+
+    def get_group_tag_value_iter(self, project_id, group_id, environment_id, key, callbacks=()):
+        from sentry.utils.query import RangeQuerySetWrapper
+
+        qs = self.get_group_tag_value_qs(
+            project_id, group_id, environment_id, key
+        )
+
+        return RangeQuerySetWrapper(queryset=qs, callbacks=callbacks)
+
+    def get_group_tag_value_paginator(self, project_id, group_id, environment_id, key,
+            order_by='-id'):
+        from sentry.api.paginator import DateTimePaginator, Paginator
+
+        qs = self.get_group_tag_value_qs(project_id, group_id, environment_id, key)
+
+        if order_by in ('-last_seen', '-first_seen'):
+            paginator_cls = DateTimePaginator
+        elif order_by == '-id':
+            paginator_cls = Paginator
+        else:
+            raise ValueError("Unsupported order_by: %s" % order_by)
+
+        return paginator_cls(
+            queryset=qs,
+            order_by=order_by,
+            on_results=lambda results: map(transformers[models.GroupTagValue], results)
+        )
 
     def get_group_tag_value_qs(self, project_id, group_id, environment_id, key, value=None):
         queryset = models.GroupTagValue.objects.filter(key=key)

+ 117 - 0
src/sentry/tagstore/snuba/backend.py

@@ -25,6 +25,7 @@ from sentry.tagstore.exceptions import (
 )
 from sentry.tagstore.types import TagKey, TagValue, GroupTagKey, GroupTagValue
 from sentry.utils import snuba
+from sentry.utils.dates import to_timestamp
 
 
 SEEN_COLUMN = 'timestamp'
@@ -373,6 +374,122 @@ class SnubaTagStorage(TagStorage):
                              referrer='tagstore.get_groups_user_counts')
         return defaultdict(int, {k: v for k, v in result.items() if v})
 
+    def get_tag_value_paginator(self, project_id, environment_id, key, query=None,
+            order_by='-last_seen'):
+        from sentry.api.paginator import SequencePaginator
+
+        if not order_by == '-last_seen':
+            raise ValueError("Unsupported order_by: %s" % order_by)
+
+        conditions = []
+        if query:
+            conditions.append(['tags_value', 'LIKE', '%{}%'.format(query)])
+
+        start, end = self.get_time_range()
+        results = snuba.query(
+            start=start,
+            end=end,
+            groupby=['tags_value'],
+            filter_keys={
+                'project_id': [project_id],
+                'environment': [environment_id],
+                'tags_key': [key],
+            },
+            aggregations=[
+                ['count()', '', 'times_seen'],
+                ['min', 'timestamp', 'first_seen'],
+                ['max', 'timestamp', 'last_seen'],
+            ],
+            conditions=conditions,
+            orderby=order_by,
+            # TODO: This means they can't actually paginate all TagValues.
+            limit=1000,
+        )
+
+        tag_values = [
+            TagValue(
+                key=key,
+                value=value,
+                **fix_tag_value_data(data)
+            ) for value, data in six.iteritems(results)
+        ]
+
+        desc = order_by.startswith('-')
+        score_field = order_by.lstrip('-')
+        return SequencePaginator(
+            [(int(to_timestamp(getattr(tv, score_field)) * 1000), tv) for tv in tag_values],
+            reverse=desc
+        )
+
+    def get_group_tag_value_iter(self, project_id, group_id, environment_id, key, callbacks=()):
+        start, end = self.get_time_range()
+        results = snuba.query(
+            start=start,
+            end=end,
+            groupby=['tags_value'],
+            filter_keys={
+                'project_id': [project_id],
+                'environment': [environment_id],
+                'tags_key': [key],
+                'issue': [group_id],
+            },
+            aggregations=[
+                ['count()', '', 'times_seen'],
+                ['min', 'timestamp', 'first_seen'],
+                ['max', 'timestamp', 'last_seen'],
+            ],
+            orderby='-first_seen',  # Closest thing to pre-existing `-id` order
+            # TODO: This means they can't actually iterate all GroupTagValues.
+            limit=1000,
+        )
+
+        group_tag_values = [
+            GroupTagValue(
+                group_id=group_id,
+                key=key,
+                value=value,
+                **fix_tag_value_data(data)
+            ) for value, data in six.iteritems(results)
+        ]
+
+        for cb in callbacks:
+            cb(group_tag_values)
+
+        return group_tag_values
+
+    def get_group_tag_value_paginator(self, project_id, group_id, environment_id, key,
+            order_by='-id'):
+        from sentry.api.paginator import SequencePaginator
+
+        if order_by in ('-last_seen', '-first_seen'):
+            pass
+        elif order_by == '-id':
+            # Snuba has no unique id per GroupTagValue so we'll substitute `-first_seen`
+            order_by = '-first_seen'
+        else:
+            raise ValueError("Unsupported order_by: %s" % order_by)
+
+        group_tag_values = self.get_group_tag_value_iter(
+            project_id, group_id, environment_id, key
+        )
+
+        desc = order_by.startswith('-')
+        score_field = order_by.lstrip('-')
+        return SequencePaginator(
+            [(int(to_timestamp(getattr(gtv, score_field)) * 1000), gtv) for gtv in group_tag_values],
+            reverse=desc
+        )
+
+    def get_group_tag_value_qs(self, project_id, group_id, environment_id, key, value=None):
+        # This method is not implemented because it is only used by the Django
+        # search backend.
+        raise NotImplementedError
+
+    def get_event_tag_qs(self, project_id, environment_id, key, value):
+        # This method is not implemented because it is only used by the Django
+        # search backend.
+        raise NotImplementedError
+
     def get_group_event_filter(self, project_id, group_id, environment_id, tags):
         start, end = self.get_time_range()
         filters = {

+ 37 - 2
src/sentry/tagstore/v2/backend.py

@@ -979,7 +979,10 @@ class V2TagStorage(TagStorage):
                 ).count(),
             )
 
-    def get_tag_value_qs(self, project_id, environment_id, key, query=None):
+    def get_tag_value_paginator(self, project_id, environment_id, key, query=None,
+            order_by='-last_seen'):
+        from sentry.api.paginator import DateTimePaginator
+
         qs = models.TagValue.objects.select_related('_key').filter(
             project_id=project_id,
             _key__project_id=project_id,
@@ -991,7 +994,39 @@ class V2TagStorage(TagStorage):
         if query:
             qs = qs.filter(value__contains=query)
 
-        return qs
+        return DateTimePaginator(
+            queryset=qs,
+            order_by=order_by,
+            on_results=lambda results: map(transformers[models.TagValue], results)
+        )
+
+    def get_group_tag_value_iter(self, project_id, group_id, environment_id, key, callbacks=()):
+        from sentry.utils.query import RangeQuerySetWrapper
+
+        qs = self.get_group_tag_value_qs(
+            project_id, group_id, environment_id, key
+        )
+
+        return RangeQuerySetWrapper(queryset=qs, callbacks=callbacks)
+
+    def get_group_tag_value_paginator(self, project_id, group_id, environment_id, key,
+            order_by='-id'):
+        from sentry.api.paginator import DateTimePaginator, Paginator
+
+        qs = self.get_group_tag_value_qs(project_id, group_id, environment_id, key)
+
+        if order_by in ('-last_seen', '-first_seen'):
+            paginator_cls = DateTimePaginator
+        elif order_by == '-id':
+            paginator_cls = Paginator
+        else:
+            raise ValueError("Unsupported order_by: %s" % order_by)
+
+        return paginator_cls(
+            queryset=qs,
+            order_by=order_by,
+            on_results=lambda results: map(transformers[models.GroupTagValue], results)
+        )
 
     def get_group_tag_value_qs(self, project_id, group_id, environment_id, key, value=None):
         qs = models.GroupTagValue.objects.select_related('_key', '_value').filter(

+ 5 - 1
src/sentry/utils/cursors.py

@@ -198,7 +198,8 @@ def _build_prev_values(cursor, results, key, limit, is_desc):
     return (prev_value, prev_offset, has_prev)
 
 
-def build_cursor(results, key, limit=100, is_desc=False, cursor=None, hits=None, max_hits=None):
+def build_cursor(results, key, limit=100, is_desc=False, cursor=None, hits=None,
+        max_hits=None, on_results=None):
     if cursor is None:
         cursor = Cursor(0, 0, 0)
 
@@ -233,6 +234,9 @@ def build_cursor(results, key, limit=100, is_desc=False, cursor=None, hits=None,
     next_cursor = Cursor(next_value or 0, next_offset, False, has_next)
     prev_cursor = Cursor(prev_value or 0, prev_offset, True, has_prev)
 
+    if on_results:
+        results = on_results(results)
+
     return CursorResult(
         results=results,
         next=next_cursor,

+ 5 - 4
src/sentry/utils/snuba.py

@@ -1,5 +1,6 @@
 from __future__ import absolute_import
 
+from collections import OrderedDict
 from contextlib import contextmanager
 from dateutil.parser import parse as parse_datetime
 from itertools import chain
@@ -185,12 +186,12 @@ def nest_groups(data, groups, aggregate_cols):
             return {c: data[0][c] for c in aggregate_cols} if data else None
     else:
         g, rest = groups[0], groups[1:]
-        inter = {}
+        inter = OrderedDict()
         for d in data:
             inter.setdefault(d[g], []).append(d)
-        return {
-            k: nest_groups(v, rest, aggregate_cols) for k, v in six.iteritems(inter)
-        }
+        return OrderedDict(
+            (k, nest_groups(v, rest, aggregate_cols)) for k, v in six.iteritems(inter)
+        )
 
 
 def is_condition(cond_or_list):

Some files were not shown because too many files changed in this diff