
feat: snuba version of group events (#11554)

* feat: Snuba version of group events

Controlled by the snuba.events-queries.enabled option for now.
Alex Hofsteede, 6 years ago
commit 5240c3554f

+ 102 - 50
src/sentry/api/endpoints/group_events.py

@@ -6,16 +6,25 @@ from datetime import timedelta
 from django.db.models import Q
 from django.utils import timezone
 from rest_framework.response import Response
+from functools32 import partial
 
-from sentry import quotas, tagstore
+
+from sentry import options, quotas, tagstore
 from sentry.api.base import DocSection, EnvironmentMixin
 from sentry.api.bases import GroupEndpoint
+from sentry.api.serializers.models.event import SnubaEvent
 from sentry.api.serializers import serialize
-from sentry.api.paginator import DateTimePaginator
+from sentry.api.paginator import DateTimePaginator, GenericOffsetPaginator
 from sentry.models import Environment, Event, Group
 from sentry.search.utils import parse_query
-from sentry.utils.apidocs import scenario, attach_scenarios
 from sentry.search.utils import InvalidQuery
+from sentry.utils.apidocs import scenario, attach_scenarios
+from sentry.utils.validators import is_event_id
+from sentry.utils.snuba import raw_query
+
+
+class NoResults(Exception):
+    pass
 
 
 @scenario('ListAvailableSamples')
@@ -39,62 +48,66 @@ class GroupEventsEndpoint(GroupEndpoint, EnvironmentMixin):
         :auth: required
         """
 
-        def respond(queryset):
-            return self.paginate(
-                request=request,
-                queryset=queryset,
-                order_by='-datetime',
-                on_results=lambda x: serialize(x, request.user),
-                paginator_cls=DateTimePaginator,
-            )
-
-        events = Event.objects.filter(group_id=group.id)
-
         try:
-            environment = self._get_environment_from_request(
-                request,
-                group.project.organization_id,
-            )
-        except Environment.DoesNotExist:
-            return respond(events.none())
-
-        raw_query = request.GET.get('query')
-
-        if raw_query:
-            try:
-                query_kwargs = parse_query([group.project], raw_query, request.user)
-            except InvalidQuery as exc:
-                return Response({'detail': six.text_type(exc)}, status=400)
+            environment = self._get_environment(request, group)
+            query, tags = self._get_search_query_and_tags(request, group, environment)
+        except InvalidQuery as exc:
+            return Response({'detail': six.text_type(exc)}, status=400)
+        except NoResults:
+            return Response([])
+
+        use_snuba = options.get('snuba.events-queries.enabled')
+        backend = self._get_events_snuba if use_snuba else self._get_events_legacy
+        return backend(request, group, environment, query, tags)
+
+    def _get_events_snuba(self, request, group, environment, query, tags):
+        conditions = []
+        if query:
+            msg_substr = ['positionCaseInsensitive', ['message', "'%s'" % (query,)]]
+            message_condition = [msg_substr, '!=', 0]
+            if is_event_id(query):
+                or_condition = [message_condition, ['event_id', '=', query]]
+                conditions.append(or_condition)
             else:
-                query = query_kwargs.pop('query', None)
-                tags = query_kwargs.pop('tags', {})
-        else:
-            query = None
-            tags = {}
+                conditions.append(message_condition)
 
-        if environment is not None:
-            if 'environment' in tags and tags['environment'] != environment.name:
-                # An event can only be associated with a single
-                # environment, so if the environment associated with
-                # the request is different than the environment
-                # provided as a tag lookup, the query cannot contain
-                # any valid results.
-                return respond(events.none())
-            else:
-                tags['environment'] = environment.name
+        if tags:
+            conditions.extend([[u'tags[{}]'.format(k), '=', v] for (k, v) in tags.items()])
+
+        now = timezone.now()
+        data_fn = partial(
+            # extract 'data' from raw_query result
+            lambda *args, **kwargs: raw_query(*args, **kwargs)['data'],
+            start=now - timedelta(days=90),
+            end=now,
+            conditions=conditions,
+            filter_keys={
+                'project_id': [group.project_id],
+                'issue': [group.id]
+            },
+            selected_columns=SnubaEvent.selected_columns + ['tags.key', 'tags.value'],
+            orderby='-timestamp',
+            referrer='api.group-events',
+        )
+
+        return self.paginate(
+            request=request,
+            on_results=lambda results: serialize(
+                [SnubaEvent(row) for row in results], request.user),
+            paginator=GenericOffsetPaginator(data_fn=data_fn)
+        )
+
+    def _get_events_legacy(self, request, group, environment, query, tags):
+        events = Event.objects.filter(group_id=group.id)
 
         if query:
             q = Q(message__icontains=query)
 
-            if len(query) == 32:
+            if is_event_id(query):
                 q |= Q(event_id__exact=query)
 
             events = events.filter(q)
 
-        # TODO currently snuba can be used to get this filter of event_ids matching
-        # the search tags, which is then used to further filter a postgres QuerySet
-        # Ideally we would just use snuba to completely replace the fetching of the
-        # events.
         if tags:
             event_filter = tagstore.get_group_event_filter(
                 group.project_id,
@@ -104,7 +117,7 @@ class GroupEventsEndpoint(GroupEndpoint, EnvironmentMixin):
             )
 
             if not event_filter:
-                return respond(events.none())
+                return Response([])
 
             events = events.filter(**event_filter)
 
@@ -115,4 +128,43 @@ class GroupEventsEndpoint(GroupEndpoint, EnvironmentMixin):
                 datetime__gte=timezone.now() - timedelta(days=retention)
             )
 
-        return respond(events)
+        return self.paginate(
+            request=request,
+            queryset=events,
+            order_by='-datetime',
+            on_results=lambda x: serialize(x, request.user),
+            paginator_cls=DateTimePaginator,
+        )
+
+    def _get_environment(self, request, group):
+        try:
+            return self._get_environment_from_request(
+                request,
+                group.project.organization_id,
+            )
+        except Environment.DoesNotExist:
+            raise NoResults
+
+    def _get_search_query_and_tags(self, request, group, environment=None):
+        raw_query = request.GET.get('query')
+
+        if raw_query:
+            query_kwargs = parse_query([group.project], raw_query, request.user)
+            query = query_kwargs.pop('query', None)
+            tags = query_kwargs.pop('tags', {})
+        else:
+            query = None
+            tags = {}
+
+        if environment is not None:
+            if 'environment' in tags and tags['environment'] != environment.name:
+                # An event can only be associated with a single
+                # environment, so if the environment associated with
+                # the request is different than the environment
+                # provided as a tag lookup, the query cannot contain
+                # any valid results.
+                raise NoResults
+            else:
+                tags['environment'] = environment.name
+
+        return query, tags
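For reference, a minimal sketch (outside of Sentry) of how the Snuba conditions above get assembled. looks_like_event_id is a hypothetical stand-in for sentry.utils.validators.is_event_id, and the "nested list of conditions means OR" reading is an assumption drawn from this diff rather than from Snuba documentation.

    import re

    def looks_like_event_id(value):
        # stand-in for is_event_id: a 32-character hex string
        return bool(re.match(r'^[0-9a-f]{32}$', (value or '').lower()))

    def build_conditions(query, tags):
        conditions = []
        if query:
            # case-insensitive substring match on the message column
            msg_substr = ['positionCaseInsensitive', ['message', "'%s'" % (query,)]]
            message_condition = [msg_substr, '!=', 0]
            if looks_like_event_id(query):
                # nested list of conditions: match the message OR the exact event_id
                conditions.append([message_condition, ['event_id', '=', query]])
            else:
                conditions.append(message_condition)
        # each tag lookup becomes an equality condition on the tags[] mapping column
        conditions.extend([[u'tags[{}]'.format(k), '=', v] for k, v in tags.items()])
        return conditions

    print(build_conditions('hello world', {'environment': 'production'}))
    # [[['positionCaseInsensitive', ['message', "'hello world'"]], '!=', 0],
    #  ['tags[environment]', '=', 'production']]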

+ 6 - 7
src/sentry/api/endpoints/project_events.py

@@ -3,6 +3,7 @@ from __future__ import absolute_import
 from datetime import timedelta
 from django.utils import timezone
 
+from sentry import options
 from sentry.api.base import DocSection
 from sentry.api.bases.project import ProjectEndpoint
 from sentry.api.serializers import serialize
@@ -20,7 +21,7 @@ def list_project_available_samples_scenario(runner):
 class ProjectEventsEndpoint(ProjectEndpoint):
     doc_section = DocSection.EVENTS
 
-    def __search_events_legacy(self, request, project):
+    def _get_events_legacy(self, request, project):
         from sentry import quotas
         from sentry.api.paginator import DateTimePaginator
         from sentry.models import Event
@@ -50,7 +51,7 @@ class ProjectEventsEndpoint(ProjectEndpoint):
             paginator_cls=DateTimePaginator,
         )
 
-    def __search_events_snuba(self, request, project):
+    def _get_events_snuba(self, request, project):
         from functools32 import partial
         from sentry.api.paginator import GenericOffsetPaginator
         from sentry.api.serializers.models.event import SnubaEvent
@@ -97,8 +98,6 @@ class ProjectEventsEndpoint(ProjectEndpoint):
         :pparam string project_slug: the slug of the project the groups
                                      belong to.
         """
-        backend = request.COOKIES.get('eventstream', 'legacy')
-        return {
-            'legacy': self.__search_events_legacy,
-            'snuba': self.__search_events_snuba,
-        }[backend](request, project)
+        use_snuba = options.get('snuba.events-queries.enabled')
+        backend = self._get_events_snuba if use_snuba else self._get_events_legacy
+        return backend(request, project)
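Both endpoints now hand the paginator a pre-bound query function instead of a queryset. Below is a minimal sketch of that contract, assuming GenericOffsetPaginator calls its data_fn with offset and limit keyword arguments and expects a plain list of rows back; fake_raw_query is a stand-in for sentry.utils.snuba.raw_query.

    from functools import partial  # functools32.partial on Python 2

    def fake_raw_query(offset=0, limit=100, conditions=None, orderby=None, **kwargs):
        # the real raw_query returns a dict whose 'data' key holds the rows
        rows = [{'event_id': 'a' * 32}, {'event_id': 'b' * 32}, {'event_id': 'c' * 32}]
        return {'data': rows[offset:offset + limit]}

    # bind the query parameters up front; the paginator fills in offset/limit per page
    data_fn = partial(
        lambda *args, **kwargs: fake_raw_query(*args, **kwargs)['data'],
        conditions=[],
        orderby='-timestamp',
    )

    print(data_fn(offset=0, limit=2))  # first page
    print(data_fn(offset=2, limit=2))  # next page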

+ 18 - 3
src/sentry/api/serializers/models/event.py

@@ -243,6 +243,7 @@ class DetailedEventSerializer(EventSerializer):
     """
     Adds release and user report info to the serialized event.
     """
+
     def serialize(self, obj, attrs, user):
         result = super(DetailedEventSerializer, self).serialize(obj, attrs, user)
         result['release'] = self._get_release_info(user, obj)
@@ -288,7 +289,8 @@ class SnubaEvent(object):
     ]
 
     def __init__(self, kv):
-        assert set(kv.keys()) == set(self.selected_columns)
+        assert len(set(self.selected_columns) - set(kv.keys())
+                   ) == 0, "SnubaEvents need all of the selected_columns"
         self.__dict__ = kv
 
 
@@ -300,8 +302,15 @@ class SnubaEventSerializer(Serializer):
         serialization returned by EventSerializer.
     """
 
+    def get_tags_dict(self, obj):
+        keys = getattr(obj, 'tags.key', None)
+        values = getattr(obj, 'tags.value', None)
+        if keys and values and len(keys) == len(values):
+            return dict(zip(keys, values))
+        return None
+
     def serialize(self, obj, attrs, user):
-        return {
+        result = {
             'eventID': six.text_type(obj.event_id),
             'projectID': six.text_type(obj.project_id),
             'message': obj.message,
@@ -311,5 +320,11 @@ class SnubaEventSerializer(Serializer):
                 'email': obj.email,
                 'username': obj.username,
                 'ipAddress': obj.ip_address,
-            }
+            },
         }
+
+        tags = self.get_tags_dict(obj)
+        if tags:
+            result['tags'] = tags
+
+        return result
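A small sketch of the new tag handling: Snuba returns tags.key and tags.value as two parallel arrays on each row, and the serializer only emits a tags entry when both arrays are present and line up. The row dict here is hypothetical example data.

    def tags_dict(row):
        keys = row.get('tags.key')
        values = row.get('tags.value')
        # only zip when both arrays exist and are the same length
        if keys and values and len(keys) == len(values):
            return dict(zip(keys, values))
        return None

    row = {
        'event_id': 'a' * 32,
        'tags.key': ['environment', 'logger'],
        'tags.value': ['production', 'python'],
    }
    print(tags_dict(row))                    # {'environment': 'production', 'logger': 'python'}
    print(tags_dict({'event_id': 'b' * 32})) # None -> no 'tags' key in the payload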

+ 1 - 0
src/sentry/options/defaults.py

@@ -145,6 +145,7 @@ register('snuba.search.max-pre-snuba-candidates', default=5000)
 register('snuba.search.chunk-growth-rate', default=1.5)
 register('snuba.search.max-chunk-size', default=2000)
 register('snuba.search.max-total-chunk-time-seconds', default=30.0)
+register('snuba.events-queries.enabled', type=Bool, default=False)
 
 # Kafka Publisher
 register('kafka-publisher.raw-event-sample-rate', default=0.0)
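The new flag defaults to off, so nothing changes until it is flipped. A minimal sketch of toggling it, assuming the sentry.options get/set API already used elsewhere in this diff; in a real deployment the flag would normally be set through the options store rather than inline code.

    from sentry import options

    # the legacy Postgres path stays active until the flag is flipped
    if not options.get('snuba.events-queries.enabled'):
        print('using legacy event queries')

    # opt a deployment (or a test case) into the Snuba-backed endpoints
    options.set('snuba.events-queries.enabled', True)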

+ 5 - 1
tests/sentry/api/endpoints/test_group_events.py

@@ -5,12 +5,16 @@ import six
 from datetime import timedelta
 from django.utils import timezone
 
-from sentry import tagstore
+from sentry import options, tagstore
 from sentry.models import Environment
 from sentry.testutils import APITestCase
 
 
 class GroupEventsTest(APITestCase):
+    def setUp(self):
+        super(GroupEventsTest, self).setUp()
+        options.set('snuba.events-queries.enabled', False)
+
     def test_simple(self):
         self.login_as(user=self.user)
 

+ 5 - 0
tests/sentry/api/endpoints/test_project_events.py

@@ -6,10 +6,15 @@ from datetime import timedelta
 from django.utils import timezone
 from django.core.urlresolvers import reverse
 
+from sentry import options
 from sentry.testutils import APITestCase
 
 
 class ProjectEventsTest(APITestCase):
+    def setUp(self):
+        super(ProjectEventsTest, self).setUp()
+        options.set('snuba.events-queries.enabled', False)
+
     def test_simple(self):
         self.login_as(user=self.user)
 

+ 266 - 0
tests/snuba/api/endpoints/test_group_events.py

@@ -0,0 +1,266 @@
+from __future__ import absolute_import
+
+import six
+
+from datetime import timedelta
+from django.utils import timezone
+
+from sentry import options
+from sentry.models import Environment
+from sentry.testutils import APITestCase, SnubaTestCase
+
+
+class GroupEventsTest(APITestCase, SnubaTestCase):
+    """
+    This is more or less an exact copy of the tests under:
+
+        /tests/sentry/api/endpoints/test_group_events.py
+
+    with the removal of any explicit tagstore key/value creation calls, and
+    comparing the resulting events by `eventID`, instead of `id`.
+    """
+
+    def setUp(self):
+        super(GroupEventsTest, self).setUp()
+        self.min_ago = timezone.now() - timedelta(minutes=1)
+        options.set('snuba.events-queries.enabled', True)
+
+    def test_simple(self):
+        self.login_as(user=self.user)
+
+        group = self.create_group()
+        event_1 = self.create_event(
+            event_id='a' * 32,
+            datetime=self.min_ago,
+            group=group
+        )
+        event_2 = self.create_event(
+            event_id='b' * 32,
+            datetime=self.min_ago,
+            group=group
+        )
+
+        url = u'/api/0/issues/{}/events/'.format(group.id)
+        response = self.client.get(url, format='json')
+
+        assert response.status_code == 200, response.content
+        assert len(response.data) == 2
+        assert sorted(map(lambda x: x['eventID'], response.data)) == sorted(
+            [
+                six.text_type(event_1.event_id),
+                six.text_type(event_2.event_id),
+            ]
+        )
+
+    def test_tags(self):
+        self.login_as(user=self.user)
+
+        group = self.create_group()
+        event_1 = self.create_event(
+            event_id='a' * 32,
+            datetime=self.min_ago,
+            group=group,
+            tags={
+                'foo': 'baz',
+                'bar': 'buz',
+            }
+        )
+        event_2 = self.create_event(
+            event_id='b' * 32,
+            datetime=self.min_ago,
+            group=group,
+            tags={
+                'bar': 'biz',
+            }
+        )
+
+        url = u'/api/0/issues/{}/events/'.format(group.id)
+        response = self.client.get(url + '?query=foo:baz', format='json')
+
+        assert response.status_code == 200, response.content
+        assert len(response.data) == 1
+        assert response.data[0]['eventID'] == six.text_type(event_1.event_id)
+
+        response = self.client.get(url + '?query=bar:biz', format='json')
+
+        assert response.status_code == 200, response.content
+        assert len(response.data) == 1
+        assert response.data[0]['eventID'] == six.text_type(event_2.event_id)
+
+        response = self.client.get(url + '?query=bar:biz%20foo:baz', format='json')
+
+        assert response.status_code == 200, response.content
+        assert len(response.data) == 0
+
+        response = self.client.get(url + '?query=bar:buz%20foo:baz', format='json')
+
+        assert response.status_code == 200, response.content
+        assert len(response.data) == 1
+        assert response.data[0]['eventID'] == six.text_type(event_1.event_id)
+
+        response = self.client.get(url + '?query=bar:baz', format='json')
+
+        assert response.status_code == 200, response.content
+        assert len(response.data) == 0
+
+        response = self.client.get(url + '?query=a:b', format='json')
+
+        assert response.status_code == 200, response.content
+        assert len(response.data) == 0
+
+        response = self.client.get(url + '?query=bar:b', format='json')
+
+        assert response.status_code == 200, response.content
+        assert len(response.data) == 0
+
+        response = self.client.get(url + '?query=bar:baz', format='json')
+
+        assert response.status_code == 200, response.content
+        assert len(response.data) == 0
+
+    def test_search_event_by_id(self):
+        self.login_as(user=self.user)
+
+        group = self.create_group()
+        event_1 = self.create_event(
+            event_id='a' * 32,
+            datetime=self.min_ago,
+            group=group
+        )
+        self.create_event(
+            event_id='b' * 32,
+            datetime=self.min_ago,
+            group=group
+        )
+
+        url = u'/api/0/issues/{}/events/?query={}'.format(group.id, event_1.event_id)
+        response = self.client.get(url, format='json')
+
+        assert response.status_code == 200, response.content
+        assert len(response.data) == 1
+        assert response.data[0]['eventID'] == event_1.event_id
+
+    def test_search_event_by_message(self):
+        self.login_as(user=self.user)
+
+        group = self.create_group()
+        event_1 = self.create_event(
+            event_id='a' * 32,
+            datetime=self.min_ago,
+            group=group,
+            message="foo bar hello world"
+        )
+
+        event_2 = self.create_event(
+            event_id='b' * 32,
+            datetime=self.min_ago,
+            group=group,
+            message='this bar hello world '
+        )
+
+        query_1 = "foo"
+        query_2 = "hello+world"
+
+        # Single Word Query
+        url = u'/api/0/issues/{}/events/?query={}'.format(group.id, query_1)
+        response = self.client.get(url, format='json')
+
+        assert response.status_code == 200, response.content
+        assert len(response.data) == 1
+        assert response.data[0]['eventID'] == event_1.event_id
+
+        # Multiple Word Query
+        url = u'/api/0/issues/{}/events/?query={}'.format(group.id, query_2)
+        response = self.client.get(url, format='json')
+
+        assert response.status_code == 200, response.content
+        assert len(response.data) == 2
+        assert sorted(map(lambda x: x['eventID'], response.data)) == sorted(
+            [
+                six.text_type(event_1.event_id),
+                six.text_type(event_2.event_id),
+            ]
+        )
+
+    def test_environment(self):
+        self.login_as(user=self.user)
+
+        group = self.create_group()
+        events = {}
+
+        for name in ['production', 'development']:
+            Environment.get_or_create(group.project, name)
+            events[name] = self.create_event(
+                group=group,
+                datetime=self.min_ago,
+                tags={'environment': name},
+            )
+
+        url = u'/api/0/issues/{}/events/'.format(group.id)
+        response = self.client.get(url + '?environment=production', format='json')
+
+        assert response.status_code == 200, response.content
+        assert set(map(lambda x: x['eventID'], response.data)) == set([
+            six.text_type(events['production'].event_id),
+        ])
+
+        url = u'/api/0/issues/{}/events/'.format(group.id)
+        response = self.client.get(url + '?environment=invalid', format='json')
+
+        assert response.status_code == 200, response.content
+        assert response.data == []
+
+        url = u'/api/0/issues/{}/events/'.format(group.id)
+        response = self.client.get(
+            url + '?environment=production&query=environment:development',
+            format='json')
+
+        assert response.status_code == 200, response.content
+        assert response.data == []
+
+    def test_filters_based_on_retention(self):
+        self.login_as(user=self.user)
+
+        project = self.create_project()
+        group = self.create_group(project=project)
+        self.create_event(
+            event_id='a' * 32,
+            group=group,
+            datetime=timezone.now() - timedelta(days=2),
+        )
+        event_2 = self.create_event(
+            event_id='b' * 32,
+            datetime=self.min_ago,
+            group=group
+        )
+
+        with self.options({'system.event-retention-days': 1}):
+            response = self.client.get(u'/api/0/issues/{}/events/'.format(group.id))
+
+        assert response.status_code == 200, response.content
+        assert len(response.data) == 1
+        assert sorted(map(lambda x: x['eventID'], response.data)) == sorted(
+            [
+                six.text_type(event_2.event_id),
+            ]
+        )
+
+    def test_search_event_has_tags(self):
+        self.login_as(user=self.user)
+
+        group = self.create_group()
+        self.create_event(
+            event_id='a' * 32,
+            datetime=self.min_ago,
+            group=group,
+            message="foo",
+            tags={
+                'logger': 'python',
+            }
+        )
+
+        response = self.client.get(u'/api/0/issues/{}/events/'.format(group.id))
+
+        assert response.status_code == 200, response.content
+        assert len(response.data) == 1
+        assert response.data[0]['tags']['logger'] == 'python'

+ 2 - 1
tests/snuba/api/endpoints/test_project_events.py

@@ -4,6 +4,7 @@ from datetime import timedelta
 from django.utils import timezone
 from django.core.urlresolvers import reverse
 
+from sentry import options
 from sentry.testutils import APITestCase, SnubaTestCase
 
 
@@ -11,7 +12,7 @@ class ProjectEventsTest(APITestCase, SnubaTestCase):
     def setUp(self):
         super(ProjectEventsTest, self).setUp()
         self.min_ago = timezone.now() - timedelta(minutes=1)
-        self.client.cookies['eventstream'] = 'snuba'
+        options.set('snuba.events-queries.enabled', True)
 
     def test_simple(self):
         self.login_as(user=self.user)