Browse Source

feat(search): Added boolean terms to Search Grammar (#12935)

* Added feature flag. not sure if I'm trying to change things in the right place

* Added feature flag.

* removed reduce function linter added.

* Added tests

* saving place.

* Added boolean terms to grammer

* Added changes to event_search

* reverted changes to test_simple test

* Reverted changes to raw search

* Fixed boolean tests without parenthesis.

* Removed group-related code

* Causes parser to fail in issue search.
when boolean operators are used.

* small clean up changes.

* removing reduce again.

* Added feature flag changes to this PR.

* updated group index tests.'

* Fixed raw search back from merge conflict.

* made sure longer boolean statements worked.

* changed spacing on things.

* Added order of operation to the boolean parser.

* changed boolean operators to only upper case.

* Removed unnecessary while loop. Added a test that the parser must handle the empty query string.

* removed visit_nested_boolean

* Update tests/sentry/api/test_event_search.py

Co-Authored-By: lauryndbrown <lauryndbrown@gmail.com>

* Changed name to build_boolean_tree_branch
Lauryn Brown 5 years ago
parent
commit
4cea625cc9

+ 15 - 1
src/sentry/api/bases/organization_events.py

@@ -1,5 +1,6 @@
 from __future__ import absolute_import
 
+from sentry import features
 from sentry.api.bases import OrganizationEndpoint, OrganizationEventsError
 from sentry.api.event_search import get_snuba_query_args, InvalidSearchQuery
 
@@ -8,7 +9,20 @@ class OrganizationEventsEndpointBase(OrganizationEndpoint):
 
     def get_snuba_query_args(self, request, organization):
         params = self.get_filter_params(request, organization)
+        query = request.GET.get('query')
+
         try:
-            return get_snuba_query_args(query=request.GET.get('query'), params=params)
+            snuba_args = get_snuba_query_args(query=query, params=params)
         except InvalidSearchQuery as exc:
             raise OrganizationEventsError(exc.message)
+
+        # TODO(lb): remove once boolean search is fully functional
+        has_boolean_op_flag = features.has(
+            'organizations:boolean-search',
+            organization,
+            actor=request.user
+        )
+        if snuba_args.pop('has_boolean_terms', False) and not has_boolean_op_flag:
+            raise OrganizationEventsError(
+                'Boolean search operator OR and AND not allowed in this search.')
+        return snuba_args

+ 23 - 2
src/sentry/api/endpoints/group_events.py

@@ -9,7 +9,7 @@ from rest_framework.response import Response
 from functools import partial
 
 
-from sentry import options, quotas, tagstore
+from sentry import features, options, quotas, tagstore
 from sentry.api.base import DocSection, EnvironmentMixin
 from sentry.api.bases import GroupEndpoint
 from sentry.api.event_search import get_snuba_query_args
@@ -33,6 +33,10 @@ class NoResults(Exception):
     pass
 
 
+class GroupEventsError(Exception):
+    pass
+
+
 @scenario('ListAvailableSamples')
 def list_available_samples_scenario(runner):
     group = Group.objects.filter(project=runner.default_project).first()
@@ -70,9 +74,14 @@ class GroupEventsEndpoint(GroupEndpoint, EnvironmentMixin):
             request.GET.get('enable_snuba') == '1'
             or options.get('snuba.events-queries.enabled')
         )
+
         backend = self._get_events_snuba if use_snuba else self._get_events_legacy
         start, end = get_date_range_from_params(request.GET, optional=True)
-        return backend(request, group, environments, query, tags, start, end)
+
+        try:
+            return backend(request, group, environments, query, tags, start, end)
+        except GroupEventsError as exc:
+            return Response({'detail': six.text_type(exc)}, status=400)
 
     def _get_events_snuba(self, request, group, environments, query, tags, start, end):
         default_end = timezone.now()
@@ -92,6 +101,18 @@ class GroupEventsEndpoint(GroupEndpoint, EnvironmentMixin):
 
         full = request.GET.get('full', False)
         snuba_args = get_snuba_query_args(request.GET.get('query', None), params)
+
+        # TODO(lb): remove once boolean search is fully functional
+        if snuba_args:
+            has_boolean_op_flag = features.has(
+                'organizations:boolean-search',
+                group.project.organization,
+                actor=request.user
+            )
+            if snuba_args.pop('has_boolean_terms', False) and not has_boolean_op_flag:
+                raise GroupEventsError(
+                    'Boolean search operator OR and AND not allowed in this search.')
+
         snuba_cols = SnubaEvent.minimal_columns if full else SnubaEvent.selected_columns
 
         data_fn = partial(

+ 91 - 30
src/sentry/api/event_search.py

@@ -83,48 +83,50 @@ def translate(pat):
 
 
 event_search_grammar = Grammar(r"""
-search          = search_term*
-search_term     = key_val_term / quoted_raw_search / raw_search
-key_val_term    = space? (time_filter / rel_time_filter / specific_time_filter
-                  / numeric_filter / has_filter / is_filter / basic_filter)
-                  space?
-raw_search      = (!key_val_term ~r"\ *([^\ ^\n]+)\ *" )*
-quoted_raw_search = spaces quoted_value spaces
+search               = (boolean_term / search_term)*
+boolean_term         = search_term (boolean_operator search_term)+
+search_term          = key_val_term / quoted_raw_search / raw_search
+key_val_term         = space? (time_filter / rel_time_filter / specific_time_filter
+                       / numeric_filter / has_filter / is_filter / basic_filter)
+                       space?
+raw_search           = (!key_val_term ~r"\ *([^\ ^\n]+)\ *" )*
+quoted_raw_search    = spaces quoted_value spaces
 
 # standard key:val filter
-basic_filter    = negation? search_key sep search_value
+basic_filter         = negation? search_key sep search_value
 # filter for dates
-time_filter     = search_key sep? operator date_format
+time_filter          = search_key sep? operator date_format
 # filter for relative dates
-rel_time_filter = search_key sep rel_date_format
+rel_time_filter      = search_key sep rel_date_format
 # exact time filter for dates
 specific_time_filter = search_key sep date_format
 # Numeric comparison filter
-numeric_filter  = search_key sep operator? ~r"[0-9]+(?=\s|$)"
+numeric_filter       = search_key sep operator? ~r"[0-9]+(?=\s|$)"
 
 # has filter for not null type checks
-has_filter      = negation? "has" sep (search_key / search_value)
-is_filter       = negation? "is" sep search_value
-
-search_key      = key / quoted_key
-search_value    = quoted_value / value
-value           = ~r"\S*"
-quoted_value    = ~r"\"((?:[^\"]|(?<=\\)[\"])*)?\""s
-key             = ~r"[a-zA-Z0-9_\.-]+"
+has_filter           = negation? "has" sep (search_key / search_value)
+is_filter            = negation? "is" sep search_value
+
+search_key           = key / quoted_key
+search_value         = quoted_value / value
+value                = ~r"\S*"
+quoted_value         = ~r"\"((?:[^\"]|(?<=\\)[\"])*)?\""s
+key                  = ~r"[a-zA-Z0-9_\.-]+"
 # only allow colons in quoted keys
-quoted_key      = ~r"\"([a-zA-Z0-9_\.:-]+)\""
+quoted_key           = ~r"\"([a-zA-Z0-9_\.:-]+)\""
 
-date_format     = ~r"\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d{1,6})?)?Z?(?=\s|$)"
-rel_date_format = ~r"[\+\-][0-9]+[wdhm](?=\s|$)"
+date_format          = ~r"\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d{1,6})?)?Z?(?=\s|$)"
+rel_date_format      = ~r"[\+\-][0-9]+[wdhm](?=\s|$)"
 
 # NOTE: the order in which these operators are listed matters
 # because for example, if < comes before <= it will match that
 # even if the operator is <=
-operator        = ">=" / "<=" / ">" / "<" / "=" / "!="
-sep             = ":"
-space           = " "
-negation        = "!"
-spaces          = ~r"\ *"
+boolean_operator     = "OR" / "AND"
+operator             = ">=" / "<=" / ">" / "<" / "=" / "!="
+sep                  = ":"
+space                = " "
+negation             = "!"
+spaces               = ~r"\ *"
 """)
 
 
@@ -144,6 +146,18 @@ class InvalidSearchQuery(Exception):
     pass
 
 
+def has_boolean_search_terms(search_terms):
+    for term in search_terms:
+        if isinstance(term, SearchBoolean):
+            return True
+    return False
+
+
+class SearchBoolean(namedtuple('SearchBoolean', 'left_term operator right_term')):
+    BOOLEAN_AND = "AND"
+    BOOLEAN_OR = "OR"
+
+
 class SearchFilter(namedtuple('SearchFilter', 'key operator value')):
 
     def __str__(self):
@@ -215,21 +229,28 @@ class SearchVisitor(NodeVisitor):
                 lookup[source_field] = target_field
         return lookup
 
-    def visit_search(self, node, children):
-        # there is a list from search_term and one from raw_search, so flatten them.
-        # Flatten each group in the list, since nodes can return multiple items
+    def flatten(self, children):
         def _flatten(seq):
+            # there is a list from search_term and one from raw_search, so flatten them.
+            # Flatten each group in the list, since nodes can return multiple items
             for item in seq:
                 if isinstance(item, list):
                     for sub in _flatten(item):
                         yield sub
                 else:
                     yield item
+
+        if not (children and isinstance(children, list) and isinstance(children[0], list)):
+            return children
+
         children = [child for group in children for child in _flatten(group)]
         children = filter(None, _flatten(children))
 
         return children
 
+    def visit_search(self, node, children):
+        return self.flatten(children)
+
     def visit_key_val_term(self, node, children):
         _, key_val_term, _ = children
         # key_val_term is a list because of group
@@ -253,6 +274,34 @@ class SearchVisitor(NodeVisitor):
             return None
         return SearchFilter(SearchKey('message'), "=", SearchValue(value))
 
+    def visit_boolean_term(self, node, children):
+        def find_next_operator(children, start, end, operator):
+            for index in range(start, end):
+                if children[index] == operator:
+                    return index
+            return None
+
+        def build_boolean_tree_branch(children, start, end, operator):
+            index = find_next_operator(children, start, end, operator)
+            if index is None:
+                return None
+            left = build_boolean_tree(children, start, index)
+            right = build_boolean_tree(children, index + 1, end)
+            return SearchBoolean(left, children[index], right)
+
+        def build_boolean_tree(children, start, end):
+            if end - start == 1:
+                return children[start]
+
+            result = build_boolean_tree_branch(children, start, end, SearchBoolean.BOOLEAN_OR)
+            if result is None:
+                result = build_boolean_tree_branch(children, start, end, SearchBoolean.BOOLEAN_AND)
+
+            return result
+
+        children = self.flatten(children)
+        return [build_boolean_tree(children, 0, len(children))]
+
     def visit_numeric_filter(self, node, children):
         (search_key, _, operator, search_value) = children
         operator = operator[0] if not isinstance(operator, Node) else '='
@@ -387,6 +436,9 @@ class SearchVisitor(NodeVisitor):
     def visit_search_value(self, node, children):
         return SearchValue(children[0])
 
+    def visit_boolean_operator(self, node, children):
+        return node.text
+
     def visit_value(self, node, children):
         return node.text
 
@@ -515,7 +567,16 @@ def get_snuba_query_args(query=None, params=None):
         'conditions': [],
         'filter_keys': {},
     }
+
+    # TODO(lb): remove when boolean terms fully functional
+    if has_boolean_search_terms(parsed_filters):
+        kwargs['has_boolean_terms'] = True
+
     for _filter in parsed_filters:
+        # TODO(lb): remove when boolean terms fully functional
+        if isinstance(_filter, SearchBoolean):
+            continue
+
         snuba_name = _filter.key.snuba_name
 
         if snuba_name in ('start', 'end'):

+ 4 - 0
src/sentry/api/issue_search.py

@@ -67,6 +67,10 @@ class IssueSearchVisitor(SearchVisitor):
             search_value,
         )
 
+    def visit_boolean_operator(self, node, children):
+        raise InvalidSearchQuery(
+            'Boolean statements containing "OR" or "AND" are not supported in this search')
+
 
 def parse_search_query(query):
     tree = event_search_grammar.parse(query)

+ 2 - 0
src/sentry/conf/server.py

@@ -798,6 +798,8 @@ SENTRY_FEATURES = {
     'organizations:advanced-search': True,
     # Enable obtaining and using API keys.
     'organizations:api-keys': False,
+    # Enable explicit use of AND and OR in search.
+    'organizations:boolean-search': False,
     # Enable creating organizations within sentry (if SENTRY_SINGLE_ORGANIZATION
     # is not enabled).
     'organizations:create': True,

+ 1 - 0
src/sentry/features/__init__.py

@@ -54,6 +54,7 @@ default_manager.add('organizations:create')
 
 # Organization scoped features
 default_manager.add('organizations:advanced-search', OrganizationFeature)  # NOQA
+default_manager.add('organizations:boolean-search', OrganizationFeature)  # NOQA
 default_manager.add('organizations:api-keys', OrganizationFeature)  # NOQA
 default_manager.add('organizations:discover', OrganizationFeature)  # NOQA
 default_manager.add('organizations:events', OrganizationFeature)  # NOQA

+ 175 - 2
tests/sentry/api/test_event_search.py

@@ -9,7 +9,7 @@ from parsimonious.exceptions import IncompleteParseError
 
 from sentry.api.event_search import (
     convert_endpoint_params, event_search_grammar, get_snuba_query_args,
-    parse_search_query, InvalidSearchQuery, SearchFilter, SearchKey,
+    parse_search_query, InvalidSearchQuery, SearchBoolean, SearchFilter, SearchKey,
     SearchValue, SearchVisitor,
 )
 from sentry.testutils import TestCase
@@ -477,7 +477,7 @@ class ParseSearchQueryTest(TestCase):
             ),
         ]
 
-    def test_empty_string(self):
+    def test_empty_filter_value(self):
         assert parse_search_query('device.family:""') == [
             SearchFilter(
                 key=SearchKey(name='device.family'),
@@ -685,6 +685,179 @@ class ParseSearchQueryTest(TestCase):
         for query, expected in queries:
             assert parse_search_query(query) == [expected]
 
+    def test_empty_string(self):
+        # Empty quotations become a dropped term
+        assert parse_search_query('') == []
+
+
+class ParseBooleanSearchQueryTest(TestCase):
+    def setUp(self):
+        super(ParseBooleanSearchQueryTest, self).setUp()
+        self.term1 = SearchFilter(
+            key=SearchKey(name='user.email'),
+            operator="=",
+            value=SearchValue(raw_value='foo@example.com'),
+        )
+        self.term2 = SearchFilter(
+            key=SearchKey(name='user.email'),
+            operator="=",
+            value=SearchValue(raw_value='bar@example.com'),
+        )
+        self.term3 = SearchFilter(
+            key=SearchKey(name='user.email'),
+            operator="=",
+            value=SearchValue(raw_value='foobar@example.com'),
+        )
+
+    def test_simple(self):
+        assert parse_search_query(
+            'user.email:foo@example.com OR user.email:bar@example.com'
+        ) == [SearchBoolean(left_term=self.term1, operator="OR", right_term=self.term2)]
+
+        assert parse_search_query(
+            'user.email:foo@example.com AND user.email:bar@example.com'
+        ) == [SearchBoolean(left_term=self.term1, operator="AND", right_term=self.term2)]
+
+    def test_single_term(self):
+        assert parse_search_query('user.email:foo@example.com') == [self.term1]
+
+    def test_order_of_operations(self):
+        assert parse_search_query(
+            'user.email:foo@example.com OR user.email:bar@example.com AND user.email:foobar@example.com'
+        ) == [SearchBoolean(
+            left_term=self.term1,
+            operator='OR',
+            right_term=SearchBoolean(
+                left_term=self.term2,
+                operator='AND',
+                right_term=self.term3
+            )
+        )]
+        assert parse_search_query(
+            'user.email:foo@example.com AND user.email:bar@example.com OR user.email:foobar@example.com'
+        ) == [SearchBoolean(
+            left_term=SearchBoolean(
+                left_term=self.term1,
+                operator='AND',
+                right_term=self.term2,
+            ),
+            operator='OR',
+            right_term=self.term3
+        )]
+
+    def test_multiple_statements(self):
+        assert parse_search_query(
+            'user.email:foo@example.com OR user.email:bar@example.com OR user.email:foobar@example.com'
+        ) == [SearchBoolean(
+            left_term=self.term1,
+            operator='OR',
+            right_term=SearchBoolean(
+                left_term=self.term2,
+                operator='OR',
+                right_term=self.term3
+            )
+        )]
+
+        assert parse_search_query(
+            'user.email:foo@example.com AND user.email:bar@example.com AND user.email:foobar@example.com'
+        ) == [SearchBoolean(
+            left_term=self.term1,
+            operator='AND',
+            right_term=SearchBoolean(
+                left_term=self.term2,
+                operator='AND',
+                right_term=self.term3
+            )
+        )]
+
+        term4 = SearchFilter(
+            key=SearchKey(name='user.email'),
+            operator="=",
+            value=SearchValue(raw_value='hello@example.com'),
+        )
+
+        # longer even number of terms
+        assert parse_search_query(
+            'user.email:foo@example.com AND user.email:bar@example.com OR user.email:foobar@example.com AND user.email:hello@example.com'
+        ) == [SearchBoolean(
+            left_term=SearchBoolean(
+                left_term=self.term1,
+                operator='AND',
+                right_term=self.term2
+            ),
+            operator='OR',
+            right_term=SearchBoolean(
+                left_term=self.term3,
+                operator='AND',
+                right_term=term4
+            )
+        )]
+
+        term5 = SearchFilter(
+            key=SearchKey(name='user.email'),
+            operator="=",
+            value=SearchValue(raw_value='hi@example.com'),
+        )
+
+        # longer odd number of terms
+        assert parse_search_query(
+            'user.email:foo@example.com AND user.email:bar@example.com OR user.email:foobar@example.com AND user.email:hello@example.com AND user.email:hi@example.com'
+        ) == [
+            SearchBoolean(
+                left_term=SearchBoolean(
+                    left_term=self.term1,
+                    operator='AND',
+                    right_term=self.term2
+                ),
+                operator='OR',
+                right_term=SearchBoolean(
+                    left_term=self.term3,
+                    operator='AND',
+                    right_term=SearchBoolean(
+                        left_term=term4,
+                        operator='AND',
+                        right_term=term5
+                    )
+                )
+            )]
+
+        # absurdly long
+        assert parse_search_query(
+            'user.email:foo@example.com AND user.email:bar@example.com OR user.email:foobar@example.com AND user.email:hello@example.com AND user.email:hi@example.com OR user.email:foo@example.com AND user.email:bar@example.com OR user.email:foobar@example.com AND user.email:hello@example.com AND user.email:hi@example.com'
+        ) == [SearchBoolean(
+            left_term=SearchBoolean(
+                left_term=self.term1,
+                operator='AND',
+                right_term=self.term2),
+            operator='OR',
+            right_term=SearchBoolean(
+                left_term=SearchBoolean(
+                    left_term=self.term3,
+                    operator='AND',
+                    right_term=SearchBoolean(
+                        left_term=term4,
+                        operator='AND',
+                        right_term=term5)),
+                operator='OR',
+                right_term=SearchBoolean(
+                    left_term=SearchBoolean(
+                        left_term=self.term1,
+                        operator='AND',
+                        right_term=self.term2),
+                    operator='OR',
+                    right_term=SearchBoolean(
+                        left_term=self.term3,
+                        operator='AND',
+                        right_term=SearchBoolean(
+                            left_term=term4,
+                            operator='AND',
+                            right_term=term5
+                        )
+                    )
+                )
+            )
+        )]
+
 
 class GetSnubaQueryArgsTest(TestCase):
     def test_simple(self):

+ 14 - 0
tests/sentry/api/test_issue_search.py

@@ -142,6 +142,20 @@ class ParseSearchQueryTest(TestCase):
             ):
                 parse_search_query(invalid_query)
 
+    def test_boolean_operators_not_allowed(self):
+        invalid_queries = [
+            'user.email:foo@example.com OR user.email:bar@example.com',
+            'user.email:foo@example.com AND user.email:bar@example.com',
+            'user.email:foo@example.com OR user.email:bar@example.com OR user.email:foobar@example.com',
+            'user.email:foo@example.com AND user.email:bar@example.com AND user.email:foobar@example.com',
+        ]
+        for invalid_query in invalid_queries:
+            with self.assertRaises(
+                InvalidSearchQuery,
+                expected_regex='Boolean statements containing "OR" or "AND" are not supported in this search',
+            ):
+                parse_search_query(invalid_query)
+
 
 class ConvertQueryValuesTest(TestCase):
 

+ 10 - 0
tests/snuba/api/endpoints/test_group_events.py

@@ -363,3 +363,13 @@ class GroupEventsTest(APITestCase, SnubaTestCase):
                     six.text_type(event.event_id),
                 ]
             )
+
+    def test_boolean_feature_flag_failure(self):
+        self.login_as(user=self.user)
+        group = self.create_group()
+
+        for query in ['title:hi OR title:hello', 'title:hi AND title:hello']:
+            url = u'/api/0/issues/{}/events/?query={}'.format(group.id, query)
+            response = self.client.get(url, format='json')
+            assert response.status_code == 400
+            assert response.content == '{"detail": "Boolean search operator OR and AND not allowed in this search."}'

+ 15 - 0
tests/snuba/api/endpoints/test_organization_events.py

@@ -573,6 +573,21 @@ class OrganizationEventsEndpointTest(OrganizationEventsTestBase):
         assert response.status_code == 200, response.content
         assert len(response.data) == 0
 
+    def test_boolean_feature_flag_failure(self):
+        self.login_as(user=self.user)
+        project = self.create_project()
+        url = reverse(
+            'sentry-api-0-organization-events',
+            kwargs={
+                'organization_slug': project.organization.slug,
+            }
+        )
+
+        for query in ['title:hi OR title:hello', 'title:hi AND title:hello']:
+            response = self.client.get(url, {'query': query}, format='json')
+            assert response.status_code == 400
+            assert response.content == '{"detail": "Boolean search operator OR and AND not allowed in this search."}'
+
 
 class OrganizationEventsStatsEndpointTest(OrganizationEventsTestBase):
     def test_simple(self):

Some files were not shown because too many files changed in this diff