Browse Source

fix(apm) Do a better job of handling parens in raw searches (#20048)

The parser was originally matching parens inside raw searches as boolean
expressions and was failing in interesting ways. Change the parser to treat
these as raw searches instead. In the interest of fixing this quickly, the
parser will actually split on the parens and return two expressions.

`TypeError Anonymous function(app/javascript/utils/transform-object-keys)` =>
`message:"TypeError Anonymous function" AND message:"app/javascript/utils/transform-object-keys"`
evanh 4 years ago
parent
commit
5a2b27d472

+ 8 - 5
src/sentry/api/event_search.py

@@ -108,7 +108,7 @@ event_search_grammar = Grammar(
     r"""
 search               = (boolean_operator / paren_term / search_term)*
 boolean_operator     = spaces (or_operator / and_operator) spaces
-paren_term           = spaces open_paren space? (paren_term / boolean_operator / search_term)+ space? closed_paren spaces
+paren_term           = spaces open_paren spaces (paren_term / boolean_operator / search_term)+ spaces closed_paren spaces
 search_term          = key_val_term / quoted_raw_search / raw_search
 key_val_term         = spaces (tag_filter / time_filter / rel_time_filter / specific_time_filter / duration_filter
                        / numeric_filter / aggregate_filter / aggregate_date_filter / aggregate_rel_date_filter / has_filter
@@ -364,12 +364,15 @@ class SearchVisitor(NodeVisitor):
 
     def visit_paren_term(self, node, children):
         if not self.allow_boolean:
-            raise InvalidSearchQuery(
-                "Grouping filters using parentheses () is not supported in this search"
-            )
+            # It's possible to have a valid search that includes parens, so we can't just error out when we find a paren expression.
+            return self.visit_raw_search(node, children)
 
         children = self.remove_space(self.remove_optional_nodes(self.flatten(children)))
-        return ParenExpression(self.flatten(children[1]))
+        children = self.flatten(children[1])
+        if len(children) == 0:
+            return node.text
+
+        return ParenExpression(children)
 
     def visit_numeric_filter(self, node, children):
         (search_key, _, operator, search_value) = children

+ 1 - 1
src/sentry/api/issue_search.py

@@ -84,7 +84,7 @@ def parse_search_query(query):
                 "This is commonly caused by unmatched-parentheses. Enclose any text in double quotes.",
             )
         )
-    return IssueSearchVisitor().visit(tree)
+    return IssueSearchVisitor(allow_boolean=False).visit(tree)
 
 
 def convert_actor_value(value, projects, user, environments):

+ 42 - 3
tests/sentry/api/test_event_search.py

@@ -1098,9 +1098,6 @@ class ParseBooleanSearchQueryTest(TestCase):
         def _eq(xy):
             return ["equals", [["ifNull", [xy[0], "''"]], xy[1]]]
 
-        def _m(x):
-            return ["notEquals", [["positionCaseInsensitive", ["message", u"'{}'".format(x)]], 0]]
-
         tests = [
             (
                 "(a:b OR (c:d AND (e:f OR (g:h AND e:f))))",
@@ -1665,6 +1662,48 @@ class ParseBooleanSearchQueryTest(TestCase):
         ):
             get_filter("(OR a:b) AND c:d")
 
+    # TODO (evanh): The situation with the next two tests is not ideal, since we should
+    # be matching the entire query instead of splitting on the brackets. However it's
+    # very difficult to write a regex that can tell the difference between a ParenExpression
+    # and a arbitrary search with parens in it. Once we switch tokenizers we can have something
+    # that can correctly classify these expressions.
+    def test_empty_parens_in_message_not_boolean_search(self):
+        def _m(x):
+            return [["positionCaseInsensitive", ["message", "'{}'".format(x)]], "!=", 0]
+
+        result = get_filter(
+            "failure_rate():>0.003&& users:>10 event.type:transaction",
+            params={"organization_id": self.organization.id, "project_id": [self.project.id]},
+        )
+        assert result.conditions == [
+            _m("failure_rate"),
+            _m(":>0.003&&"),
+            [["ifNull", ["users", "''"]], "=", ">10"],
+            ["event.type", "=", "transaction"],
+        ]
+
+    def test_parens_around_message(self):
+        def _m(x):
+            return ["notEquals", [["positionCaseInsensitive", ["message", u"'{}'".format(x)]], 0]]
+
+        result = get_filter(
+            "TypeError Anonymous function(app/javascript/utils/transform-object-keys)",
+            params={"organization_id": self.organization.id, "project_id": [self.project.id]},
+        )
+        assert result.conditions == [
+            [
+                [
+                    "and",
+                    [
+                        _m("TypeError Anonymous function"),
+                        _m("app/javascript/utils/transform-object-keys"),
+                    ],
+                ],
+                "=",
+                1,
+            ],
+        ]
+
 
 class GetSnubaQueryArgsTest(TestCase):
     def test_simple(self):

+ 16 - 0
tests/sentry/api/test_issue_search.py

@@ -123,6 +123,22 @@ class ParseSearchQueryTest(TestCase):
             ):
                 parse_search_query(invalid_query)
 
+    def test_parens_in_query(self):
+        assert parse_search_query(
+            "TypeError Anonymous function(app/javascript/utils/transform-object-keys)"
+        ) == [
+            SearchFilter(
+                key=SearchKey(name="message"),
+                operator="=",
+                value=SearchValue(raw_value="TypeError Anonymous function"),
+            ),
+            SearchFilter(
+                key=SearchKey(name="message"),
+                operator="=",
+                value=SearchValue(raw_value="(app/javascript/utils/transform-object-keys)"),
+            ),
+        ]
+
 
 class ConvertQueryValuesTest(TestCase):
     def test_valid_converter(self):

+ 45 - 0
tests/snuba/api/endpoints/test_organization_events_v2.py

@@ -2512,6 +2512,51 @@ class OrganizationEventsV2EndpointTest(APITestCase, SnubaTestCase):
         assert data[0]["issue.id"] == event.group_id
         assert data[0]["count_id"] == 2
 
+    def test_messed_up_function_values(self):
+        # TODO (evanh): It would be nice if this surfaced an error to the user.
+        # The problem: The && causes the parser to treat that term not as a bad
+        # function call but a valid raw search with parens in it. It's not trivial
+        # to change the parser to recognize "bad function values" and surface them.
+        self.login_as(user=self.user)
+        project = self.create_project()
+        for v in ["a", "b"]:
+            self.store_event(
+                data={
+                    "event_id": v * 32,
+                    "timestamp": self.two_min_ago,
+                    "fingerprint": ["group_1"],
+                },
+                project_id=project.id,
+            )
+
+        with self.feature(
+            {"organizations:discover-basic": True, "organizations:global-views": True}
+        ):
+            response = self.client.get(
+                self.url,
+                format="json",
+                data={
+                    "field": [
+                        "transaction",
+                        "project",
+                        "epm()",
+                        "p50()",
+                        "p95()",
+                        "failure_rate()",
+                        "apdex(300)",
+                        "count_unique(user)",
+                        "user_misery(300)",
+                    ],
+                    "query": "failure_rate():>0.003&& users:>10 event.type:transaction",
+                    "sort": "-failure_rate",
+                    "statsPeriod": "24h",
+                },
+            )
+
+        assert response.status_code == 200, response.content
+        data = response.data["data"]
+        assert len(data) == 0
+
     def test_context_fields(self):
         self.login_as(user=self.user)
         project = self.create_project()