Browse Source

fix(performance): Handle escaped asterisks in search (#27152)

Previously, any asterisks in the search are treated as a wildcard search. This
change ensures that when an asterisk is escaped with a preceding backslash, it
is correctly seen as a literal and does not result in a wildcard search.
Tony Xiao 3 years ago
parent
commit
0f32584895

+ 29 - 1
src/sentry/api/event_search.py

@@ -32,7 +32,10 @@ from sentry.utils.compat import filter, map
 from sentry.utils.snuba import is_duration_measurement, is_measurement, is_span_op_breakdown
 from sentry.utils.validators import is_event_id
 
-WILDCARD_CHARS = re.compile(r"[\*]")
+# A wildcard is an asterisk prefixed by an even number of back slashes.
+# If there are an odd number of back slashes, then the back slash immediately
+# before the asterisk is actually escaping the asterisk.
+WILDCARD_CHARS = re.compile(r"(?<!\\)(\\\\)*\*")
 
 event_search_grammar = Grammar(
     r"""
@@ -194,6 +197,29 @@ def translate_wildcard(pat: str) -> str:
     return "^" + res + "$"
 
 
+def translate_escape_sequences(string: str) -> str:
+    """
+    A non-wildcard pattern can contain escape sequences that we need to handle.
+    - \\* because a single asterisk represents a wildcard, so it needs to be escaped
+    """
+
+    i, n = 0, len(string)
+    res = ""
+    while i < n:
+        c = string[i]
+        i = i + 1
+        if c == "\\" and i < n:
+            d = string[i]
+            if d == "*":
+                i += 1
+                res += d
+            else:
+                res += c
+        else:
+            res += c
+    return res
+
+
 def flatten(children):
     def _flatten(seq):
         # there is a list from search_term and one from free_text, so flatten them.
@@ -294,6 +320,8 @@ class SearchValue(NamedTuple):
     def value(self):
         if self.is_wildcard():
             return translate_wildcard(self.raw_value)
+        elif isinstance(self.raw_value, str):
+            return translate_escape_sequences(self.raw_value)
         return self.raw_value
 
     def is_wildcard(self) -> bool:

+ 1 - 2
static/app/utils/tokenizeSearch.tsx

@@ -406,6 +406,5 @@ function escapeTagValue(value: string) {
   // Need to dig deeper to see where exactly it's wrong.
   //
   // astericks (*) is used for wildcard searches
-  // back slaches (\) is used to escape other characters
-  return typeof value === 'string' ? value.replace(/([\*\\])/g, '\\$1') : value;
+  return typeof value === 'string' ? value.replace(/([\*])/g, '\\$1') : value;
 }

+ 2 - 2
tests/js/spec/utils/tokenizeSearch.spec.jsx

@@ -219,11 +219,11 @@ describe('utils/tokenizeSearch', function () {
       expect(results.formatString()).toEqual('a:a b:b c:c1 c:c2 d:d');
 
       results.addTagValues('e', ['e1*e2\\e3']);
-      expect(results.formatString()).toEqual('a:a b:b c:c1 c:c2 d:d e:"e1\\*e2\\\\e3"');
+      expect(results.formatString()).toEqual('a:a b:b c:c1 c:c2 d:d e:"e1\\*e2\\e3"');
 
       results.addStringTag('d:d2');
       expect(results.formatString()).toEqual(
-        'a:a b:b c:c1 c:c2 d:d e:"e1\\*e2\\\\e3" d:d2'
+        'a:a b:b c:c1 c:c2 d:d e:"e1\\*e2\\e3" d:d2'
       );
     });
 

+ 75 - 0
tests/sentry/api/test_event_search.py

@@ -398,3 +398,78 @@ class ParseSearchQueryBackendTest(unittest.TestCase):
             InvalidSearchQuery, ".*queries are not supported in this search.*"
         ):
             parse_search_query("is:unassigned")
+
+    def test_escaping_asterisk(self):
+        # the asterisk is escaped with a preceding backslash, so it's a literal and not a wildcard
+        search_filter = parse_search_query(r"title:a\*b")
+        assert search_filter == [
+            SearchFilter(key=SearchKey(name="title"), operator="=", value=SearchValue(r"a\*b"))
+        ]
+        search_filter = search_filter[0]
+        # the slash should be removed in the final value
+        assert search_filter.value.value == "a*b"
+
+        # the first and last asterisks arent escaped with a preceding backslash, so they're
+        # wildcards and not literals
+        search_filter = parse_search_query(r"title:*\**")
+        assert search_filter == [
+            SearchFilter(key=SearchKey(name="title"), operator="=", value=SearchValue(r"*\**"))
+        ]
+        search_filter = search_filter[0]
+        assert search_filter.value.value == r"^.*\*.*$"
+
+    @pytest.mark.xfail(reason="escaping backslashes is not supported yet")
+    def test_escaping_backslashes(self):
+        search_filter = parse_search_query(r"title:a\\b")
+        assert search_filter == [
+            SearchFilter(key=SearchKey(name="title"), operator="=", value=SearchValue(r"a\\b"))
+        ]
+        search_filter = search_filter[0]
+        # the extra slash should be removed in the final value
+        assert search_filter.value.value == r"a\b"
+
+    @pytest.mark.xfail(reason="escaping backslashes is not supported yet")
+    def test_trailing_escaping_backslashes(self):
+        search_filter = parse_search_query(r"title:a\\")
+        assert search_filter == [
+            SearchFilter(key=SearchKey(name="title"), operator="=", value=SearchValue(r"a\\"))
+        ]
+        search_filter = search_filter[0]
+        # the extra slash should be removed in the final value
+        assert search_filter.value.value == "a\\"
+
+    def test_escaping_quotes(self):
+        search_filter = parse_search_query(r"title:a\"b")
+        assert search_filter == [
+            SearchFilter(key=SearchKey(name="title"), operator="=", value=SearchValue(r'a"b'))
+        ]
+        search_filter = search_filter[0]
+        # the slash should be removed in the final value
+        assert search_filter.value.value == 'a"b'
+
+
+@pytest.mark.parametrize(
+    "raw,result",
+    [
+        (r"", r""),
+        (r"foo", r"foo"),
+        (r"foo*bar", r"^foo.*bar$"),
+        (r"foo\*bar", r"foo*bar"),
+        (r"foo\\*bar", r"^foo\\.*bar$"),
+        (r"foo\\\*bar", r"foo\\*bar"),
+        (r"foo*", r"^foo.*$"),
+        (r"foo\*", r"foo*"),
+        (r"foo\\*", r"^foo\\.*$"),
+        (r"foo\\\*", r"foo\\*"),
+        (r"*bar", r"^.*bar$"),
+        (r"\*bar", r"*bar"),
+        (r"\\*bar", r"^\\.*bar$"),
+        (r"\\\*bar", r"\\*bar"),
+        (r"*\**", r"^.*\*.*$"),
+        (r"\*a\*b\*c\*", r"*a*b*c*"),
+        (r"\*\*\*aaa\*\*\*", r"***aaa***"),
+    ],
+)
+def test_search_value(raw, result):
+    search_value = SearchValue(raw)
+    assert search_value.value == result

+ 3 - 5
tests/sentry/search/events/test_filter.py

@@ -840,12 +840,10 @@ class GetSnubaQueryArgsTest(TestCase):
 
     def test_escaped_wildcard(self):
         assert get_filter("release:3.1.\\* user.email:\\*@example.com").conditions == [
-            [["match", ["release", "'(?i)^3\\.1\\.\\*$'"]], "=", 1],
-            [["match", ["user.email", "'(?i)^\\*@example\\.com$'"]], "=", 1],
-        ]
-        assert get_filter("release:\\\\\\*").conditions == [
-            [["match", ["release", "'(?i)^\\\\\\*$'"]], "=", 1]
+            ["release", "=", "3.1.*"],
+            ["user.email", "=", "*@example.com"],
         ]
+        assert get_filter("release:\\\\\\*").conditions == [["release", "=", "\\\\*"]]
         assert get_filter("release:\\\\*").conditions == [
             [["match", ["release", "'(?i)^\\\\.*$'"]], "=", 1]
         ]

+ 29 - 0
tests/snuba/api/endpoints/test_organization_events_v2.py

@@ -4394,3 +4394,32 @@ class OrganizationEventsV2EndpointTest(APITestCase, SnubaTestCase):
         response = self.do_request(query)
         assert response.status_code == 200
         assert len(response.data["data"]) == 0
+
+    def test_filters_with_escaped_asterisk(self):
+        data = load_data("transaction", timestamp=before_now(minutes=1))
+        data["transaction"] = r"/:a*/:b-:c(\d\.\e+)"
+        self.store_event(data, project_id=self.project.id)
+
+        query = {
+            "field": ["transaction", "transaction.duration"],
+            # make sure to escape the asterisk so it's not treated as a wildcard
+            "query": r'transaction:"/:a\*/:b-:c(\d\.\e+)"',
+            "project": [self.project.id],
+        }
+        response = self.do_request(query)
+        assert response.status_code == 200
+        assert len(response.data["data"]) == 1
+
+    def test_filters_with_back_slashes(self):
+        data = load_data("transaction", timestamp=before_now(minutes=1))
+        data["transaction"] = r"a\b\c@d"
+        self.store_event(data, project_id=self.project.id)
+
+        query = {
+            "field": ["transaction", "transaction.duration"],
+            "query": r'transaction:"a\b\c@d"',
+            "project": [self.project.id],
+        }
+        response = self.do_request(query)
+        assert response.status_code == 200
+        assert len(response.data["data"]) == 1