Browse Source

fix(rpc): Handle wildcards and escapes better in rpc (#85375)

We werent handling things well when there's a mix of escaped characters
and wildcards.
Tony Xiao 3 weeks ago
parent
commit
bd0ae23afc

+ 33 - 0
src/sentry/api/event_search.py

@@ -227,6 +227,39 @@ def translate_wildcard(pat: str) -> str:
     return "^" + res + "$"
 
 
+def translate_wildcard_as_clickhouse_pattern(pattern: str) -> str:
+    """
+    Translate a wildcard pattern to clickhouse pattern.
+
+    See https://clickhouse.com/docs/en/sql-reference/functions/string-search-functions#like
+    """
+    chars: list[str] = []
+
+    i = 0
+    n = len(pattern)
+
+    while i < n:
+        c = pattern[i]
+        i += 1
+        if c == "\\" and i < n:
+            c = pattern[i]
+            if c not in {"*"}:
+                raise InvalidSearchQuery(f"Unexpected escape character: {c}")
+            chars.append(c)
+            i += 1
+        elif c == "*":
+            # sql uses % as the wildcard character
+            chars.append("%")
+        elif c in {"%", "_"}:
+            # these are special characters and need to be escaped
+            chars.append("\\")
+            chars.append(c)
+        else:
+            chars.append(c)
+
+    return "".join(chars)
+
+
 def translate_escape_sequences(string: str) -> str:
     """
     A non-wildcard pattern can contain escape sequences that we need to handle.

+ 2 - 9
src/sentry/search/eap/resolver.py

@@ -408,15 +408,8 @@ class SearchResolver:
                 operator = ComparisonFilter.OP_NOT_LIKE
             else:
                 raise InvalidSearchQuery(f"Cannot use a wildcard with a {term.operator} filter")
-            # Slashes have to be double escaped so they are
-            # interpreted as a string literal.
-            value = (
-                str(term.value.raw_value)
-                .replace("\\", "\\\\")
-                .replace("%", "\\%")
-                .replace("_", "\\_")
-                .replace("*", "%")
-            )
+            value = str(term.value.raw_value)
+            value = event_search.translate_wildcard_as_clickhouse_pattern(value)
         elif term.operator in constants.OPERATOR_MAP:
             operator = constants.OPERATOR_MAP[term.operator]
         else:

+ 29 - 0
tests/sentry/api/test_event_search.py

@@ -15,6 +15,7 @@ from sentry.api.event_search import (
     SearchKey,
     SearchValue,
     parse_search_query,
+    translate_wildcard_as_clickhouse_pattern,
 )
 from sentry.constants import MODULE_ROOT
 from sentry.exceptions import InvalidSearchQuery
@@ -756,3 +757,31 @@ def test_search_value_classify_and_format_wildcard(value, expected_kind, expecte
     search_value = SearchValue(value)
     kind, wildcard = search_value.classify_and_format_wildcard()
     assert (kind, wildcard) == (expected_kind, expected_value)
+
+
+@pytest.mark.parametrize(
+    ["pattern", "clickhouse"],
+    [
+        pytest.param("simple", "simple", id="simple"),
+        pytest.param("wild * card", "wild % card", id="wildcard"),
+        pytest.param("under_score", "under\\_score", id="underscore"),
+        pytest.param("per%centage", "per\\%centage", id="percentage"),
+        pytest.param("ast\\*erisk", "ast*erisk", id="asterisk"),
+        pytest.param("c*o_m%p\\*lex", "c%o\\_m\\%p*lex", id="complex"),
+    ],
+)
+def test_translate_wildcard_as_clickhouse_pattern(pattern, clickhouse):
+    assert translate_wildcard_as_clickhouse_pattern(pattern) == clickhouse
+
+
+@pytest.mark.parametrize(
+    ["pattern"],
+    [
+        pytest.param("\\."),
+        pytest.param("\\%"),
+        pytest.param("\\_"),
+    ],
+)
+def test_invalid_translate_wildcard_as_clickhouse_pattern(pattern):
+    with pytest.raises(InvalidSearchQuery):
+        assert translate_wildcard_as_clickhouse_pattern(pattern)

+ 22 - 0
tests/snuba/api/endpoints/test_organization_events_span_indexed.py

@@ -1260,6 +1260,28 @@ class OrganizationEventsSpanIndexedEndpointTest(OrganizationEventsEndpointTestBa
         assert len(response.data["data"]) == 1
         assert response.data["data"][0]["span.description"] == "select * from database"
 
+    def test_wildcard_queries_with_asterisk_literals(self):
+        self.store_spans(
+            [
+                self.create_span(
+                    {"description": "select * from database"},
+                    start_ts=self.ten_mins_ago,
+                ),
+            ],
+            is_eap=self.is_eap,
+        )
+        response = self.do_request(
+            {
+                "field": ["span.description"],
+                "query": 'span.description:"select \\* * database"',
+                "project": self.project.id,
+                "dataset": self.dataset,
+            }
+        )
+        assert response.status_code == 200, response.content
+        assert len(response.data["data"]) == 1
+        assert response.data["data"][0]["span.description"] == "select * from database"
+
 
 class OrganizationEventsEAPSpanEndpointTest(OrganizationEventsSpanIndexedEndpointTest):
     is_eap = True