8 months ago · 88ac464d01
--- a/src/sentry/api/event_search.py
+++ b/src/sentry/api/event_search.py
@@ -387,6 +387,60 @@ class SearchValue(NamedTuple):
 
				             return False
			
 
				         return bool(WILDCARD_CHARS.search(self.raw_value))
			
 
				 
			
 
				+    def classify_wildcard(self) -> Literal["prefix", "infix", "suffix", "other"]:
			
 
				+        if not self.is_wildcard():
			
 
				+            return "other"
			
 
				+
			
 
				+        ret = WILDCARD_CHARS.finditer(self.raw_value)
			
 
				+
			
 
				+        leading_wildcard = False
			
 
				+        trailing_wildcard = False
			
 
				+        middle_wildcard = False
			
 
				+
			
 
				+        for x in ret:
			
 
				+            start, end = x.span()
			
 
				+            if start == 0 and end == 1:
			
 
				+                # It must span exactly [0, 1) because if it spans further,
			
 
				+                # the pattern also matched on some leading slashes.
			
 
				+                leading_wildcard = True
			
 
				+            elif end == len(self.raw_value):
			
 
				+                # It only needs to match on end because if it matches on
			
 
				+                # some slashes before the *, that's okay.
			
 
				+                trailing_wildcard = True
			
 
				+            else:
			
 
				+                # The wildcard happens somewhere in the middle of the value.
			
 
				+                # We care about this because when this happens, it's not
			
 
				+                # trivial to optimize the query, so let it fall back to
			
 
				+                # the existing regex approach.
			
 
				+                middle_wildcard = True
			
 
				+
			
 
				+        if not middle_wildcard:
			
 
				+            if leading_wildcard and trailing_wildcard:
			
 
				+                return "infix"
			
 
				+            elif leading_wildcard:
			
 
				+                return "suffix"
			
 
				+            elif trailing_wildcard:
			
 
				+                return "prefix"
			
 
				+
			
 
				+        return "other"
			
 
				+
			
 
				+    def format_wildcard(self, kind: Literal["prefix", "infix", "suffix", "other"]) -> str:
			
 
				+        if kind == "prefix":
			
 
				+            # If it's a prefix wildcard, we strip off the last character
			
 
				+            # which is always a `*` and match on the rest.
			
 
				+            return translate_escape_sequences(self.raw_value[:-1])
			
 
				+        elif kind == "infix":
			
 
				+            # If it's an infix wildcard, we strip off the first and last character
			
 
				+            # which is always a `*` and match on the rest.
			
 
				+            return translate_escape_sequences(self.raw_value[1:-1])
			
 
				+        elif kind == "suffix":
			
 
				+            # If it's a suffix wildcard, we strip off the first character
			
 
				+            # which is always a `*` and match on the rest.
			
 
				+            return translate_escape_sequences(self.raw_value[1:])
			
 
				+
			
 
				+        # Fall back to the usual formatting that includes formatting escape values.
			
 
				+        return self.value
			
 
				+
			
 
				     def is_event_id(self) -> bool:
			
 
				         """Return whether the current value is a valid event id
			
 
				 
			
--- a/src/sentry/search/events/builder/base.py
+++ b/src/sentry/search/events/builder/base.py
@@ -1287,18 +1287,40 @@ class BaseQueryBuilder:
 
				             is_null_condition = Condition(Function("isNull", [lhs]), Op.EQ, 1)
			
 
				 
			
 
				         if search_filter.value.is_wildcard():
			
 
				-            raw_value = str(search_filter.value.raw_value)
			
 
				-            new_value = event_search.SearchValue(raw_value[1:-1])
			
 
				-            if (
			
 
				-                raw_value.startswith("*")
			
 
				-                and raw_value.endswith("*")
			
 
				-                and not new_value.is_wildcard()
			
 
				-            ):
			
 
				-                # This is an optimization to avoid using regular expressions
			
 
				-                # for wild card searches if it can be avoided.
			
 
				-                # Here, we're just interested if the substring exists.
			
 
				+            kind = (
			
 
				+                search_filter.value.classify_wildcard()
			
 
				+                if self.config.optimize_wildcard_searches
			
 
				+                else "other"
			
 
				+            )
			
 
				+            if kind == "prefix":
			
 
				+                condition = Condition(
			
 
				+                    Function(
			
 
				+                        "startsWith",
			
 
				+                        [
			
 
				+                            Function("lower", [lhs]),
			
 
				+                            search_filter.value.format_wildcard(kind).lower(),
			
 
				+                        ],
			
 
				+                    ),
			
 
				+                    Op.EQ if search_filter.operator in constants.EQUALITY_OPERATORS else Op.NEQ,
			
 
				+                    1,
			
 
				+                )
			
 
				+            elif kind == "suffix":
			
 
				+                condition = Condition(
			
 
				+                    Function(
			
 
				+                        "endsWith",
			
 
				+                        [
			
 
				+                            Function("lower", [lhs]),
			
 
				+                            search_filter.value.format_wildcard(kind).lower(),
			
 
				+                        ],
			
 
				+                    ),
			
 
				+                    Op.EQ if search_filter.operator in constants.EQUALITY_OPERATORS else Op.NEQ,
			
 
				+                    1,
			
 
				+                )
			
 
				+            elif kind == "infix":
			
 
				                 condition = Condition(
			
 
				-                    Function("positionCaseInsensitive", [lhs, new_value.value]),
			
 
				+                    Function(
			
 
				+                        "positionCaseInsensitive", [lhs, search_filter.value.format_wildcard(kind)]
			
 
				+                    ),
			
 
				                     Op.NEQ if search_filter.operator in constants.EQUALITY_OPERATORS else Op.EQ,
			
 
				                     0,
			
 
				                 )
			
--- a/src/sentry/search/events/datasets/base.py
+++ b/src/sentry/search/events/datasets/base.py
@@ -19,6 +19,7 @@ class DatasetConfig(abc.ABC):
 
				     custom_threshold_columns: set[str] = set()
			
 
				     non_nullable_keys: set[str] = set()
			
 
				     missing_function_error: ClassVar[type[Exception]] = InvalidSearchQuery
			
 
				+    optimize_wildcard_searches = False
			
 
				 
			
 
				     def __init__(self, builder: BaseQueryBuilder):
			
 
				         pass
			
--- a/src/sentry/search/events/datasets/spans_indexed.py
+++ b/src/sentry/search/events/datasets/spans_indexed.py
@@ -26,6 +26,8 @@ from sentry.search.utils import DEVICE_CLASS
 
				 
			
 
				 
			
 
				 class SpansIndexedDatasetConfig(DatasetConfig):
			
 
				+    optimize_wildcard_searches = True
			
 
				+
			
 
				     def __init__(self, builder: builder.QueryBuilder):
			
 
				         self.builder = builder
			
 
				         self.total_count: int | None = None
			
--- a/tests/sentry/api/test_event_search.py
+++ b/tests/sentry/api/test_event_search.py
@@ -710,3 +710,32 @@ def test_search_value_to_query_string(value, expected_query_string):
 
				     actual = search_value.to_query_string()
			
 
				 
			
 
				     assert actual == expected_query_string
			
 
				+
			
 
				+
			
 
				+@pytest.mark.parametrize(
			
 
				+    ["value", "expected_kind", "expected_value"],
			
 
				+    [
			
 
				+        (1, "other", 1),
			
 
				+        ("1", "other", "1"),
			
 
				+        ("*", "suffix", ""),  # consider special casing this
			
 
				+        ("*foo", "suffix", "foo"),
			
 
				+        ("foo*", "prefix", "foo"),
			
 
				+        ("*foo*", "infix", "foo"),
			
 
				+        (r"\*foo", "other", r"*foo"),
			
 
				+        (r"\\*foo", "other", r"^\\.*foo$"),
			
 
				+        (r"foo\*", "other", r"foo*"),
			
 
				+        (r"foo\\*", "prefix", r"foo\\"),
			
 
				+        ("*f*o*o*", "other", "^.*f.*o.*o.*$"),
			
 
				+        (r"*foo\*", "suffix", r"foo*"),
			
 
				+        (r"*foo\\*", "infix", r"foo\\"),
			
 
				+    ],
			
 
				+)
			
 
				+def test_search_value_classify_and_format_wildcard(value, expected_kind, expected_value):
			
 
				+    """
			
 
				+    Test classifying the wildcard type into one of prefix/suffix/infix/other
			
 
				+    and formatting the value according to the classification results.
			
 
				+    """
			
 
				+    search_value = SearchValue(value)
			
 
				+    kind = search_value.classify_wildcard()
			
 
				+    assert kind == expected_kind
			
 
				+    assert search_value.format_wildcard(kind) == expected_value
			
--- a/tests/sentry/search/events/builder/test_spans_indexed.py
+++ b/tests/sentry/search/events/builder/test_spans_indexed.py
@@ -72,6 +72,10 @@ def test_field_alias(params, field, expected):
 
				     assert expected in builder.columns
			
 
				 
			
 
				 
			
 
				+def tags(key, column="tags"):
			
 
				+    return Function("ifNull", [Column(f"{column}[{key}]"), ""])
			
 
				+
			
 
				+
			
 
				 @pytest.mark.parametrize(
			
 
				     ["condition", "expected"],
			
 
				     [
			
@@ -116,6 +120,56 @@ def test_field_alias(params, field, expected):
 
				             Condition(Column("span_status"), Op.IN, [3, 5]),
			
 
				             id="span.status:[invalid_argument,not_found]",
			
 
				         ),
			
 
				+        pytest.param(
			
 
				+            "foo:*bar*",
			
 
				+            Condition(Function("positionCaseInsensitive", [tags("foo"), "bar"]), Op.NEQ, 0),
			
 
				+            id="foo:*bar*",
			
 
				+        ),
			
 
				+        pytest.param(
			
 
				+            "!foo:*bar*",
			
 
				+            Condition(Function("positionCaseInsensitive", [tags("foo"), "bar"]), Op.EQ, 0),
			
 
				+            id="!foo:*bar*",
			
 
				+        ),
			
 
				+        pytest.param(
			
 
				+            r"foo:Bar*",
			
 
				+            Condition(Function("startsWith", [Function("lower", [tags("foo")]), "bar"]), Op.EQ, 1),
			
 
				+            id=r"foo:Bar*",
			
 
				+        ),
			
 
				+        pytest.param(
			
 
				+            r"!foo:Bar*",
			
 
				+            Condition(Function("startsWith", [Function("lower", [tags("foo")]), "bar"]), Op.NEQ, 1),
			
 
				+            id=r"!foo:Bar*",
			
 
				+        ),
			
 
				+        pytest.param(
			
 
				+            r"foo:*Bar",
			
 
				+            Condition(Function("endsWith", [Function("lower", [tags("foo")]), "bar"]), Op.EQ, 1),
			
 
				+            id=r"foo:*Bar",
			
 
				+        ),
			
 
				+        pytest.param(
			
 
				+            r"!foo:*Bar",
			
 
				+            Condition(Function("endsWith", [Function("lower", [tags("foo")]), "bar"]), Op.NEQ, 1),
			
 
				+            id=r"!foo:*Bar",
			
 
				+        ),
			
 
				+        pytest.param(
			
 
				+            r"foo:*Bar\*",
			
 
				+            Condition(Function("endsWith", [Function("lower", [tags("foo")]), "bar*"]), Op.EQ, 1),
			
 
				+            id=r"foo:*Bar\*",
			
 
				+        ),
			
 
				+        pytest.param(
			
 
				+            r"!foo:*Bar\*",
			
 
				+            Condition(Function("endsWith", [Function("lower", [tags("foo")]), "bar*"]), Op.NEQ, 1),
			
 
				+            id=r"!foo:*Bar\*",
			
 
				+        ),
			
 
				+        pytest.param(
			
 
				+            r"foo:*b*a*r*",
			
 
				+            Condition(Function("match", [tags("foo"), "(?i)^.*b.*a.*r.*$"]), Op.EQ, 1),
			
 
				+            id=r"foo:*b*a*r*",
			
 
				+        ),
			
 
				+        pytest.param(
			
 
				+            r"!foo:*b*a*r*",
			
 
				+            Condition(Function("match", [tags("foo"), "(?i)^.*b.*a.*r.*$"]), Op.NEQ, 1),
			
 
				+            id=r"!foo:*b*a*r*",
			
 
				+        ),
			
 
				     ],
			
 
				 )
			
 
				 @django_db_all
			
--- a/tests/snuba/api/endpoints/test_organization_events_span_indexed.py
+++ b/tests/snuba/api/endpoints/test_organization_events_span_indexed.py
@@ -376,3 +376,34 @@ class OrganizationEventsSpanIndexedEndpointTest(OrganizationEventsEndpointTestBa
 
				         assert data[0]["measurements.messaging.message.body.size"] == 1024
			
 
				         assert data[0]["measurements.messaging.message.retry.count"] == 2
			
 
				         assert meta["dataset"] == "spansIndexed"
			
 
				+
			
 
				+    def test_tag_wildcards(self):
			
 
				+        self.store_spans(
			
 
				+            [
			
 
				+                self.create_span(
			
 
				+                    {"description": "foo", "tags": {"foo": "BaR"}},
			
 
				+                    start_ts=self.ten_mins_ago,
			
 
				+                ),
			
 
				+                self.create_span(
			
 
				+                    {"description": "qux", "tags": {"foo": "QuX"}},
			
 
				+                    start_ts=self.ten_mins_ago,
			
 
				+                ),
			
 
				+            ]
			
 
				+        )
			
 
				+
			
 
				+        for query in [
			
 
				+            "foo:b*",
			
 
				+            "foo:*r",
			
 
				+            "foo:*a*",
			
 
				+            "foo:b*r",
			
 
				+        ]:
			
 
				+            response = self.do_request(
			
 
				+                {
			
 
				+                    "field": ["foo", "count()"],
			
 
				+                    "query": query,
			
 
				+                    "project": self.project.id,
			
 
				+                    "dataset": "spansIndexed",
			
 
				+                }
			
 
				+            )
			
 
				+            assert response.status_code == 200, response.content
			
 
				+            assert response.data["data"] == [{"foo": "BaR", "count()": 1}]