Browse Source

feat(metrics): Use QueryFilter to parse tag filters [INGEST-542] [INGEST-705] (#30289)

Replace the primitive filter parser with the existing QueryFilter to
allow for more complex queries, just like sessions_v2 uses get_filter to
parse its queries.

This might be too permissive, so we should write our own parser +
interpreter in the long run.
Joris Bayer 3 years ago
parent
commit
1c0650aefa

+ 44 - 74
src/sentry/snuba/metrics.py

@@ -22,24 +22,14 @@ from typing import (
     Union,
 )
 
-from snuba_sdk import (
-    And,
-    Column,
-    Condition,
-    Entity,
-    Function,
-    Granularity,
-    Limit,
-    Offset,
-    Op,
-    Or,
-    Query,
-)
+from snuba_sdk import Column, Condition, Entity, Function, Granularity, Limit, Offset, Op, Query
 from snuba_sdk.conditions import BooleanCondition
 from snuba_sdk.orderby import Direction, OrderBy
 
 from sentry.api.utils import get_date_range_from_params
+from sentry.exceptions import InvalidSearchQuery
 from sentry.models import Project
+from sentry.search.events.filter import QueryFilter
 from sentry.sentry_metrics import indexer
 from sentry.snuba.dataset import Dataset, EntityKey
 from sentry.snuba.sessions_v2 import (  # TODO: unite metrics and sessions_v2
@@ -106,30 +96,49 @@ def parse_field(field: str) -> Tuple[str, str]:
         return operation, metric_name
 
 
-def verify_tag_name(name: str) -> str:
-
-    if not TAG_REGEX.match(name):
-        raise InvalidParams(f"Invalid tag name: '{name}'")
+def _resolve_tags(input_: Any) -> Any:
+    """Translate tags in snuba condition"""
+    if isinstance(input_, list):
+        return [_resolve_tags(item) for item in input_]
+    if isinstance(input_, Function):
+        return Function(
+            function=input_.function,
+            parameters=input_.parameters and [_resolve_tags(item) for item in input_.parameters],
+        )
+    if isinstance(input_, Condition):
+        return Condition(lhs=_resolve_tags(input_.lhs), op=input_.op, rhs=_resolve_tags(input_.rhs))
+    if isinstance(input_, BooleanCondition):
+        return input_.__class__(conditions=[_resolve_tags(item) for item in input_.conditions])
+    if isinstance(input_, Column):
+        # HACK: Some tags already take the form "tags[...]" in discover, take that into account:
+        if input_.subscriptable == "tags":
+            name = input_.key
+        else:
+            name = input_.name
+        return Column(name=f"tags[{indexer.resolve(name)}]")
+    if isinstance(input_, str):
+        return indexer.resolve(input_)
 
-    return name
+    return input_
 
 
-def parse_tag(tag_string: str) -> Tuple[str, str]:
+def parse_query(query_string: str) -> Sequence[Condition]:
+    """Parse given filter query into a list of snuba conditions"""
+    # HACK: Parse a sessions query, validate / transform afterwards.
+    # We will want to write our own grammar + interpreter for this later.
     try:
-        name, value = tag_string.split(":")
-    except ValueError:
-        raise InvalidParams(f"Expected something like 'foo:\"bar\"' for tag, got '{tag_string}'")
-
-    return (verify_tag_name(name), value.strip('"'))
-
+        query_filter = QueryFilter(
+            Dataset.Sessions,
+            params={
+                "project_id": 0,
+            },
+        )
+        where, _ = query_filter.resolve_conditions(query_string, use_aggregate_conditions=True)
+    except InvalidSearchQuery as e:
+        raise InvalidParams(f"Failed to parse query: {e}")
+    where = _resolve_tags(where)
 
-def parse_query(query_string: str) -> dict:
-    return {
-        "or": [
-            {"and": [parse_tag(and_part) for and_part in or_part.split(" and ")]}
-            for or_part in query_string.split(" or ")
-        ]
-    }
+    return where
 
 
 class QueryDefinition:
@@ -421,7 +430,7 @@ _MEASUREMENT_TAGS = dict(
     _BASE_TAGS,
     **{
         "measurement_rating": ["good", "meh", "poor"],
-        "transaction": ["/foo/:ordId/", "/bar/:ordId/"],
+        "transaction": ["/foo/:orgId/", "/bar/:orgId/"],
     },
 )
 
@@ -653,45 +662,6 @@ class SnubaQueryBuilder:
         self._projects = projects
         self._queries = self._build_queries(query_definition)
 
-    def _build_logical(self, operator, operands) -> Optional[BooleanCondition]:
-        """Snuba only accepts And and Or if they have 2 elements or more"""
-        operands = [operand for operand in operands if operand is not None]
-        if not operands:
-            return None
-        if len(operands) == 1:
-            return operands[0]
-
-        return operator(operands)
-
-    def _build_filter(self, query_definition: QueryDefinition) -> Optional[BooleanCondition]:
-        filter_ = query_definition.parsed_query
-        if filter_ is None:
-            return None
-
-        def to_int(string):
-            try:
-                return indexer.resolve(string)
-            except KeyError:
-                return None
-
-        return self._build_logical(
-            Or,
-            [
-                self._build_logical(
-                    And,
-                    [
-                        Condition(
-                            Column(f"tags[{to_int(tag)}]"),
-                            Op.EQ,
-                            to_int(value),
-                        )
-                        for tag, value in or_operand["and"]
-                    ],
-                )
-                for or_operand in filter_["or"]
-            ],
-        )
-
     def _build_where(
         self, query_definition: QueryDefinition
     ) -> List[Union[BooleanCondition, Condition]]:
@@ -708,9 +678,9 @@ class SnubaQueryBuilder:
             Condition(Column(TS_COL_QUERY), Op.GTE, query_definition.start),
             Condition(Column(TS_COL_QUERY), Op.LT, query_definition.end),
         ]
-        filter_ = self._build_filter(query_definition)
+        filter_ = query_definition.parsed_query
         if filter_:
-            where.append(filter_)
+            where.extend(filter_)
 
         return where
 

+ 8 - 16
tests/sentry/api/endpoints/test_organization_metrics.py

@@ -285,7 +285,6 @@ class OrganizationMetricDataTest(APITestCase):
     @with_feature(FEATURE_FLAG)
     def test_invalid_filter(self):
         for query in [
-            "%w45698u",
             "release:foo or ",
         ]:
 
@@ -300,21 +299,14 @@ class OrganizationMetricDataTest(APITestCase):
 
     @with_feature(FEATURE_FLAG)
     def test_valid_filter(self):
-
-        for query in [
-            "release:",  # Empty string is OK
-            "release:myapp@2.0.0",
-            "release:myapp@2.0.0 and environment:production",
-            "release:myapp@2.0.0 and environment:production or session.status:healthy",
-        ]:
-
-            response = self.get_success_response(
-                self.project.organization.slug,
-                field="sum(session)",
-                groupBy="environment",
-                query=query,
-            )
-            assert response.data.keys() == {"start", "end", "query", "intervals", "groups"}
+        query = "release:myapp@2.0.0"
+        response = self.get_success_response(
+            self.project.organization.slug,
+            field="sum(session)",
+            groupBy="environment",
+            query=query,
+        )
+        assert response.data.keys() == {"start", "end", "query", "intervals", "groups"}
 
     @with_feature(FEATURE_FLAG)
     def test_orderby_unknown(self):

+ 61 - 2
tests/sentry/snuba/test_metrics.py

@@ -7,6 +7,7 @@ import pytz
 from django.utils.datastructures import MultiValueDict
 from freezegun import freeze_time
 from snuba_sdk import (
+    And,
     Column,
     Condition,
     Direction,
@@ -16,6 +17,7 @@ from snuba_sdk import (
     Limit,
     Offset,
     Op,
+    Or,
     OrderBy,
     Query,
 )
@@ -29,6 +31,7 @@ from sentry.snuba.metrics import (
     SnubaResultConverter,
     get_date_range,
     get_intervals,
+    parse_query,
 )
 
 
@@ -41,6 +44,62 @@ class PseudoProject:
 MOCK_NOW = datetime(2021, 8, 25, 23, 59, tzinfo=pytz.utc)
 
 
+@pytest.mark.parametrize(
+    "query_string,expected",
+    [
+        ('release:""', [Condition(Column(name="tags[6]"), Op.IN, rhs=[14])]),
+        ("release:myapp@2.0.0", [Condition(Column(name="tags[6]"), Op.IN, rhs=[15])]),
+        (
+            "release:myapp@2.0.0 and environment:production",
+            [
+                And(
+                    [
+                        Condition(Column(name="tags[6]"), Op.IN, rhs=[15]),
+                        Condition(Column(name="tags[2]"), Op.EQ, rhs=5),
+                    ]
+                )
+            ],
+        ),
+        (
+            "release:myapp@2.0.0 environment:production",
+            [
+                Condition(Column(name="tags[6]"), Op.IN, rhs=[15]),
+                Condition(Column(name="tags[2]"), Op.EQ, rhs=5),
+            ],
+        ),
+        (
+            "release:myapp@2.0.0 and environment:production or session.status:healthy",
+            [
+                Or(
+                    [
+                        And(
+                            [
+                                Condition(Column(name="tags[6]"), Op.IN, rhs=[15]),
+                                Condition(Column(name="tags[2]"), Op.EQ, rhs=5),
+                            ]
+                        ),
+                        Condition(
+                            Function(function="ifNull", parameters=[Column(name="tags[8]"), 14]),
+                            Op.EQ,
+                            rhs=4,
+                        ),
+                    ]
+                ),
+            ],
+        ),
+        ('transaction:"/bar/:orgId/"', [Condition(Column(name="tags[16]"), Op.EQ, rhs=17)]),
+    ],
+)
+@mock.patch("sentry.snuba.metrics.indexer")
+def test_parse_query(mock_indexer, query_string, expected):
+    local_indexer = MockIndexer()
+    for s in ("", "myapp@2.0.0", "transaction", "/bar/:orgId/"):
+        local_indexer.record(s)
+    mock_indexer.resolve = local_indexer.resolve
+    parsed = parse_query(query_string)
+    assert parsed == expected
+
+
 @freeze_time("2018-12-11 03:21:00")
 def test_round_range():
     start, end, interval = get_date_range({"statsPeriod": "2d"})
@@ -142,7 +201,7 @@ def test_build_snuba_query(mock_now, mock_now2, mock_indexer):
                 Condition(Column("metric_id"), Op.IN, [9, 11, 7]),
                 Condition(Column("timestamp"), Op.GTE, datetime(2021, 5, 28, 0, tzinfo=pytz.utc)),
                 Condition(Column("timestamp"), Op.LT, datetime(2021, 8, 26, 0, tzinfo=pytz.utc)),
-                Condition(Column("tags[6]"), Op.EQ, 10),
+                Condition(Column("tags[6]"), Op.IN, [10]),
             ],
             limit=Limit(MAX_POINTS),
             offset=Offset(0),
@@ -219,7 +278,7 @@ def test_build_snuba_query_orderby(mock_now, mock_now2, mock_indexer):
             Condition(Column("metric_id"), Op.IN, [9]),
             Condition(Column("timestamp"), Op.GTE, datetime(2021, 5, 28, 0, tzinfo=pytz.utc)),
             Condition(Column("timestamp"), Op.LT, datetime(2021, 8, 26, 0, tzinfo=pytz.utc)),
-            Condition(Column("tags[6]", entity=None), Op.EQ, 10),
+            Condition(Column("tags[6]", entity=None), Op.IN, [10]),
         ],
         orderby=[OrderBy(Column("value"), Direction.DESC)],
         limit=Limit(3),