Browse Source

feat(replays): Add support for filtering click events with a CSS selector (#46701)

Allows a user to enter a CSS selector to search for DOM click events.

Any valid CSS selector will be accepted as input; however, we do not
interpret selectors the way a browser will. Only the following selectors
are considered:

- `element`
- Omitting the element from the query defaults to the universal selector
`*`.
- `#id`
- `.class`
- `[attribute=x]`
    - Limited to: `data-testid`, `title`, `role`, `alt`, `aria-label`

You may also use commas (`,`) as OR operators. E.g. `div,button` will
translate to `div OR button`. We do not support any form of DOM nesting.
This means we do not support CSS combinators in selector searches.
Additional selector syntax will be ignored even if the syntax
fundamentally changes the meaning of the query.

Selectors can be AND'ed with the normal Sentry query syntax.

Related: https://github.com/getsentry/sentry/issues/45524
Colton Allen 1 year ago
parent
commit
627ee52518

+ 1 - 1
requirements-base.txt

@@ -8,6 +8,7 @@ celery>=4.4.7
 click>=8.0.4
 confluent-kafka>=1.9.2
 croniter>=0.3.37
+cssselect>=1.0.3
 datadog>=0.29.3
 django-crispy-forms>=1.14.0
 django-pg-zero-downtime-migrations>=0.11
@@ -96,7 +97,6 @@ cffi>=1.15.0
 
 # not directly used, but pinned for toronado because it doesn't pin these
 cssutils>=2.4.0
-cssselect>=1.0.3
 
 # sentry-plugins specific dependencies
 phabricator>=0.7.0

+ 1 - 0
src/sentry/replays/blueprints/api.md

@@ -64,6 +64,7 @@ This document is structured by resource with each resource having actions that c
     | replay_click.testid      | string        | The data-testid of an HTML element. (omitted from public docs) |
     | replay_click.textContent | string        | The text-content of an HTML element.                           |
     | replay_click.title       | string        | The title attribute of an HTML element.                        |
+    | replay_click.selector    | string        | A valid CSS selector.                                          |
 
 ### Browse Replays [GET]
 

+ 58 - 0
src/sentry/replays/lib/query.py

@@ -9,6 +9,8 @@ from snuba_sdk.expressions import Expression
 from snuba_sdk.orderby import Direction, OrderBy
 
 from sentry.api.event_search import ParenExpression, SearchFilter
+from sentry.replays.lib.selector.parse import QueryType, parse_selector
+from sentry.replays.lib.selector.query import union_find
 
 OPERATOR_MAP = {
     "=": Op.EQ,
@@ -136,6 +138,62 @@ class String(Field):
         return super().as_condition(field_alias, operator, value, is_wildcard)
 
 
+class Selector(Field):
+    """Query field that filters on click events using a CSS selector string.
+
+    Only the equality and inequality operators are accepted. The selector text is parsed
+    with `parse_selector` and every parsed query is turned into one `union_find` condition.
+    """
+
+    _operators = [Op.EQ, Op.NEQ]
+    _python_type = str
+
+    def as_condition(
+        self, field_alias: str, operator: Op, value: Union[List[str], str], is_wildcard: bool
+    ) -> Condition:
+        """Translate a CSS selector into a condition.
+
+        :param field_alias: Unused by this field.
+        :param operator: Comparison applied to each `union_find` result.
+        :param value: The CSS selector text handed to `parse_selector`.
+        :param is_wildcard: Unused by this field.
+        :return: A single condition, or an `Or` of conditions when the selector produced
+            more than one usable query. A condition that can never be true is returned
+            when the selector produced nothing to filter on.
+        """
+        # This list of queries implies an `OR` operation between each item in the set. To `AND`
+        # selector queries apply them separately.
+        queries: List[QueryType] = parse_selector(value)
+
+        # A valid selector will always return at least one query. If this did not occur then
+        # the selector was not well-formed. We return an empty resultset.
+        if not queries:
+            return Condition(Function("identity", parameters=[1]), Op.EQ, 2)
+
+        # (QueryType attribute, click column) pairs in the order the columns are emitted.
+        attribute_columns = (
+            ("alt", "click_alt"),
+            ("aria_label", "click_aria_label"),
+            ("classes", "click_classes"),
+            ("id", "click_id"),
+            ("role", "click_role"),
+            ("tag", "click_tag"),
+            ("testid", "click_testid"),
+            ("title", "click_title"),
+        )
+
+        # Conditions are pre-made and intended for application in the HAVING clause.
+        conditions: List[Condition] = []
+
+        for query in queries:
+            columns, values = [], []
+
+            # Only attributes that were actually populated by the selector take part.
+            for attribute_name, column_name in attribute_columns:
+                attribute_value = getattr(query, attribute_name)
+                if attribute_value:
+                    columns.append(Column(column_name))
+                    values.append(attribute_value)
+
+            if columns:
+                conditions.append(Condition(union_find(columns, values), operator, 1))
+
+        # A selector such as `*` parses successfully but sets no attribute, so no condition
+        # is produced. Without this guard an empty list would be handed to `Or`. We reuse
+        # the empty resultset condition from above instead.
+        if not conditions:
+            return Condition(Function("identity", parameters=[1]), Op.EQ, 2)
+
+        if len(conditions) == 1:
+            return conditions[0]
+
+        # NOTE(review): with `Op.NEQ` a comma separated selector becomes `NOT a OR NOT b`.
+        # Confirm whether `NOT (a OR b)` was the intended meaning.
+        return Or(conditions)
+
+
 class Number(Field):
     _operators = [Op.EQ, Op.NEQ, Op.GT, Op.GTE, Op.LT, Op.LTE, Op.IN, Op.NOT_IN]
     _python_type = int

+ 0 - 0
src/sentry/replays/lib/selector/__init__.py


+ 102 - 0
src/sentry/replays/lib/selector/parse.py

@@ -0,0 +1,102 @@
+from typing import List, Optional, Union
+
+from cssselect import Selector, SelectorSyntaxError
+from cssselect import parse as cssselect_parse
+from cssselect.parser import Attrib, Class, CombinedSelector, Element, Hash
+from rest_framework.exceptions import ParseError
+
+SelectorType = Union[Attrib, Class, Element, Hash]
+
+
+class QueryType:
+    """Mutable record of the values extracted from one CSS selector.
+
+    Every scalar attribute starts as `None` and `classes` starts as an empty list. The
+    visitor functions in this module fill the attributes in.
+    """
+
+    def __init__(self) -> None:
+        # Element and identity.
+        self.tag: Optional[str] = None
+        self.id: Optional[str] = None
+        self.classes: List[str] = []
+        # Attribute selectors.
+        self.alt: Optional[str] = None
+        self.aria_label: Optional[str] = None
+        self.role: Optional[str] = None
+        self.testid: Optional[str] = None
+        self.title: Optional[str] = None
+
+
+def parse_selector(css_selector: str) -> List[QueryType]:
+    """Convert CSS selector text into a list of queries, one per parsed selector.
+
+    An empty list is returned when cssselect rejects the text with a syntax error.
+    `ParseError` is raised when a selector carries a pseudo-element, and may also be raised
+    by `visit_selector_tree` while the selector is being walked.
+    """
+
+    def to_query(parsed: Selector) -> QueryType:
+        if parsed.pseudo_element is not None:
+            raise ParseError("Pseudo-elements are not supported.")
+
+        result = QueryType()
+        visit_selector_tree(result, parsed.parsed_tree)
+        return result
+
+    try:
+        parsed_selectors: List[Selector] = cssselect_parse(css_selector)
+    except SelectorSyntaxError:
+        # The text is not a valid selector so there is nothing to extract.
+        return []
+
+    return [to_query(parsed) for parsed in parsed_selectors]
+
+
+def visit_selector_tree(query: QueryType, selector: SelectorType) -> None:
+    """Walk a parsed selector and copy every supported part onto the query.
+
+    Specificity is not considered and pseudo-elements are not examined here.
+
+    Cssselect calls the structure a "parsed_tree" but it is easier to think of it as a
+    linked-list. The head of the list is the last condition written in the selector text
+    and each node points to the next through its `selector` attribute. The walk stops at
+    the "Element" node.
+
+    For example:
+
+        Attrib -> Class -> Hash -> Class -> Attrib -> Element
+
+    Raises `ParseError` for combined (nested) selectors and for any other node type that is
+    not an attribute, class, id, or element.
+    """
+    node = selector
+    while True:
+        if isinstance(node, Attrib):
+            visit_attribute(query, node)
+        elif isinstance(node, Class):
+            visit_class(query, node)
+        elif isinstance(node, Element):
+            # The element terminates the walk.
+            visit_element(query, node)
+            return None
+        elif isinstance(node, Hash):
+            visit_hash(query, node)
+        elif isinstance(node, CombinedSelector):
+            raise ParseError("Nested selectors are not supported.")
+        else:
+            raise ParseError("Only attribute, class, id, and tag name selectors are supported.")
+
+        # Attrib, Class and Hash nodes all link to the next node the same way.
+        node = node.selector
+
+
+def visit_attribute(query: QueryType, attribute: Attrib) -> None:
+    """Copy a supported attribute selector's value onto the query.
+
+    Raises `ParseError` unless the attribute operator is "=". Attribute names that are not
+    listed below leave the query untouched.
+    """
+    if attribute.operator != "=":
+        raise ParseError("Only the '=' operator is supported.")
+
+    # HTML attribute name -> QueryType attribute name.
+    query_field_by_attribute = {
+        "alt": "alt",
+        "aria-label": "aria_label",
+        "role": "role",
+        "data-testid": "testid",
+        "title": "title",
+    }
+
+    query_field = query_field_by_attribute.get(attribute.attrib)
+    if query_field is not None:
+        setattr(query, query_field, attribute.value)
+
+
+def visit_class(query: QueryType, class_: Class) -> None:
+    """Record the class name of a class selector on the query."""
+    class_name = class_.class_name
+    query.classes.append(class_name)
+
+
+def visit_element(query: QueryType, element: Element) -> None:
+    """Record the element name of an element selector as the query's tag."""
+    tag_name = element.element
+    query.tag = tag_name
+
+
+def visit_hash(query: QueryType, hash_: Hash) -> None:
+    """Record the id of a hash selector on the query."""
+    element_id = hash_.id
+    query.id = element_id

+ 89 - 0
src/sentry/replays/lib/selector/query.py

@@ -0,0 +1,89 @@
+from typing import List, Union
+
+from snuba_sdk import Column, Function, Identifier, Lambda
+
+
+def union_find(arrs: List[Column], values: List[Union[str, List[str]]]) -> Function:
+    """Return a function that is truthy when one array position matches every value.
+
+    Accepts as input:
+        (arrs=[[a, b, c], [c, c, d]], values=[a, c])
+
+    Values and arrays are zipped:
+        [(a, [a, b, c]), (c, [c, c, d])]
+
+    A bitmask is applied:
+        [[1, 0, 0], [1, 1, 0]]
+
+    The results are zipped into like tuples:
+        [(1, 1), (0, 1), (0, 0)]
+
+    Tuples are filtered for exact matches:
+        [(1, 1)]
+
+    If the array is not empty a truthy value is returned:
+        1
+
+    :param arrs: The array columns to search. One entry per value.
+    :param values: The value searched for in the column at the same index. A list value
+        is matched with the array bitmask, anything else with the scalar bitmask.
+    :raises ValueError: If no columns and values were given or their counts differ.
+    """
+    if not arrs and not values:
+        # A single column is sufficient. Only the empty case is rejected.
+        raise ValueError("Must filter against at least one column.")
+    elif len(arrs) != len(values):
+        # Programmer error.
+        raise ValueError("Mismatched number of arrays and values.")
+
+    # One bitmask per (column, value) pair.
+    bitmasks = [_map_bitmask_operation(arr, value) for arr, value in zip(arrs, values)]
+
+    # A position matches when every bitmask holds `1` at that position.
+    all_ones = (1,) * len(arrs)
+    is_match = Lambda(["tuple"], Function("equals", parameters=[Identifier("tuple"), all_ones]))
+
+    return Function(
+        "notEmpty",
+        parameters=[
+            Function(
+                "arrayFilter",
+                parameters=[is_match, Function("arrayZip", parameters=bitmasks)],
+            )
+        ],
+    )
+
+
+def _map_bitmask_operation(arr: Column, value: Union[str, List[str]]) -> Function:
+    """Pick the bitmask builder that fits the value's type.
+
+    List values go through the array bitmask and everything else through the scalar
+    bitmask. List values are only expected for the class array.
+    """
+    build_bitmask = _apply_array_bitmask if isinstance(value, list) else _apply_scalar_bitmask
+    return build_bitmask(arr, value)
+
+
+def _apply_array_bitmask(arr: Column, subset: List[str]) -> Function:
+    """Build an `arrayMap` that tests each inner array with `hasAll` against the subset.
+
+    Accepts as input:
+        [[1], [2], [2, 3]], [2, 3]
+
+    Returns as output:
+        [0, 0, 1]
+    """
+    contains_subset = Function("hasAll", parameters=[Identifier("set"), subset])
+    return Function("arrayMap", parameters=[Lambda(["set"], contains_subset), arr])
+
+
+def _apply_scalar_bitmask(arr: Column, value: str) -> Function:
+    """Build an `arrayMap` that tests each item with `equals` against the value.
+
+    Accepts as input:
+        [1, 2, 3], 3
+
+    Returns as output:
+        [0, 0, 1]
+    """
+    is_equal = Function("equals", parameters=[Identifier("item"), value])
+    return Function("arrayMap", parameters=[Lambda(["item"], is_equal), arr])

+ 2 - 1
src/sentry/replays/post_process.py

@@ -81,7 +81,8 @@ def generate_normalized_output(
 
         item.pop("click_alt", None)
         item.pop("click_aria_label", None)
-        item.pop("click_class", None)
+        item.pop("clickClass", None)
+        item.pop("click_classes", None)
         item.pop("click_id", None)
         item.pop("click_role", None)
         item.pop("click_tag", None)

+ 20 - 2
src/sentry/replays/query.py

@@ -26,6 +26,7 @@ from sentry.replays.lib.query import (
     ListField,
     Number,
     QueryConfig,
+    Selector,
     String,
     Tag,
     UUIDField,
@@ -428,7 +429,9 @@ class ReplayQueryConfig(QueryConfig):
 
     # Click
     click_alt = ListField(field_alias="replay_click.alt", is_sortable=False)
-    click_class = ListField(field_alias="replay_click.class", is_sortable=False)
+    click_class = ListField(
+        field_alias="replay_click.class", query_alias="clickClass", is_sortable=False
+    )
     click_id = ListField(field_alias="replay_click.id", is_sortable=False)
     click_aria_label = ListField(field_alias="replay_click.label", is_sortable=False)
     click_role = ListField(field_alias="replay_click.role", is_sortable=False)
@@ -436,6 +439,7 @@ class ReplayQueryConfig(QueryConfig):
     click_testid = ListField(field_alias="replay_click.testid", is_sortable=False)
     click_text = ListField(field_alias="replay_click.textContent", is_sortable=False)
     click_title = ListField(field_alias="replay_click.title", is_sortable=False)
+    click_selector = Selector(field_alias="replay_click.selector", is_sortable=False)
 
     # Tag
     tags = Tag(field_alias="*")
@@ -596,6 +600,17 @@ FIELD_QUERY_ALIAS_MAP: Dict[str, List[str]] = {
     "replay_click.testid": ["click.testid"],
     "replay_click.textContent": ["click.text"],
     "replay_click.title": ["click.title"],
+    "replay_click.selector": [
+        "click.alt",
+        "click.aria_label",
+        "click.classes",
+        "click.id",
+        "click.role",
+        "click.tag",
+        "click.testid",
+        "click.text",
+        "click.title",
+    ],
 }
 
 
@@ -695,7 +710,10 @@ QUERY_ALIAS_COLUMN_MAP = {
         "groupArray", parameters=[Column("click_aria_label")], alias="click_aria_label"
     ),
     "click.class": Function(
-        "groupArrayArray", parameters=[Column("click_class")], alias="click_class"
+        "groupArrayArray", parameters=[Column("click_class")], alias="clickClass"
+    ),
+    "click.classes": Function(
+        "groupArray", parameters=[Column("click_class")], alias="click_classes"
     ),
     "click.id": Function("groupArray", parameters=[Column("click_id")], alias="click_id"),
     "click.role": Function("groupArray", parameters=[Column("click_role")], alias="click_role"),

+ 85 - 0
tests/sentry/replays/test_organization_replay_index.py

@@ -921,6 +921,15 @@ class OrganizationReplayIndexTest(APITestCase, ReplaysSnubaTestCase):
                 "replay_click.testid:1",
                 "replay_click.textContent:Hello",
                 "replay_click.title:MyTitle",
+                "replay_click.selector:div#myid",
+                "replay_click.selector:div[alt=Alt]",
+                "replay_click.selector:div[title=MyTitle]",
+                "replay_click.selector:div[data-testid='1']",
+                "replay_click.selector:div[role=button]",
+                "replay_click.selector:div#myid.class1.class2",
+                # Single quotes around attribute value.
+                "replay_click.selector:div[role='button']",
+                "replay_click.selector:div#myid.class1.class2[role=button][aria-label='AriaLabel']",
             ]
             for query in queries:
                 response = self.client.get(self.url + f"?field=id&query={query}")
@@ -938,9 +947,85 @@ class OrganizationReplayIndexTest(APITestCase, ReplaysSnubaTestCase):
                 "replay_click.testid:2",
                 "replay_click.textContent:World",
                 "replay_click.title:NotMyTitle",
+                "!replay_click.selector:div#myid",
+                "replay_click.selector:div#notmyid",
+                # Assert all classes must match.
+                "replay_click.selector:div#myid.class1.class2.class3",
+                # Invalid selectors return no rows.
+                "replay_click.selector:$#%^#%",
+                # Integer attribute values are not allowed and must be wrapped in single quotes.
+                "replay_click.selector:div[title=1]",
             ]
             for query in queries:
                 response = self.client.get(self.url + f"?query={query}")
                 assert response.status_code == 200, query
                 response_data = response.json()
                 assert len(response_data["data"]) == 0, query
+
+    def test_get_replays_filter_clicks_nested_selector(self):
+        """Test replays do not support nested selectors."""
+        project = self.create_project(teams=[self.team])
+        self.store_replays(mock_replay(datetime.datetime.now(), project.id, uuid.uuid4().hex))
+
+        with self.feature(REPLAYS_FEATURES):
+            # One query per combinator: descendant, "+", "~" and ">".
+            queries = [
+                'replay_click.selector:"div button"',
+                'replay_click.selector:"div + button"',
+                'replay_click.selector:"div ~ button"',
+                'replay_click.selector:"div > button"',
+            ]
+            for query in queries:
+                response = self.client.get(self.url + f"?field=id&query={query}")
+                # Include the query in the failure output, as the sibling tests do.
+                assert response.status_code == 400, query
+                assert (
+                    response.content == b'{"detail":"Nested selectors are not supported."}'
+                ), query
+
+    def test_get_replays_filter_clicks_pseudo_element(self):
+        """Assert a selector with a pseudo-element is rejected with a 400 response."""
+        replay_id = uuid.uuid4().hex
+        project = self.create_project(teams=[self.team])
+        self.store_replays(mock_replay(datetime.datetime.now(), project.id, replay_id))
+
+        expected_body = b'{"detail":"Pseudo-elements are not supported."}'
+
+        with self.feature(REPLAYS_FEATURES):
+            for query in ("replay_click.selector:a::visited",):
+                response = self.client.get(self.url + f"?field=id&query={query}")
+                assert response.status_code == 400, query
+                assert response.content == expected_body, query
+
+    def test_get_replays_filter_clicks_unsupported_selector(self):
+        """Assert selectors outside attribute, class, id and tag name return a 400."""
+        replay_id = uuid.uuid4().hex
+        project = self.create_project(teams=[self.team])
+        self.store_replays(mock_replay(datetime.datetime.now(), project.id, replay_id))
+
+        expected_body = (
+            b'{"detail":"Only attribute, class, id, and tag name selectors are supported."}'
+        )
+
+        with self.feature(REPLAYS_FEATURES):
+            for query in (
+                "replay_click.selector:div:is(2)",
+                "replay_click.selector:p:active",
+            ):
+                response = self.client.get(self.url + f"?field=id&query={query}")
+                assert response.status_code == 400, query
+                assert response.content == expected_body, query
+
+    def test_get_replays_filter_clicks_unsupported_operators(self):
+        """Assert attribute operators other than "=" are rejected with a 400 response."""
+        replay_id = uuid.uuid4().hex
+        project = self.create_project(teams=[self.team])
+        self.store_replays(mock_replay(datetime.datetime.now(), project.id, replay_id))
+
+        expected_body = b'{"detail":"Only the \'=\' operator is supported."}'
+
+        with self.feature(REPLAYS_FEATURES):
+            for query in (
+                'replay_click.selector:"[aria-label~=button]"',
+                'replay_click.selector:"[aria-label|=button]"',
+                'replay_click.selector:"[aria-label^=button]"',
+                'replay_click.selector:"[aria-label$=button]"',
+            ):
+                response = self.client.get(self.url + f"?field=id&query={query}")
+                assert response.status_code == 400, query
+                assert response.content == expected_body, query