
feat(tracing) Add function support to discover backend (#16948)

Add backend support for the frontend to send more complex functions than the
current aliases/aggregates allow. This can eventually be expanded to remove the
magic field aliases that map to Snuba functions.
evanh · 5 years ago · commit e57fdef346

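To illustrate the flow the diff below introduces, here is a condensed, standalone sketch (not the committed code): FUNCTION_PATTERN parses the field expression, the FUNCTIONS entry supplies the argument names and a transform template, and the result is formatted into a Snuba aggregation string plus alias. The hard-coded "duration" mapping stands in for the real SEARCH_MAP lookup and validation.

import re

# Condensed illustration of the new resolution flow (not the committed code).
FUNCTION_PATTERN = re.compile(r"^(?P<function>[^\(]+)\((?P<columns>[^\)]*)\)$")

FUNCTIONS = {
    "percentile": {
        "args": ["column", "percentile"],
        "transform": "quantile(%(percentile).2f)(%(column)s)",
    }
}

def resolve(field):
    match = FUNCTION_PATTERN.search(field)
    if not match or match.group("function") not in FUNCTIONS:
        raise ValueError("%s is not a valid function" % field)
    spec = FUNCTIONS[match.group("function")]
    columns = [c.strip() for c in match.group("columns").split(",")]
    arguments = dict(zip(spec["args"], columns))
    # The real code resolves the column through SEARCH_MAP and validates it;
    # hard-coded here to keep the sketch self-contained.
    arguments["column"] = "duration"
    arguments["percentile"] = float(arguments["percentile"])
    alias = "%s_%s" % (match.group("function"), "_".join(columns).replace(".", "_"))
    return [spec["transform"] % arguments, None, alias]

print(resolve("percentile(transaction.duration, 0.75)"))
# ['quantile(0.75)(duration)', None, 'percentile_transaction_duration_0_75']
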
+ 77 - 0
src/sentry/api/event_search.py

@@ -906,6 +906,79 @@ def validate_aggregate(field, match):
         )
 
 
+FUNCTION_PATTERN = re.compile(r"^(?P<function>[^\(]+)\((?P<columns>[^\)]*)\)$")
+
+NUMERIC_COLUMN = "numeric_column"
+NUMBER = "number"
+
+FUNCTIONS = {
+    "percentile": {
+        "name": "percentile",
+        "args": [
+            {"name": "column", "type": NUMERIC_COLUMN},
+            {
+                "name": "percentile",
+                "type": NUMBER,
+                "validator": lambda v: (v > 0 and v < 1, "not between 0 and 1"),
+            },
+        ],
+        "transform": "quantile(%(percentile).2f)(%(column)s)",
+    }
+}
+
+
+def is_function(field):
+    function_match = FUNCTION_PATTERN.search(field)
+    if function_match and function_match.group("function") in FUNCTIONS:
+        return function_match
+
+
+def get_function_alias(function_name, columns):
+    columns = "_".join(columns).replace(".", "_")
+    return ("%s_%s" % (function_name, columns)).rstrip("_")
+
+
+def resolve_function(field, match=None):
+    if not match:
+        match = FUNCTION_PATTERN.search(field)
+        if not match or match.group("function") not in FUNCTIONS:
+            raise InvalidSearchQuery("%s is not a valid function" % field)
+
+    function = FUNCTIONS[match.group("function")]
+    columns = [c.strip() for c in match.group("columns").split(",")]
+
+    if len(columns) != len(function["args"]):
+        raise InvalidSearchQuery("%s: expected %d arguments" % (field, len(function["args"])))
+
+    arguments = {}
+    for column_value, argument in zip(columns, function["args"]):
+        if argument["type"] == NUMBER:
+            try:
+                column_value = float(column_value)
+            except ValueError:
+                raise InvalidSearchQuery("%s: %s is not a number" % (field, column_value))
+        elif argument["type"] == NUMERIC_COLUMN:
+            # TODO evanh/wmak Do proper column validation here
+            snuba_column = SEARCH_MAP.get(column_value)
+            if not snuba_column:
+                raise InvalidSearchQuery("%s: %s is not a valid column" % (field, column_value))
+            elif snuba_column != "duration":
+                raise InvalidSearchQuery("%s: %s is not a numeric column" % (field, column_value))
+            column_value = snuba_column
+
+        if "validator" in argument:
+            valid, message = argument["validator"](column_value)
+            if not valid:
+                raise InvalidSearchQuery(
+                    "%s: %s argument invalid: %s" % (field, column_value, message)
+                )
+
+        arguments[argument["name"]] = column_value
+
+    snuba_string = function["transform"] % arguments
+    return [], [[snuba_string, None, get_function_alias(function["name"], columns)]]
+
+
 def resolve_orderby(orderby, fields, aggregations):
     """
     We accept column names, aggregate functions, and aliases as order by
@@ -946,6 +1019,10 @@ def resolve_field(field):
     if not isinstance(field, six.string_types):
         raise InvalidSearchQuery("Field names must be strings")
 
+    match = is_function(field)
+    if match:
+        return resolve_function(field, match)
+
     sans_parens = field.strip("()")
     if sans_parens in FIELD_ALIASES:
         special_field = deepcopy(FIELD_ALIASES[sans_parens])
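
For reference, a minimal sketch of calling the new resolve_function entry point directly (the import path follows the file above; running it requires a configured Sentry environment, so treat it as illustrative):

from sentry.api.event_search import resolve_function

# A function field resolves to no selected columns and one Snuba aggregation
# of the form [snuba_string, None, alias].
fields, aggregations = resolve_function("percentile(transaction.duration, 0.75)")
assert fields == []
assert aggregations == [
    ["quantile(0.75)(duration)", None, "percentile_transaction_duration_0_75"]
]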

+ 35 - 0
tests/sentry/api/test_event_search.py

@@ -1270,6 +1270,41 @@ class ResolveFieldListTest(unittest.TestCase):
             resolve_field_list(fields, {})
         assert "Invalid column" in six.text_type(err)
 
+    def test_percentile_function(self):
+        fields = ["percentile(transaction.duration, 0.75)"]
+        result = resolve_field_list(fields, {})
+
+        assert result["selected_columns"] == []
+        assert result["aggregations"] == [
+            ["quantile(0.75)(duration)", None, "percentile_transaction_duration_0_75"],
+            ["argMax", ["id", "timestamp"], "latest_event"],
+            ["argMax", ["project.id", "timestamp"], "projectid"],
+        ]
+        assert result["groupby"] == []
+
+        with pytest.raises(InvalidSearchQuery) as err:
+            fields = ["percentile(0.75)"]
+            result = resolve_field_list(fields, {})
+        assert "percentile(0.75): expected 2 arguments" in six.text_type(err)
+
+        with pytest.raises(InvalidSearchQuery) as err:
+            fields = ["percentile(sanchez, 0.75)"]
+            result = resolve_field_list(fields, {})
+        assert "percentile(sanchez, 0.75): sanchez is not a valid column" in six.text_type(err)
+
+        with pytest.raises(InvalidSearchQuery) as err:
+            fields = ["percentile(id, 0.75)"]
+            result = resolve_field_list(fields, {})
+        assert "percentile(id, 0.75): id is not a numeric column" in six.text_type(err)
+
+        with pytest.raises(InvalidSearchQuery) as err:
+            fields = ["percentile(transaction.duration, 75)"]
+            result = resolve_field_list(fields, {})
+        assert (
+            "percentile(transaction.duration, 75): 75.0 argument invalid: not between 0 and 1"
+            in six.text_type(err)
+        )
+
     def test_rollup_with_unaggregated_fields(self):
         with pytest.raises(InvalidSearchQuery) as err:
             fields = ["message"]

+ 34 - 0
tests/sentry/snuba/test_discover.py

@@ -414,6 +414,40 @@ class QueryTransformTest(TestCase):
             referrer=None,
         )
 
+    @patch("sentry.snuba.discover.raw_query")
+    def test_percentile_function(self, mock_query):
+        mock_query.return_value = {
+            "meta": [{"name": "transaction"}, {"name": "percentile_transaction_duration_0_75"}],
+            "data": [
+                {"transaction": "api.do_things", "percentile_transaction_duration_0_75": 1123}
+            ],
+        }
+        discover.query(
+            selected_columns=["transaction", "percentile(transaction.duration, 0.75)"],
+            query="",
+            params={"project_id": [self.project.id]},
+            auto_fields=True,
+        )
+        mock_query.assert_called_with(
+            selected_columns=["transaction"],
+            aggregations=[
+                ["quantile(0.75)(duration)", None, "percentile_transaction_duration_0_75"],
+                ["argMax", ["event_id", "timestamp"], "latest_event"],
+                ["argMax", ["project_id", "timestamp"], "projectid"],
+            ],
+            filter_keys={"project_id": [self.project.id]},
+            dataset=Dataset.Discover,
+            groupby=["transaction"],
+            conditions=[],
+            end=None,
+            start=None,
+            orderby=None,
+            having=[],
+            limit=50,
+            offset=None,
+            referrer=None,
+        )
+
     @patch("sentry.snuba.discover.raw_query")
     def test_orderby_limit_offset(self, mock_query):
         mock_query.return_value = {

+ 8 - 3
tests/snuba/api/endpoints/test_discover_query.py

@@ -414,9 +414,14 @@ class DiscoverQueryTest(APITestCase, SnubaTestCase):
 
         assert response.status_code == 200, response.content
         assert len(response.data["data"]) == 6
-        assert (response.data["data"][0]["time"]) > response.data["data"][2]["time"]
-        assert (response.data["data"][0]["project.name"]) == "bar"
-        assert (response.data["data"][0]["count"]) == 1
+        event_record = response.data["data"][0]
+        # This test can span across an hour, where the start is in hour 1, end is in hour 2, and event is in hour 2.
+        # That pushes the result to the second row.
+        if "project.name" not in event_record:
+            event_record = response.data["data"][1]
+        assert (event_record["time"]) > response.data["data"][2]["time"]
+        assert (event_record["project.name"]) == "bar"
+        assert (event_record["count"]) == 1
 
     def test_uniq_project_name(self):
         with self.feature("organizations:discover"):

+ 34 - 0
tests/snuba/api/endpoints/test_organization_events_v2.py

@@ -753,6 +753,40 @@ class OrganizationEventsV2EndpointTest(APITestCase, SnubaTestCase):
         data = response.data["data"]
         assert data[0]["issue.id"] == event.group_id
 
+    def test_percentile_function(self):
+        self.login_as(user=self.user)
+        project = self.create_project()
+        data = load_data("transaction")
+        data["transaction"] = "/aggregates/1"
+        data["timestamp"] = iso_format(before_now(minutes=1))
+        data["start_timestamp"] = iso_format(before_now(minutes=1, seconds=5))
+        event1 = self.store_event(data, project_id=project.id)
+
+        data = load_data("transaction")
+        data["transaction"] = "/aggregates/2"
+        data["timestamp"] = iso_format(before_now(minutes=1))
+        data["start_timestamp"] = iso_format(before_now(minutes=1, seconds=3))
+        event2 = self.store_event(data, project_id=project.id)
+
+        with self.feature("organizations:discover-basic"):
+            response = self.client.get(
+                self.url,
+                format="json",
+                data={
+                    "field": ["transaction", "percentile(transaction.duration, 0.95)"],
+                    "query": "event.type:transaction",
+                    "orderby": ["transaction"],
+                },
+            )
+
+        assert response.status_code == 200, response.content
+        assert len(response.data["data"]) == 2
+        data = response.data["data"]
+        assert data[0]["transaction"] == event1.transaction
+        assert data[0]["percentile_transaction_duration_0_95"] == 5000
+        assert data[1]["transaction"] == event2.transaction
+        assert data[1]["percentile_transaction_duration_0_95"] == 3000
+
     def test_nonexistent_fields(self):
         self.login_as(user=self.user)