@@ -1,19 +1,15 @@
 import re
-from collections import defaultdict, namedtuple
-from copy import deepcopy
+from collections import namedtuple
 from datetime import datetime
 
 from django.utils.functional import cached_property
-from parsimonious.exceptions import IncompleteParseError, ParseError
+from parsimonious.exceptions import IncompleteParseError
 from parsimonious.expressions import Optional
 from parsimonious.grammar import Grammar, NodeVisitor
 from parsimonious.nodes import Node, RegexNode
-from sentry_relay.consts import SPAN_STATUS_NAME_TO_CODE
 
-from sentry import eventstore
-from sentry.discover.models import KeyTransaction
-from sentry.models import Project
-from sentry.models.group import Group
+from sentry.search.events.constants import KEY_TRANSACTION_ALIAS, SEARCH_MAP, TAG_KEY_RE
+from sentry.search.events.fields import FIELD_ALIASES, FUNCTIONS, InvalidSearchQuery, resolve_field
 from sentry.search.utils import (
     InvalidQuery,
     parse_datetime_range,
@@ -22,22 +18,9 @@ from sentry.search.utils import (
     parse_duration,
     parse_numeric_value,
     parse_percentage,
-    parse_release,
-)
-from sentry.snuba.dataset import Dataset
-from sentry.utils.compat import filter, map, zip
-from sentry.utils.dates import to_timestamp
-from sentry.utils.snuba import (
-    DATASETS,
-    FUNCTION_TO_OPERATOR,
-    OPERATOR_TO_FUNCTION,
-    SNUBA_AND,
-    SNUBA_OR,
-    get_json_type,
-    is_duration_measurement,
-    is_measurement,
-    is_span_op_breakdown,
 )
+from sentry.utils.compat import filter, map
+from sentry.utils.snuba import is_duration_measurement, is_measurement, is_span_op_breakdown
 
 WILDCARD_CHARS = re.compile(r"[\*]")
 NEGATION_MAP = {
@@ -48,10 +31,6 @@ NEGATION_MAP = {
     ">=": "<",
     "IN": "NOT IN",
 }
-equality_operators = frozenset(["=", "IN"])
-inequality_operators = frozenset(["!=", "NOT IN"])
-
-RESULT_TYPES = {"duration", "string", "number", "integer", "percentage", "date"}
 
 
 def translate(pat):
@@ -196,49 +175,6 @@ spaces = ~r"\ *"
 )
 
 
-# Create the known set of fields from the issue properties
-# and the transactions and events dataset mapping definitions.
-SEARCH_MAP = {
-    "start": "start",
-    "end": "end",
-    "project_id": "project_id",
-    "first_seen": "first_seen",
-    "last_seen": "last_seen",
-    "times_seen": "times_seen",
-}
-SEARCH_MAP.update(**DATASETS[Dataset.Events])
-SEARCH_MAP.update(**DATASETS[Dataset.Discover])
-
-no_conversion = {"start", "end"}
-
-PROJECT_NAME_ALIAS = "project.name"
-PROJECT_ALIAS = "project"
-ISSUE_ALIAS = "issue"
-ISSUE_ID_ALIAS = "issue.id"
-RELEASE_ALIAS = "release"
-USER_DISPLAY_ALIAS = "user.display"
-ERROR_UNHANDLED_ALIAS = "error.unhandled"
-KEY_TRANSACTION_ALIAS = "key_transaction"
-ARRAY_FIELDS = {
-    "error.mechanism",
-    "error.type",
-    "error.value",
-    "stack.abs_path",
-    "stack.colno",
-    "stack.filename",
-    "stack.function",
-    "stack.in_app",
-    "stack.lineno",
-    "stack.module",
-    "stack.package",
-    "stack.stack_level",
-}
-
-
-class InvalidSearchQuery(Exception):
-    pass
-
-
 class SearchBoolean(namedtuple("SearchBoolean", "left_term operator right_term")):
     BOOLEAN_AND = "AND"
     BOOLEAN_OR = "OR"
@@ -841,2171 +777,3 @@ def parse_search_query(query, allow_boolean=True, params=None):
             )
         )
     return SearchVisitor(allow_boolean, params=params).visit(tree)
-
-
-def convert_aggregate_filter_to_snuba_query(aggregate_filter, params):
-    name = aggregate_filter.key.name
-    value = aggregate_filter.value.value
-
-    if params is not None and name in params.get("aliases", {}):
-        return params["aliases"][name].converter(aggregate_filter)
-
-    value = (
-        int(to_timestamp(value)) if isinstance(value, datetime) and name != "timestamp" else value
-    )
-
-    if aggregate_filter.operator in ("=", "!=") and aggregate_filter.value.value == "":
-        return [["isNull", [name]], aggregate_filter.operator, 1]
-
-    function = resolve_field(name, params, functions_acl=FUNCTIONS.keys())
-    if function.aggregate is not None:
-        name = function.aggregate[-1]
-
-    condition = [name, aggregate_filter.operator, value]
-    return condition
-
-
-def translate_transaction_status(val):
-    if val not in SPAN_STATUS_NAME_TO_CODE:
-        raise InvalidSearchQuery(
-            f"Invalid value {val} for transaction.status condition. Accepted "
-            f"values are {', '.join(SPAN_STATUS_NAME_TO_CODE.keys())}"
-        )
-    return SPAN_STATUS_NAME_TO_CODE[val]
-
-
-def convert_search_filter_to_snuba_query(search_filter, key=None, params=None):
-    name = search_filter.key.name if key is None else key
-    value = search_filter.value.value
-
-    # We want to use group_id elsewhere so shouldn't be removed from the dataset
-    # but if a user has a tag with the same name we want to make sure that works
-    if name in {"group_id"}:
-        name = f"tags[{name}]"
-
-    if name in no_conversion:
-        return
-    elif name == "id" and search_filter.value.is_wildcard():
-        raise InvalidSearchQuery("Wildcard conditions are not permitted on `id` field.")
-    elif name == "environment":
-        # conditions added to env_conditions are OR'd
-        env_conditions = []
-
-        values = set(value if isinstance(value, (list, tuple)) else [value])
-        # the "no environment" environment is null in snuba
-        if "" in values:
-            values.remove("")
-            operator = "IS NULL" if search_filter.operator == "=" else "IS NOT NULL"
-            env_conditions.append(["environment", operator, None])
-        if len(values) == 1:
-            operator = "=" if search_filter.operator in equality_operators else "!="
-            env_conditions.append(["environment", operator, values.pop()])
-        elif values:
-            operator = "IN" if search_filter.operator in equality_operators else "NOT IN"
-            env_conditions.append(["environment", operator, values])
-        return env_conditions
-    elif name == "message":
-        if search_filter.value.is_wildcard():
-            # XXX: We don't want the '^$' values at the beginning and end of
-            # the regex since we want to find the pattern anywhere in the
-            # message. Strip off here
-            value = search_filter.value.value[1:-1]
-            return [["match", ["message", f"'(?i){value}'"]], search_filter.operator, 1]
-        elif value == "":
-            operator = "=" if search_filter.operator == "=" else "!="
-            return [["equals", ["message", f"{value}"]], operator, 1]
-        else:
-            # https://clickhouse.yandex/docs/en/query_language/functions/string_search_functions/#position-haystack-needle
-            # positionCaseInsensitive returns 0 if not found and an index of 1 or more if found
-            # so we should flip the operator here
-            operator = "!=" if search_filter.operator in equality_operators else "="
-            if search_filter.is_in_filter:
-                # XXX: This `toString` usage is unnecessary, but we need it in place to
-                # trick the legacy Snuba language into not treating `message` as a
-                # function. Once we switch over to snql it can be removed.
-                return [
-                    [
-                        "multiSearchFirstPositionCaseInsensitive",
-                        [["toString", ["message"]], ["array", [f"'{v}'" for v in value]]],
-                    ],
-                    operator,
-                    0,
-                ]
-
-            # make message search case insensitive
-            return [["positionCaseInsensitive", ["message", f"'{value}'"]], operator, 0]
-    elif name in ARRAY_FIELDS and search_filter.value.is_wildcard():
-        # Escape and convert meta characters for LIKE expressions.
-        raw_value = search_filter.value.raw_value
-        like_value = raw_value.replace("%", "\\%").replace("_", "\\_").replace("*", "%")
-        operator = "LIKE" if search_filter.operator == "=" else "NOT LIKE"
-        return [name, operator, like_value]
-    elif name in ARRAY_FIELDS and search_filter.is_in_filter:
-        operator = "=" if search_filter.operator == "IN" else "!="
-        # XXX: This `arrayConcat` usage is unnecessary, but we need it in place to
-        # trick the legacy Snuba language into not treating `name` as a
-        # function. Once we switch over to snql it can be removed.
-        return [
-            ["hasAny", [["arrayConcat", [name]], ["array", [f"'{v}'" for v in value]]]],
-            operator,
-            1,
-        ]
-    elif name == "transaction.status":
-        # Handle "has" queries
-        if search_filter.value.raw_value == "":
-            return [["isNull", [name]], search_filter.operator, 1]
-
-        if search_filter.is_in_filter:
-            internal_value = [
-                translate_transaction_status(val) for val in search_filter.value.raw_value
-            ]
-        else:
-            internal_value = translate_transaction_status(search_filter.value.raw_value)
-
-        return [name, search_filter.operator, internal_value]
-    elif name == "issue.id":
-        # Handle "has" queries
-        if (
-            search_filter.value.raw_value == ""
-            or search_filter.is_in_filter
-            and [v for v in value if not v]
-        ):
-            # The state of having no issues is represented differently on transactions vs
-            # other events. On the transactions table, it is represented by 0 whereas it is
-            # represented by NULL everywhere else. We use coalesce here so we can treat this
-            # consistently
-            name = ["coalesce", [name, 0]]
-            if search_filter.is_in_filter:
-                value = [v if v else 0 for v in value]
-            else:
-                value = 0
-
-        # Skip isNull check on group_id value as we want to
-        # allow snuba's prewhere optimizer to find this condition.
-        return [name, search_filter.operator, value]
-    elif name == USER_DISPLAY_ALIAS:
-        user_display_expr = FIELD_ALIASES[USER_DISPLAY_ALIAS].get_expression(params)
-
-        # Handle 'has' condition
-        if search_filter.value.raw_value == "":
-            return [["isNull", [user_display_expr]], search_filter.operator, 1]
-        if search_filter.value.is_wildcard():
-            return [
-                ["match", [user_display_expr, f"'(?i){value}'"]],
-                search_filter.operator,
-                1,
-            ]
-        return [user_display_expr, search_filter.operator, value]
-    elif name == ERROR_UNHANDLED_ALIAS:
-        # This field is the inversion of error.handled, otherwise the logic is the same.
-        if search_filter.value.raw_value == "":
-            output = 0 if search_filter.operator == "!=" else 1
-            return [["isHandled", []], "=", output]
-        if value in ("1", 1):
-            return [["notHandled", []], "=", 1]
-        if value in ("0", 0):
-            return [["isHandled", []], "=", 1]
-        raise InvalidSearchQuery(
-            "Invalid value for error.unhandled condition. Accepted values are 1, 0"
-        )
-    elif name == "error.handled":
-        # Treat has filter as equivalent to handled
-        if search_filter.value.raw_value == "":
-            output = 1 if search_filter.operator == "!=" else 0
-            return [["isHandled", []], "=", output]
-        # Null values and 1 are the same, and both indicate a handled error.
-        if value in ("1", 1):
-            return [["isHandled", []], "=", 1]
-        if value in (
-            "0",
-            0,
-        ):
-            return [["notHandled", []], "=", 1]
-        raise InvalidSearchQuery(
-            "Invalid value for error.handled condition. Accepted values are 1, 0"
-        )
-    elif name == KEY_TRANSACTION_ALIAS:
-        key_transaction_expr = FIELD_ALIASES[KEY_TRANSACTION_ALIAS].get_expression(params)
-
-        if search_filter.value.raw_value == "":
-            operator = "!=" if search_filter.operator == "!=" else "="
-            return [key_transaction_expr, operator, 0]
-        if value in ("1", 1):
-            return [key_transaction_expr, "=", 1]
-        if value in ("0", 0):
-            return [key_transaction_expr, "=", 0]
-        raise InvalidSearchQuery(
-            "Invalid value for key_transaction condition. Accepted values are 1, 0"
-        )
-    elif name in ARRAY_FIELDS and search_filter.value.raw_value == "":
-        return [["notEmpty", [name]], "=", 1 if search_filter.operator == "!=" else 0]
-    else:
-        # timestamp{,.to_{hour,day}} need a datetime string
-        # last_seen needs an integer
-        if isinstance(value, datetime) and name not in {
-            "timestamp",
-            "timestamp.to_hour",
-            "timestamp.to_day",
-        }:
-            value = int(to_timestamp(value)) * 1000
-
-        # most field aliases are handled above but timestamp.to_{hour,day} are
-        # handled here
-        if name in FIELD_ALIASES:
-            name = FIELD_ALIASES[name].get_expression(params)
-
-        # Tags are never null, but promoted tags are columns and so can be null.
-        # To handle both cases, use `ifNull` to convert to an empty string and
-        # compare so we need to check for empty values.
-        if search_filter.key.is_tag:
-            name = ["ifNull", [name, "''"]]
-
-        # Handle checks for existence
-        if search_filter.operator in ("=", "!=") and search_filter.value.value == "":
-            if search_filter.key.is_tag:
-                return [name, search_filter.operator, value]
-            else:
-                # If not a tag, we can just check that the column is null.
-                return [["isNull", [name]], search_filter.operator, 1]
-
-        is_null_condition = None
-        # TODO(wmak): Skip this for all non-nullable keys not just event.type
-        if (
-            search_filter.operator in ("!=", "NOT IN")
-            and not search_filter.key.is_tag
-            and name != "event.type"
-        ):
-            # Handle null columns on inequality comparisons. Any comparison
-            # between a value and a null will result to null, so we need to
-            # explicitly check for whether the condition is null, and OR it
-            # together with the inequality check.
-            # We don't need to apply this for tags, since if they don't exist
-            # they'll always be an empty string.
-            is_null_condition = [["isNull", [name]], "=", 1]
-
-        if search_filter.value.is_wildcard():
-            condition = [["match", [name, f"'(?i){value}'"]], search_filter.operator, 1]
-        else:
-            condition = [name, search_filter.operator, value]
-
-        # We only want to return as a list if we have the check for null
-        # present. Returning as a list causes these conditions to be ORed
-        # together. Otherwise just return the raw condition, so that it can be
-        # used correctly in aggregates.
-        if is_null_condition:
-            return [is_null_condition, condition]
-        else:
-            return condition
-
-
-def to_list(value):
-    if isinstance(value, list):
-        return value
-    return [value]
-
-
-def format_search_filter(term, params):
-    projects_to_filter = []  # Used to avoid doing multiple conditions on project ID
-    conditions = []
-    group_ids = None
-    name = term.key.name
-    value = term.value.value
-    if name in (PROJECT_ALIAS, PROJECT_NAME_ALIAS):
-        if term.operator == "=" and value == "":
-            raise InvalidSearchQuery("Invalid query for 'has' search: 'project' cannot be empty.")
-        slugs = to_list(value)
-        projects = {
-            p.slug: p.id
-            for p in Project.objects.filter(id__in=params.get("project_id", []), slug__in=slugs)
-        }
-        missing = [slug for slug in slugs if slug not in projects]
-        if missing and term.operator in equality_operators:
-            raise InvalidSearchQuery(
-                f"Invalid query. Project(s) {', '.join(missing)} do not exist or are not actively selected."
-            )
-        project_ids = list(sorted(projects.values()))
-        if project_ids:
-            # Create a new search filter with the correct values
-            term = SearchFilter(
-                SearchKey("project_id"),
-                term.operator,
-                SearchValue(project_ids if term.is_in_filter else project_ids[0]),
-            )
-            converted_filter = convert_search_filter_to_snuba_query(term)
-            if converted_filter:
-                if term.operator in equality_operators:
-                    projects_to_filter = project_ids
-                conditions.append(converted_filter)
-    elif name == ISSUE_ID_ALIAS and value != "":
-        # A blank term value means that this is a has filter
-        group_ids = to_list(value)
-    elif name == ISSUE_ALIAS:
-        operator = term.operator
-        value = to_list(value)
-        # `unknown` is a special value for when there is no issue associated with the event
-        group_short_ids = [v for v in value if v and v != "unknown"]
-        filter_values = ["" for v in value if not v or v == "unknown"]
-
-        if group_short_ids and params and "organization_id" in params:
-            try:
-                groups = Group.objects.by_qualified_short_id_bulk(
-                    params["organization_id"],
-                    group_short_ids,
-                )
-            except Exception:
-                raise InvalidSearchQuery(f"Invalid value '{group_short_ids}' for 'issue:' filter")
-            else:
-                filter_values.extend([g.id for g in groups])
-
-        term = SearchFilter(
-            SearchKey("issue.id"),
-            operator,
-            SearchValue(filter_values if term.is_in_filter else filter_values[0]),
-        )
-        converted_filter = convert_search_filter_to_snuba_query(term)
-        conditions.append(converted_filter)
-    elif (
-        name == RELEASE_ALIAS
-        and params
-        and (value == "latest" or term.is_in_filter and any(v == "latest" for v in value))
-    ):
-        value = [
-            parse_release(
-                v,
-                params["project_id"],
-                params.get("environment_objects"),
-                params.get("organization_id"),
-            )
-            for v in to_list(value)
-        ]
-
-        converted_filter = convert_search_filter_to_snuba_query(
-            SearchFilter(
-                term.key,
-                term.operator,
-                SearchValue(value if term.is_in_filter else value[0]),
-            )
-        )
-        if converted_filter:
-            conditions.append(converted_filter)
-    else:
-        converted_filter = convert_search_filter_to_snuba_query(term, params=params)
-        if converted_filter:
-            conditions.append(converted_filter)
-
-    return conditions, projects_to_filter, group_ids
-
-
-def convert_condition_to_function(cond):
-    function = OPERATOR_TO_FUNCTION.get(cond[1])
-    if not function:
-        # It's hard to make this error more specific without exposing internals to the end user
-        raise InvalidSearchQuery(f"Operator {cond[1]} is not a valid condition operator.")
-
-    return [function, [cond[0], cond[2]]]
-
-
-def convert_function_to_condition(func):
-    operator = FUNCTION_TO_OPERATOR.get(func[0])
-    if not operator:
-        return [func, "=", 1]
-
-    return [func[1][0], operator, func[1][1]]
-
-
-def convert_array_to_tree(operator, terms):
-    """
-    Convert an array of conditions into a binary tree joined by the operator.
-    """
-    if len(terms) == 1:
-        return terms[0]
-    elif len(terms) == 2:
-        return [operator, terms]
-
-    return [operator, [terms[0], convert_array_to_tree(operator, terms[1:])]]
-
-
-def flatten_condition_tree(tree, condition_function):
-    """
-    Take a binary tree of conditions, and flatten all of the terms using the condition function.
-    E.g. f( and(and(b, c), and(d, e)), and ) -> [b, c, d, e]
-    """
-    stack = [tree]
-    flattened = []
-    while len(stack) > 0:
-        item = stack.pop(0)
-        if item[0] == condition_function:
-            stack.extend(item[1])
-        else:
-            flattened.append(item)
-
-    return flattened
-
-
-def is_condition(term):
-    return isinstance(term, (tuple, list)) and len(term) == 3 and term[1] in OPERATOR_TO_FUNCTION
-
-
-def convert_snuba_condition_to_function(term, params=None):
-    if isinstance(term, ParenExpression):
-        return convert_search_boolean_to_snuba_query(term.children, params)
-
-    group_ids = []
-    projects_to_filter = []
-    if isinstance(term, SearchFilter):
-        conditions, projects_to_filter, group_ids = format_search_filter(term, params)
-        group_ids = group_ids if group_ids else []
-        if conditions:
-            conditions_to_and = []
-            for cond in conditions:
-                if is_condition(cond):
-                    conditions_to_and.append(convert_condition_to_function(cond))
-                else:
-                    conditions_to_and.append(
-                        convert_array_to_tree(
-                            SNUBA_OR, [convert_condition_to_function(c) for c in cond]
-                        )
-                    )
-
-            condition_tree = None
-            if len(conditions_to_and) == 1:
-                condition_tree = conditions_to_and[0]
-            elif len(conditions_to_and) > 1:
-                condition_tree = convert_array_to_tree(SNUBA_AND, conditions_to_and)
-            return condition_tree, None, projects_to_filter, group_ids
-    elif isinstance(term, AggregateFilter):
-        converted_filter = convert_aggregate_filter_to_snuba_query(term, params)
-        return None, convert_condition_to_function(converted_filter), projects_to_filter, group_ids
-
-    return None, None, projects_to_filter, group_ids
-
-
-def convert_search_boolean_to_snuba_query(terms, params=None):
-    if len(terms) == 1:
-        return convert_snuba_condition_to_function(terms[0], params)
-
-    # Filter out any ANDs since we can assume anything without an OR is an AND. Also do some
-    # basic sanitization of the query: can't have two operators next to each other, and can't
-    # start or end a query with an operator.
-    prev = None
-    new_terms = []
-    for term in terms:
-        if prev:
-            if SearchBoolean.is_operator(prev) and SearchBoolean.is_operator(term):
-                raise InvalidSearchQuery(
-                    f"Missing condition in between two condition operators: '{prev} {term}'"
-                )
-        else:
-            if SearchBoolean.is_operator(term):
-                raise InvalidSearchQuery(
-                    f"Condition is missing on the left side of '{term}' operator"
-                )
-
-        if term != SearchBoolean.BOOLEAN_AND:
-            new_terms.append(term)
-        prev = term
-    if SearchBoolean.is_operator(term):
-        raise InvalidSearchQuery(f"Condition is missing on the right side of '{term}' operator")
-    terms = new_terms
-
-    # We put precedence on AND, which sort of counter-intuitevely means we have to split the query
-    # on ORs first, so the ANDs are grouped together. Search through the query for ORs and split the
-    # query on each OR.
-    # We want to maintain a binary tree, so split the terms on the first OR we can find and recurse on
-    # the two sides. If there is no OR, split the first element out to AND
-    index = None
-    lhs, rhs = None, None
-    operator = None
-    try:
-        index = terms.index(SearchBoolean.BOOLEAN_OR)
-        lhs, rhs = terms[:index], terms[index + 1 :]
-        operator = SNUBA_OR
-    except Exception:
-        lhs, rhs = terms[:1], terms[1:]
-        operator = SNUBA_AND
-
-    (
-        lhs_condition,
-        lhs_having,
-        projects_to_filter,
-        group_ids,
-    ) = convert_search_boolean_to_snuba_query(lhs, params)
-    (
-        rhs_condition,
-        rhs_having,
-        rhs_projects_to_filter,
-        rhs_group_ids,
-    ) = convert_search_boolean_to_snuba_query(rhs, params)
-
-    projects_to_filter.extend(rhs_projects_to_filter)
-    group_ids.extend(rhs_group_ids)
-
-    if operator == SNUBA_OR and (lhs_condition or rhs_condition) and (lhs_having or rhs_having):
-        raise InvalidSearchQuery(
-            "Having an OR between aggregate filters and normal filters is invalid."
-        )
-
-    condition, having = None, None
-    if lhs_condition or rhs_condition:
-        args = filter(None, [lhs_condition, rhs_condition])
-        if not args:
-            condition = None
-        elif len(args) == 1:
-            condition = args[0]
-        else:
-            condition = [operator, args]
-
-    if lhs_having or rhs_having:
-        args = filter(None, [lhs_having, rhs_having])
-        if not args:
-            having = None
-        elif len(args) == 1:
-            having = args[0]
-        else:
-            having = [operator, args]
-
-    return condition, having, projects_to_filter, group_ids
-
-
-def get_filter(query=None, params=None):
-    """
-    Returns an eventstore filter given the search text provided by the user and
-    URL params
-    """
-    # NOTE: this function assumes project permissions check already happened
-    parsed_terms = []
-    if query is not None:
-        try:
-            parsed_terms = parse_search_query(query, allow_boolean=True, params=params)
-        except ParseError as e:
-            raise InvalidSearchQuery(f"Parse error: {e.expr.name} (column {e.column():d})")
-
-    kwargs = {
-        "start": None,
-        "end": None,
-        "conditions": [],
-        "having": [],
-        "user_id": None,
-        "organization_id": None,
-        "project_ids": [],
-        "group_ids": [],
-        "condition_aggregates": [],
-        "aliases": params.get("aliases", {}) if params is not None else {},
-    }
-
-    projects_to_filter = []
-    if any(
-        isinstance(term, ParenExpression) or SearchBoolean.is_operator(term)
-        for term in parsed_terms
-    ):
-        (
-            condition,
-            having,
-            found_projects_to_filter,
-            group_ids,
-        ) = convert_search_boolean_to_snuba_query(parsed_terms, params)
-
-        if condition:
-            and_conditions = flatten_condition_tree(condition, SNUBA_AND)
-            for func in and_conditions:
-                kwargs["conditions"].append(convert_function_to_condition(func))
-        if having:
-            kwargs["condition_aggregates"] = [
-                term.key.name for term in parsed_terms if isinstance(term, AggregateFilter)
-            ]
-            and_having = flatten_condition_tree(having, SNUBA_AND)
-            for func in and_having:
-                kwargs["having"].append(convert_function_to_condition(func))
-        if found_projects_to_filter:
-            projects_to_filter = list(set(found_projects_to_filter))
-        if group_ids is not None:
-            kwargs["group_ids"].extend(list(set(group_ids)))
-    else:
-        projects_to_filter = set()
-        for term in parsed_terms:
-            if isinstance(term, SearchFilter):
-                conditions, found_projects_to_filter, group_ids = format_search_filter(term, params)
-                if len(conditions) > 0:
-                    kwargs["conditions"].extend(conditions)
-                if found_projects_to_filter:
-                    projects_to_filter.update(found_projects_to_filter)
-                if group_ids is not None:
-                    kwargs["group_ids"].extend(group_ids)
-            elif isinstance(term, AggregateFilter):
-                converted_filter = convert_aggregate_filter_to_snuba_query(term, params)
-                kwargs["condition_aggregates"].append(term.key.name)
-                if converted_filter:
-                    kwargs["having"].append(converted_filter)
-        projects_to_filter = list(projects_to_filter)
-
-    # Keys included as url params take precedent if same key is included in search
-    # They are also considered safe and to have had access rules applied unlike conditions
-    # from the query string.
-    if params:
-        for key in ("start", "end"):
-            kwargs[key] = params.get(key, None)
-        # OrganizationEndpoint.get_filter() uses project_id, but eventstore.Filter uses project_ids
-        if "user_id" in params:
-            kwargs["user_id"] = params["user_id"]
-        if "organization_id" in params:
-            kwargs["organization_id"] = params["organization_id"]
-        if "project_id" in params:
-            if projects_to_filter:
-                kwargs["project_ids"] = projects_to_filter
-            else:
-                kwargs["project_ids"] = params["project_id"]
-        if "environment" in params:
-            term = SearchFilter(SearchKey("environment"), "=", SearchValue(params["environment"]))
-            kwargs["conditions"].append(convert_search_filter_to_snuba_query(term))
-        if "group_ids" in params:
-            kwargs["group_ids"] = to_list(params["group_ids"])
-        # Deprecated alias, use `group_ids` instead
-        if ISSUE_ID_ALIAS in params:
-            kwargs["group_ids"] = to_list(params["issue.id"])
-
-    return eventstore.Filter(**kwargs)
-
-
-class PseudoField:
-    def __init__(self, name, alias, expression=None, expression_fn=None, result_type=None):
-        self.name = name
-        self.alias = alias
-        self.expression = expression
-        self.expression_fn = expression_fn
-        self.result_type = result_type
-
-        self.validate()
-
-    def get_expression(self, params):
-        if isinstance(self.expression, (list, tuple)):
-            return deepcopy(self.expression)
-        elif self.expression_fn is not None:
-            return self.expression_fn(params)
-        return None
-
-    def get_field(self, params=None):
-        expression = self.get_expression(params)
-        if expression is not None:
-            expression.append(self.alias)
-            return expression
-        return self.alias
-
-    def validate(self):
-        assert self.alias is not None, f"{self.name}: alias is required"
-        assert (
-            self.expression is None or self.expression_fn is None
-        ), f"{self.name}: only one of expression, expression_fn is allowed"
-
-
-def key_transaction_expression(user_id, organization_id, project_ids):
-    """
-    This function may be called multiple times, making for repeated data bases queries.
-    Lifting the query higher to earlier in the call stack will require a lot more changes
-    as there are numerous entry points. So we will leave the duplicate query alone for now.
-    """
-    if user_id is None or organization_id is None or project_ids is None:
-        raise InvalidSearchQuery("Missing necessary meta for key transaction field.")
-
-    key_transactions = (
-        KeyTransaction.objects.filter(
-            owner_id=user_id,
-            organization_id=organization_id,
-            project_id__in=project_ids,
-        )
-        .order_by("transaction", "project_id")
-        .values("project_id", "transaction")
-    )
-
-    # if there are no key transactions, the value should always be 0
-    if not len(key_transactions):
-        return ["toInt64", [0]]
-
-    return [
-        "has",
-        [
-            [
-                "array",
-                [
-                    [
-                        "tuple",
-                        [
-                            ["toUInt64", [transaction["project_id"]]],
-                            "'{}'".format(transaction["transaction"]),
-                        ],
-                    ]
-                    for transaction in key_transactions
-                ],
-            ],
-            ["tuple", ["project_id", "transaction"]],
-        ],
-    ]
-
-
-# When updating this list, also check if the following need to be updated:
-# - convert_search_filter_to_snuba_query (otherwise aliased field will be treated as tag)
-# - static/app/utils/discover/fields.tsx FIELDS (for discover column list and search box autocomplete)
-FIELD_ALIASES = {
-    field.name: field
-    for field in [
-        PseudoField("project", "project.id"),
-        PseudoField("issue", "issue.id"),
-        PseudoField(
-            "timestamp.to_hour", "timestamp.to_hour", expression=["toStartOfHour", ["timestamp"]]
-        ),
-        PseudoField(
-            "timestamp.to_day", "timestamp.to_day", expression=["toStartOfDay", ["timestamp"]]
-        ),
-        PseudoField(ERROR_UNHANDLED_ALIAS, ERROR_UNHANDLED_ALIAS, expression=["notHandled", []]),
-        PseudoField(
-            USER_DISPLAY_ALIAS,
-            USER_DISPLAY_ALIAS,
-            expression=["coalesce", ["user.email", "user.username", "user.ip"]],
-        ),
-        # the key transaction field is intentially not added to the discover/fields list yet
-        # because there needs to be some work on the front end to integrate this into discover
-        PseudoField(
-            KEY_TRANSACTION_ALIAS,
-            KEY_TRANSACTION_ALIAS,
-            expression_fn=lambda params: key_transaction_expression(
-                params.get("user_id"),
-                params.get("organization_id"),
-                params.get("project_id"),
-            ),
-            result_type="boolean",
-        ),
-    ]
-}
-
-
-def get_json_meta_type(field_alias, snuba_type, function=None):
-    alias_definition = FIELD_ALIASES.get(field_alias)
-    if alias_definition and alias_definition.result_type is not None:
-        return alias_definition.result_type
-
-    snuba_json = get_json_type(snuba_type)
-    if snuba_json != "string":
-        if function is not None:
-            result_type = function.instance.get_result_type(function.field, function.arguments)
-            if result_type is not None:
-                return result_type
-
-        function_match = FUNCTION_ALIAS_PATTERN.match(field_alias)
-        if function_match:
-            function_definition = FUNCTIONS.get(function_match.group(1))
-            if function_definition:
-                result_type = function_definition.get_result_type()
-                if result_type is not None:
-                    return result_type
-
-    if (
-        "duration" in field_alias
-        or is_duration_measurement(field_alias)
-        or is_span_op_breakdown(field_alias)
-    ):
-        return "duration"
-    if is_measurement(field_alias):
-        return "number"
-    if field_alias == "transaction.status":
-        return "string"
-    return snuba_json
-
-
-# Based on general/src/protocol/tags.rs in relay
-VALID_FIELD_PATTERN = re.compile(r"^[a-zA-Z0-9_.:-]*$")
-
-# The regex for alias here is to match any word, but exclude anything that is only digits
-# eg. 123 doesn't match, but test_123 will match
-ALIAS_REGEX = r"(\w+)?(?!\d+)\w+"
-
-ALIAS_PATTERN = re.compile(fr"{ALIAS_REGEX}$")
-FUNCTION_PATTERN = re.compile(
-    fr"^(?P<function>[^\(]+)\((?P<columns>.*)\)( (as|AS) (?P<alias>{ALIAS_REGEX}))?$"
-)
-
-
-class InvalidFunctionArgument(Exception):
-    pass
-
-
-class ArgValue:
-    def __init__(self, arg):
-        self.arg = arg
-
-
-class FunctionArg:
-    def __init__(self, name):
-        self.name = name
-        self.has_default = False
-
-    def get_default(self, params):
-        raise InvalidFunctionArgument(f"{self.name} has no defaults")
-
-    def normalize(self, value, params):
-        return value
-
-    def get_type(self, value):
-        raise InvalidFunctionArgument(f"{self.name} has no type defined")
-
-
-class FunctionAliasArg(FunctionArg):
-    def normalize(self, value, params):
-        if not ALIAS_PATTERN.match(value):
-            raise InvalidFunctionArgument(f"{value} is not a valid function alias")
-        return value
-
-
-class NullColumn(FunctionArg):
-    """
-    Convert the provided column to null so that we
-    can drop it. Used to make count() not have a
-    required argument that we ignore.
-    """
-
-    def __init__(self, name):
-        super().__init__(name)
-        self.has_default = True
-
-    def get_default(self, params):
-        return None
-
-    def normalize(self, value, params):
-        return None
-
-
-class CountColumn(FunctionArg):
-    def __init__(self, name):
-        super().__init__(name)
-        self.has_default = True
-
-    def get_default(self, params):
-        return None
-
-    def normalize(self, value, params):
-        if value is None:
-            raise InvalidFunctionArgument("a column is required")
-
-        if value not in FIELD_ALIASES:
-            return value
-
-        field = FIELD_ALIASES[value]
-
-        # If the alias has an expression prefer that over the column alias
-        # This enables user.display to work in aggregates
-        expression = field.get_expression(params)
-        if expression is not None:
-            return expression
-        elif field.alias is not None:
-            return field.alias
-        return value
-
-
-class FieldColumn(CountColumn):
-    """ Allow any field column, of any type """
-
-    def get_type(self, value):
-        if is_duration_measurement(value) or is_span_op_breakdown(value):
-            return "duration"
-        elif value == "transaction.duration":
-            return "duration"
-        elif value == "timestamp":
-            return "date"
-        return "string"
-
-
-class StringArg(FunctionArg):
-    def __init__(self, name, unquote=False, unescape_quotes=False):
-        super().__init__(name)
-        self.unquote = unquote
-        self.unescape_quotes = unescape_quotes
-
-    def normalize(self, value, params):
-        if self.unquote:
-            if len(value) < 2 or value[0] != '"' or value[-1] != '"':
-                raise InvalidFunctionArgument("string should be quoted")
-            value = value[1:-1]
-        if self.unescape_quotes:
-            value = re.sub(r'\\"', '"', value)
-        return f"'{value}'"
-
-
-class DateArg(FunctionArg):
-    date_format = "%Y-%m-%dT%H:%M:%S"
-
-    def normalize(self, value, params):
-        try:
-            datetime.strptime(value, self.date_format)
-        except ValueError:
-            raise InvalidFunctionArgument(
-                f"{value} is in the wrong format, expected a date like 2020-03-14T15:14:15"
-            )
-        return f"'{value}'"
-
-
-class ConditionArg(FunctionArg):
-    # List and not a set so the error message is consistent
-    VALID_CONDITIONS = [
-        "equals",
-        "notEquals",
-        "lessOrEquals",
-        "greaterOrEquals",
-        "less",
-        "greater",
-    ]
-
-    def normalize(self, value, params):
-        if value not in self.VALID_CONDITIONS:
-            raise InvalidFunctionArgument(
-                "{} is not a valid condition, the only supported conditions are: {}".format(
-                    value,
-                    ",".join(self.VALID_CONDITIONS),
-                )
-            )
-
-        return value
-
-
-class Column(FunctionArg):
-    def __init__(self, name, allowed_columns=None):
-        super().__init__(name)
-        # make sure to map the allowed columns to their snuba names
-        self.allowed_columns = [SEARCH_MAP.get(col) for col in allowed_columns]
-
-    def normalize(self, value, params):
-        snuba_column = SEARCH_MAP.get(value)
-        if self.allowed_columns is not None:
-            if value in self.allowed_columns or snuba_column in self.allowed_columns:
-                return snuba_column
-            else:
-                raise InvalidFunctionArgument(f"{value} is not an allowed column")
-        if not snuba_column:
-            raise InvalidFunctionArgument(f"{value} is not a valid column")
-        return snuba_column
-
-
-class ColumnNoLookup(Column):
-    def __init__(self, name, allowed_columns=None):
-        super().__init__(name, allowed_columns=allowed_columns)
-
-    def normalize(self, value, params):
-        super().normalize(value, params)
-        return value
-
-
-class NumericColumn(FunctionArg):
-    def _normalize(self, value):
-        # This method is written in this way so that `get_type` can always call
-        # this even in child classes where `normalize` have been overridden.
-
-        snuba_column = SEARCH_MAP.get(value)
-        if not snuba_column and is_measurement(value):
-            return value
-        if not snuba_column and is_span_op_breakdown(value):
-            return value
-        if not snuba_column:
-            raise InvalidFunctionArgument(f"{value} is not a valid column")
-        elif snuba_column not in ["time", "timestamp", "duration"]:
-            raise InvalidFunctionArgument(f"{value} is not a numeric column")
-        return snuba_column
-
-    def normalize(self, value, params):
-        return self._normalize(value)
-
-    def get_type(self, value):
-        snuba_column = self._normalize(value)
-        if is_duration_measurement(snuba_column) or is_span_op_breakdown(snuba_column):
-            return "duration"
-        elif snuba_column == "duration":
-            return "duration"
-        elif snuba_column == "timestamp":
-            return "date"
-        return "number"
-
-
-class NumericColumnNoLookup(NumericColumn):
-    def __init__(self, name, allow_array_value=False):
-        super().__init__(name)
-        self.allow_array_value = allow_array_value
-
-    def normalize(self, value, params):
-        # `measurement_value` and `span_op_breakdowns_value` are actually an
-        # array of Float64s. But when used in this context, we always want to
-        # expand it using `arrayJoin`. The resulting column will be a numeric
-        # column of type Float64.
-        if self.allow_array_value:
-            if value in {"measurements_value", "span_op_breakdowns_value"}:
-                return ["arrayJoin", [value]]
-
-        super().normalize(value, params)
-        return value
-
-
-class DurationColumn(FunctionArg):
-    def normalize(self, value, params):
-        snuba_column = SEARCH_MAP.get(value)
-        if not snuba_column and is_duration_measurement(value):
-            return value
-        if not snuba_column and is_span_op_breakdown(value):
-            return value
-        if not snuba_column:
-            raise InvalidFunctionArgument(f"{value} is not a valid column")
-        elif snuba_column != "duration":
-            raise InvalidFunctionArgument(f"{value} is not a duration column")
-        return snuba_column
-
-
-class DurationColumnNoLookup(DurationColumn):
-    def normalize(self, value, params):
-        super().normalize(value, params)
-        return value
-
-
-class StringArrayColumn(FunctionArg):
-    def normalize(self, value, params):
-        if value in ["tags.key", "tags.value", "measurements_key", "span_op_breakdowns_key"]:
-            return value
-        raise InvalidFunctionArgument(f"{value} is not a valid string array column")
-
-
-class NumberRange(FunctionArg):
-    def __init__(self, name, start, end):
-        super().__init__(name)
-        self.start = start
-        self.end = end
-
-    def normalize(self, value, params):
-        try:
-            value = float(value)
-        except ValueError:
-            raise InvalidFunctionArgument(f"{value} is not a number")
-
-        if self.start and value < self.start:
-            raise InvalidFunctionArgument(
-                f"{value:g} must be greater than or equal to {self.start:g}"
-            )
-        elif self.end and value >= self.end:
-            raise InvalidFunctionArgument(f"{value:g} must be less than {self.end:g}")
-
-        return value
-
-
-class IntervalDefault(NumberRange):
-    def __init__(self, name, start, end):
-        super().__init__(name, start, end)
-        self.has_default = True
-
-    def get_default(self, params):
-        if not params or not params.get("start") or not params.get("end"):
-            raise InvalidFunctionArgument("function called without default")
-        elif not isinstance(params.get("start"), datetime) or not isinstance(
-            params.get("end"), datetime
-        ):
-            raise InvalidFunctionArgument("function called with invalid default")
-
-        interval = (params["end"] - params["start"]).total_seconds()
-        return int(interval)
-
-
-def with_default(default, argument):
-    argument.has_default = True
-    argument.get_default = lambda *_: default
-    return argument
-
-
-class Function:
-    def __init__(
-        self,
-        name,
-        required_args=None,
-        optional_args=None,
-        calculated_args=None,
-        column=None,
-        aggregate=None,
-        transform=None,
-        result_type_fn=None,
-        default_result_type=None,
-        redundant_grouping=False,
-        private=False,
-    ):
-        """
-        Specifies a function interface that must be followed when defining new functions
-
-        :param str name: The name of the function, this refers to the name to invoke.
-        :param list[FunctionArg] required_args: The list of required arguments to the function.
-            If any of these arguments are not specified, an error will be raised.
-        :param list[FunctionArg] optional_args: The list of optional arguments to the function.
-            If any of these arguments are not specified, they will be filled using their default value.
-        :param list[obj] calculated_args: The list of calculated arguments to the function.
-            These arguments will be computed based on the list of specified arguments.
-        :param [str, [any], str or None] column: The column to be passed to snuba once formatted.
-            The arguments will be filled into the column where needed. This must not be an aggregate.
-        :param [str, [any], str or None] aggregate: The aggregate to be passed to snuba once formatted.
-            The arguments will be filled into the aggregate where needed. This must be an aggregate.
-        :param str transform: NOTE: Use aggregate over transform whenever possible.
-            An aggregate string to be passed to snuba once formatted. The arguments
-            will be filled into the string using `.format(...)`.
-        :param str result_type_fn: A function to call with in order to determine the result type.
-            This function will be passed the list of argument classes and argument values. This should
-            be tried first as the source of truth if available.
-        :param str default_result_type: The default resulting type of this function. Must be a type
-            defined by RESULTS_TYPES.
-        :param bool redundant_grouping: This function will result in redundant grouping if its column
-            is included as a field as well.
-        :param bool private: Whether or not this function should be disabled for general use.
-        """
-
-        self.name = name
-        self.required_args = [] if required_args is None else required_args
-        self.optional_args = [] if optional_args is None else optional_args
-        self.calculated_args = [] if calculated_args is None else calculated_args
-        self.column = column
-        self.aggregate = aggregate
-        self.transform = transform
-        self.result_type_fn = result_type_fn
-        self.default_result_type = default_result_type
-        self.redundant_grouping = redundant_grouping
-        self.private = private
-
-        self.validate()
-
-    @property
-    def required_args_count(self):
-        return len(self.required_args)
-
-    @property
-    def optional_args_count(self):
-        return len(self.optional_args)
-
-    @property
-    def total_args_count(self):
-        return self.required_args_count + self.optional_args_count
-
-    @property
-    def args(self):
-        return self.required_args + self.optional_args
-
-    def alias_as(self, name):
-        """ Create a copy of this function to be used as an alias """
-        alias = deepcopy(self)
-        alias.name = name
-        return alias
-
-    def add_default_arguments(self, field, columns, params):
-        # make sure to validate the argument count first to
-        # ensure the right number of arguments have been passed
-        self.validate_argument_count(field, columns)
-
-        columns = [column for column in columns]
-
-        # use default values to populate optional arguments if any
-        for argument in self.args[len(columns) :]:
-            try:
-                default = argument.get_default(params)
-            except InvalidFunctionArgument as e:
-                raise InvalidSearchQuery(f"{field}: invalid arguments: {e}")
-
-            # Hacky, but we expect column arguments to be strings so easiest to convert it back
-            columns.append(str(default) if default else default)
-
-        return columns
-
-    def format_as_arguments(self, field, columns, params):
-        columns = self.add_default_arguments(field, columns, params)
-
-        arguments = {}
-
-        # normalize the arguments before putting them in a dict
-        for argument, column in zip(self.args, columns):
-            try:
-                arguments[argument.name] = argument.normalize(column, params)
-            except InvalidFunctionArgument as e:
-                raise InvalidSearchQuery(f"{field}: {argument.name} argument invalid: {e}")
-
-        # populate any computed args
-        for calculation in self.calculated_args:
-            arguments[calculation["name"]] = calculation["fn"](arguments)
-
-        return arguments
-
-    def get_result_type(self, field=None, arguments=None):
-        if field is None or arguments is None or self.result_type_fn is None:
-            return self.default_result_type
-
-        result_type = self.result_type_fn(self.args, arguments)
-        if result_type is None:
-            return self.default_result_type
-
-        self.validate_result_type(result_type)
-        return result_type
-
-    def validate(self):
-        # assert that all optional args have defaults available
-        for i, arg in enumerate(self.optional_args):
-            assert (
-                arg.has_default
-            ), f"{self.name}: optional argument at index {i} does not have default"
-
-        # assert that the function has only one of the following specified
-        # `column`, `aggregate`, or `transform`
-        assert (
-            sum([self.column is not None, self.aggregate is not None, self.transform is not None])
-            == 1
-        ), f"{self.name}: only one of column, aggregate, or transform is allowed"
-
-        # assert that no duplicate argument names are used
-        names = set()
-        for arg in self.args:
-            assert (
-                arg.name not in names
-            ), f"{self.name}: argument {arg.name} specified more than once"
-            names.add(arg.name)
-
-        for calculation in self.calculated_args:
-            assert (
-                calculation["name"] not in names
-            ), "{}: argument {} specified more than once".format(self.name, calculation["name"])
-            names.add(calculation["name"])
-
-        self.validate_result_type(self.default_result_type)
-
-    def validate_argument_count(self, field, arguments):
-        """
-        Validate the number of required arguments the function defines against
-        provided arguments. Raise an exception if there is a mismatch in the
-        number of arguments. Do not return any values.
-
-        There are 4 cases:
-        1. provided # of arguments != required # of arguments AND provided # of arguments != total # of arguments (bad, raise an error)
-        2. provided # of arguments < required # of arguments (bad, raise an error)
-        3. provided # of arguments > total # of arguments (bad, raise an error)
-        4. required # of arguments <= provided # of arguments <= total # of arguments (good, pass the validation)
-        """
-        args_count = len(arguments)
-        total_args_count = self.total_args_count
-        if args_count != total_args_count:
-            required_args_count = self.required_args_count
-            if required_args_count == total_args_count:
-                raise InvalidSearchQuery(f"{field}: expected {total_args_count:g} argument(s)")
-            elif args_count < required_args_count:
-                raise InvalidSearchQuery(
-                    f"{field}: expected at least {required_args_count:g} argument(s)"
-                )
-            elif args_count > total_args_count:
-                raise InvalidSearchQuery(
-                    f"{field}: expected at most {total_args_count:g} argument(s)"
-                )
-
-    def validate_result_type(self, result_type):
-        assert (
-            result_type is None or result_type in RESULT_TYPES
-        ), f"{self.name}: result type {result_type} not one of {list(RESULT_TYPES)}"
-
-    def is_accessible(self, acl=None):
-        if not self.private:
-            return True
-        elif not acl:
-            return False
-        return self.name in acl
-
-
-def reflective_result_type(index=0):
-    def result_type_fn(function_arguments, parameter_values):
-        argument = function_arguments[index]
-        value = parameter_values[argument.name]
-        return argument.get_type(value)
-
-    return result_type_fn
-
-
-# When updating this list, also check if the following need to be updated:
|
|
|
-# - convert_search_filter_to_snuba_query
|
|
|
-# - static/app/utils/discover/fields.tsx FIELDS (for discover column list and search box autocomplete)
|
|
|
-FUNCTIONS = {
|
|
|
- function.name: function
|
|
|
- for function in [
|
|
|
- Function(
|
|
|
- "percentile",
|
|
|
- required_args=[NumericColumnNoLookup("column"), NumberRange("percentile", 0, 1)],
|
|
|
- aggregate=["quantile({percentile:g})", ArgValue("column"), None],
|
|
|
- result_type_fn=reflective_result_type(),
|
|
|
- default_result_type="duration",
|
|
|
- redundant_grouping=True,
|
|
|
- ),
|
|
|
- Function(
|
|
|
- "p50",
|
|
|
- optional_args=[with_default("transaction.duration", NumericColumnNoLookup("column"))],
|
|
|
- aggregate=["quantile(0.5)", ArgValue("column"), None],
|
|
|
- result_type_fn=reflective_result_type(),
|
|
|
- default_result_type="duration",
|
|
|
- redundant_grouping=True,
|
|
|
- ),
|
|
|
- Function(
|
|
|
- "p75",
|
|
|
- optional_args=[with_default("transaction.duration", NumericColumnNoLookup("column"))],
|
|
|
- aggregate=["quantile(0.75)", ArgValue("column"), None],
|
|
|
- result_type_fn=reflective_result_type(),
|
|
|
- default_result_type="duration",
|
|
|
- redundant_grouping=True,
|
|
|
- ),
|
|
|
- Function(
|
|
|
- "p95",
|
|
|
- optional_args=[with_default("transaction.duration", NumericColumnNoLookup("column"))],
|
|
|
- aggregate=["quantile(0.95)", ArgValue("column"), None],
|
|
|
- result_type_fn=reflective_result_type(),
|
|
|
- default_result_type="duration",
|
|
|
- redundant_grouping=True,
|
|
|
- ),
|
|
|
- Function(
|
|
|
- "p99",
|
|
|
- optional_args=[with_default("transaction.duration", NumericColumnNoLookup("column"))],
|
|
|
- aggregate=["quantile(0.99)", ArgValue("column"), None],
|
|
|
- result_type_fn=reflective_result_type(),
|
|
|
- default_result_type="duration",
|
|
|
- redundant_grouping=True,
|
|
|
- ),
|
|
|
- Function(
|
|
|
- "p100",
|
|
|
- optional_args=[with_default("transaction.duration", NumericColumnNoLookup("column"))],
|
|
|
- aggregate=["max", ArgValue("column"), None],
|
|
|
- result_type_fn=reflective_result_type(),
|
|
|
- default_result_type="duration",
|
|
|
- redundant_grouping=True,
|
|
|
- ),
|
|
|
- Function(
|
|
|
- "eps",
|
|
|
- optional_args=[IntervalDefault("interval", 1, None)],
|
|
|
- transform="divide(count(), {interval:g})",
|
|
|
- default_result_type="number",
|
|
|
- ),
|
|
|
- Function(
|
|
|
- "epm",
|
|
|
- optional_args=[IntervalDefault("interval", 1, None)],
|
|
|
- transform="divide(count(), divide({interval:g}, 60))",
|
|
|
- default_result_type="number",
|
|
|
- ),
|
|
|
- Function(
|
|
|
- "last_seen",
|
|
|
- aggregate=["max", "timestamp", "last_seen"],
|
|
|
- default_result_type="date",
|
|
|
- redundant_grouping=True,
|
|
|
- ),
|
|
|
- Function(
|
|
|
- "latest_event",
|
|
|
- aggregate=["argMax", ["id", "timestamp"], "latest_event"],
|
|
|
- default_result_type="string",
|
|
|
- ),
|
|
|
- Function(
|
|
|
- "apdex",
|
|
|
- required_args=[NumberRange("satisfaction", 0, None)],
|
|
|
- transform="apdex(duration, {satisfaction:g})",
|
|
|
- default_result_type="number",
|
|
|
- ),
|
|
|
- Function(
|
|
|
- "count_miserable",
|
|
|
- required_args=[CountColumn("column"), NumberRange("satisfaction", 0, None)],
|
|
|
- calculated_args=[{"name": "tolerated", "fn": lambda args: args["satisfaction"] * 4.0}],
|
|
|
- aggregate=[
|
|
|
- "uniqIf",
|
|
|
- [ArgValue("column"), ["greater", ["transaction.duration", ArgValue("tolerated")]]],
|
|
|
- None,
|
|
|
- ],
|
|
|
- default_result_type="number",
|
|
|
- ),
|
|
|
- Function(
|
|
|
- "user_misery",
|
|
|
- required_args=[NumberRange("satisfaction", 0, None)],
|
|
|
- # To correct for sensitivity to low counts, User Misery is modeled as a Beta Distribution Function.
|
|
|
- # With prior expectations, we have picked the expected mean user misery to be 0.05 and variance
|
|
|
- # to be 0.0004. This allows us to calculate the alpha (5.8875) and beta (111.8625) parameters,
|
|
|
- # with the user misery being adjusted for each fast/slow unique transaction. See:
|
|
|
- # https://stats.stackexchange.com/questions/47771/what-is-the-intuition-behind-beta-distribution
|
|
|
- # for an intuitive explanation of the Beta Distribution Function.
|
|
|
- optional_args=[
|
|
|
- with_default(5.8875, NumberRange("alpha", 0, None)),
|
|
|
- with_default(111.8625, NumberRange("beta", 0, None)),
|
|
|
- ],
|
|
|
- calculated_args=[
|
|
|
- {"name": "tolerated", "fn": lambda args: args["satisfaction"] * 4.0},
|
|
|
- {"name": "parameter_sum", "fn": lambda args: args["alpha"] + args["beta"]},
|
|
|
- ],
|
|
|
- transform="ifNull(divide(plus(uniqIf(user, greater(duration, {tolerated:g})), {alpha}), plus(uniq(user), {parameter_sum})), 0)",
|
|
|
- default_result_type="number",
|
|
|
- ),
-        Function("failure_rate", transform="failure_rate()", default_result_type="percentage"),
-        Function(
-            "failure_count",
-            aggregate=[
-                "countIf",
-                [
-                    [
-                        "not",
-                        [
-                            [
-                                "has",
-                                [
-                                    [
-                                        "array",
-                                        [
-                                            SPAN_STATUS_NAME_TO_CODE[name]
-                                            for name in ["ok", "cancelled", "unknown"]
-                                        ],
-                                    ],
-                                    "transaction_status",
-                                ],
-                            ],
-                        ],
-                    ],
-                ],
-                None,
-            ],
-            default_result_type="integer",
-        ),
-        Function(
-            "array_join",
-            required_args=[StringArrayColumn("column")],
-            column=["arrayJoin", [ArgValue("column")], None],
-            default_result_type="string",
-            private=True,
-        ),
-        Function(
-            "histogram",
-            required_args=[
-                NumericColumnNoLookup("column", allow_array_value=True),
-                # the bucket_size and start_offset should already be adjusted
-                # using the multiplier before they are passed here
-                NumberRange("bucket_size", 0, None),
-                NumberRange("start_offset", 0, None),
-                NumberRange("multiplier", 1, None),
-            ],
-            # floor((x * multiplier - start_offset) / bucket_size) * bucket_size + start_offset
-            # (see the worked sketch after the FUNCTIONS definition)
-            column=[
-                "plus",
-                [
-                    [
-                        "multiply",
-                        [
-                            [
-                                "floor",
-                                [
-                                    [
-                                        "divide",
-                                        [
-                                            [
-                                                "minus",
-                                                [
-                                                    [
-                                                        "multiply",
-                                                        [
-                                                            ArgValue("column"),
-                                                            ArgValue("multiplier"),
-                                                        ],
-                                                    ],
-                                                    ArgValue("start_offset"),
-                                                ],
-                                            ],
-                                            ArgValue("bucket_size"),
-                                        ],
-                                    ],
-                                ],
-                            ],
-                            ArgValue("bucket_size"),
-                        ],
-                    ],
-                    ArgValue("start_offset"),
-                ],
-                None,
-            ],
-            default_result_type="number",
-            private=True,
-        ),
-        Function(
-            "count_unique",
-            optional_args=[CountColumn("column")],
-            aggregate=["uniq", ArgValue("column"), None],
-            default_result_type="integer",
-        ),
-        Function(
-            "count",
-            optional_args=[NullColumn("column")],
-            aggregate=["count", None, None],
-            default_result_type="integer",
-        ),
-        Function(
-            "count_at_least",
-            required_args=[NumericColumnNoLookup("column"), NumberRange("threshold", 0, None)],
-            aggregate=[
-                "countIf",
-                [["greaterOrEquals", [ArgValue("column"), ArgValue("threshold")]]],
-                None,
-            ],
-            default_result_type="integer",
-        ),
-        Function(
-            "min",
-            required_args=[NumericColumnNoLookup("column")],
-            aggregate=["min", ArgValue("column"), None],
-            result_type_fn=reflective_result_type(),
-            default_result_type="duration",
-            redundant_grouping=True,
-        ),
-        Function(
-            "max",
-            required_args=[NumericColumnNoLookup("column")],
-            aggregate=["max", ArgValue("column"), None],
-            result_type_fn=reflective_result_type(),
-            default_result_type="duration",
-            redundant_grouping=True,
-        ),
-        Function(
-            "avg",
-            required_args=[NumericColumnNoLookup("column")],
-            aggregate=["avg", ArgValue("column"), None],
-            result_type_fn=reflective_result_type(),
-            default_result_type="duration",
-            redundant_grouping=True,
-        ),
-        Function(
-            "var",
-            required_args=[NumericColumnNoLookup("column")],
-            aggregate=["varSamp", ArgValue("column"), None],
-            default_result_type="number",
-            redundant_grouping=True,
-        ),
-        Function(
-            "stddev",
-            required_args=[NumericColumnNoLookup("column")],
-            aggregate=["stddevSamp", ArgValue("column"), None],
-            default_result_type="number",
-            redundant_grouping=True,
-        ),
-        Function(
-            "sum",
-            required_args=[NumericColumnNoLookup("column")],
-            aggregate=["sum", ArgValue("column"), None],
-            result_type_fn=reflective_result_type(),
-            default_result_type="duration",
-        ),
-        Function(
-            "any",
-            required_args=[FieldColumn("column")],
-            aggregate=["min", ArgValue("column"), None],
-            result_type_fn=reflective_result_type(),
-            redundant_grouping=True,
-        ),
-        # Currently only being used by the baseline PoC
-        Function(
-            "absolute_delta",
-            required_args=[DurationColumnNoLookup("column"), NumberRange("target", 0, None)],
-            column=["abs", [["minus", [ArgValue("column"), ArgValue("target")]]], None],
-            default_result_type="duration",
-        ),
-        # These range functions are for performance trends; they aren't generic If
-        # functions, to avoid allowing arbitrary if statements.
-        # Not yet supported in Discover, and shouldn't be added to fields.tsx.
-        Function(
-            "percentile_range",
-            required_args=[
-                NumericColumnNoLookup("column"),
-                NumberRange("percentile", 0, 1),
-                ConditionArg("condition"),
-                DateArg("middle"),
-            ],
-            aggregate=[
-                "quantileIf({percentile:.2f})",
-                [
-                    ArgValue("column"),
-                    # NOTE: This condition is written in this seemingly backwards way
-                    # because of how snuba special cases the following syntax
-                    # ["a", ["b", ["c", ["d"]]]]
-                    #
-                    # This array can be interpreted 2 ways
-                    # 1. a(b(c(d))) the way snuba interprets it
-                    #    - snuba special cases it when it detects an array where the first
-                    #      element is a literal, and the second element is an array and
-                    #      treats it as a function call rather than 2 separate arguments
-                    # 2. a(b, c(d)) the way we want it to be interpreted
-                    #
-                    # Because of how snuba interprets this expression, it is impossible
-                    # to specify a function with 2 arguments whose first argument is a literal
-                    # and the second argument is an expression.
-                    #
-                    # Working with this limitation, we have to invert the conditions in
-                    # order to express a function whose first argument is an expression while
-                    # the second argument is a literal.
-                    [ArgValue("condition"), [["toDateTime", [ArgValue("middle")]], "timestamp"]],
-                ],
-                None,
-            ],
-            default_result_type="duration",
-        ),
-        Function(
-            "avg_range",
-            required_args=[
-                NumericColumnNoLookup("column"),
-                ConditionArg("condition"),
-                DateArg("middle"),
-            ],
-            aggregate=[
-                "avgIf",
-                [
-                    ArgValue("column"),
-                    # see `percentile_range` for why this condition feels backwards
-                    [ArgValue("condition"), [["toDateTime", [ArgValue("middle")]], "timestamp"]],
-                ],
-                None,
-            ],
-            default_result_type="duration",
-        ),
-        Function(
-            "variance_range",
-            required_args=[
-                NumericColumnNoLookup("column"),
-                ConditionArg("condition"),
-                DateArg("middle"),
-            ],
-            aggregate=[
-                "varSampIf",
-                [
-                    ArgValue("column"),
-                    # see `percentile_range` for why this condition feels backwards
-                    [ArgValue("condition"), [["toDateTime", [ArgValue("middle")]], "timestamp"]],
-                ],
-                None,
-            ],
-            default_result_type="duration",
-        ),
-        Function(
-            "count_range",
-            required_args=[ConditionArg("condition"), DateArg("middle")],
-            aggregate=[
-                "countIf",
-                # see `percentile_range` for why this condition feels backwards
-                [[ArgValue("condition"), [["toDateTime", [ArgValue("middle")]], "timestamp"]]],
-                None,
-            ],
-            default_result_type="integer",
-        ),
-        Function(
-            "percentage",
-            required_args=[FunctionArg("numerator"), FunctionArg("denominator")],
-            # Since percentage is only used on aggregates, it needs to be an aggregate and not a column
-            # This is because as a column it would be added to the `WHERE` clause instead of the `HAVING` clause
-            aggregate=[
-                "if(greater({denominator},0),divide({numerator},{denominator}),null)",
-                None,
-                None,
-            ],
-            default_result_type="percentage",
-        ),
-        # Calculate Welch's t-test value; this is used to help identify which of our trends are significant.
-        Function(
-            "t_test",
-            required_args=[
-                FunctionAliasArg("avg_1"),
-                FunctionAliasArg("avg_2"),
-                FunctionAliasArg("variance_1"),
-                FunctionAliasArg("variance_2"),
-                FunctionAliasArg("count_1"),
-                FunctionAliasArg("count_2"),
-            ],
-            aggregate=[
-                "divide(minus({avg_1},{avg_2}),sqrt(plus(divide({variance_1},{count_1}),divide({variance_2},{count_2}))))",
-                None,
-                "t_test",
-            ],
-            default_result_type="number",
-        ),
-        Function(
-            "minus",
-            required_args=[FunctionArg("minuend"), FunctionArg("subtrahend")],
-            aggregate=["minus", [ArgValue("minuend"), ArgValue("subtrahend")], None],
-            default_result_type="duration",
-        ),
-        Function(
-            "absolute_correlation",
-            aggregate=[
-                "abs",
-                [["corr", [["toUnixTimestamp", ["timestamp"]], "transaction.duration"]]],
-                None,
-            ],
-            default_result_type="number",
-        ),
-        # Currently only used by trace meta so we can count event types, which is why this only accepts strings
-        Function(
-            "count_if",
-            required_args=[
-                ColumnNoLookup("column", allowed_columns=["event.type", "http.status_code"]),
-                ConditionArg("condition"),
-                StringArg("value"),
-            ],
-            aggregate=[
-                "countIf",
-                [
-                    [
-                        ArgValue("condition"),
-                        [
-                            ArgValue("column"),
-                            ArgValue("value"),
-                        ],
-                    ]
-                ],
-                None,
-            ],
-            default_result_type="integer",
-        ),
-        Function(
-            "compare_numeric_aggregate",
-            required_args=[
-                FunctionAliasArg("aggregate_alias"),
-                ConditionArg("condition"),
-                NumberRange("value", 0, None),
-            ],
-            aggregate=[
-                # snuba json syntax isn't compatible with this query here
-                # this function can't be a column, since we want to use this with aggregates
-                "{condition}({aggregate_alias},{value})",
-                None,
-                None,
-            ],
-            default_result_type="number",
-        ),
-        Function(
-            "to_other",
-            required_args=[
-                ColumnNoLookup("column", allowed_columns=["release", "trace.parent_span"]),
-                StringArg("value", unquote=True, unescape_quotes=True),
-            ],
-            optional_args=[
-                with_default("that", StringArg("that")),
-                with_default("this", StringArg("this")),
-            ],
-            column=[
-                "if",
-                [
-                    ["equals", [ArgValue("column"), ArgValue("value")]],
-                    ArgValue("this"),
-                    ArgValue("that"),
-                ],
-            ],
-        ),
-    ]
-}
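The user_misery and histogram entries above are the only ones in this registry whose comments describe non-trivial math. A minimal standalone sketch in plain Python (hypothetical helper names, illustrative values; the real computation happens inside snuba via the transform/column expressions above):

```python
import math


def user_misery(miserable_users, unique_users, alpha=5.8875, beta=111.8625):
    # Beta-prior smoothing of the miserable-user ratio, mirroring:
    # ifNull(divide(plus(uniqIf(user, greater(duration, tolerated)), alpha),
    #               plus(uniq(user), alpha + beta)), 0)
    # where tolerated = satisfaction * 4.0 and miserable_users counts unique
    # users whose transaction duration exceeded that threshold.
    return (miserable_users + alpha) / (unique_users + alpha + beta)


def histogram_bucket(x, bucket_size, start_offset, multiplier):
    # floor((x * multiplier - start_offset) / bucket_size) * bucket_size + start_offset
    return math.floor((x * multiplier - start_offset) / bucket_size) * bucket_size + start_offset


# With no users at all, the prior alone yields the chosen expected mean:
print(round(user_misery(0, 0), 6))  # 0.05
# A 0.37s value scaled to milliseconds and dropped into 100ms buckets:
print(histogram_bucket(0.37, bucket_size=100, start_offset=0, multiplier=1000))  # 300
```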
-# In Performance, TPM is used as an alias for EPM
-FUNCTION_ALIASES = {
-    "tpm": "epm",
-    "tps": "eps",
-}
-for alias, name in FUNCTION_ALIASES.items():
-    FUNCTIONS[alias] = FUNCTIONS[name].alias_as(alias)
-
-
-FUNCTION_ALIAS_PATTERN = re.compile(r"^({}).*".format("|".join(list(FUNCTIONS.keys()))))
-
-
-def is_function(field):
-    function_match = FUNCTION_PATTERN.search(field)
-    if function_match:
-        return function_match
-
-    return None
-
-
-def get_function_alias(field):
-    match = FUNCTION_PATTERN.search(field)
-    if match is None:
-        return field
-
-    if match.group("alias") is not None:
-        return match.group("alias")
-    function = match.group("function")
-    columns = parse_arguments(function, match.group("columns"))
-    return get_function_alias_with_columns(function, columns)
-
-
-def get_function_alias_with_columns(function_name, columns):
-    columns = re.sub(r"[^\w]", "_", "_".join(columns))
-    return f"{function_name}_{columns}".rstrip("_")
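Since the alias mangling above is pure stdlib, its behavior is easy to pin down. A standalone re-implementation with hand-traced outputs (assumed equivalent to the helper above):

```python
import re


def alias_with_columns(function_name, columns):
    # Mirrors get_function_alias_with_columns: non-word characters become
    # underscores, and a trailing underscore (no columns) is stripped.
    columns = re.sub(r"[^\w]", "_", "_".join(columns))
    return f"{function_name}_{columns}".rstrip("_")


print(alias_with_columns("percentile", ["transaction.duration", "0.95"]))
# percentile_transaction_duration_0_95
print(alias_with_columns("count", []))
# count
```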
-
-
-def format_column_arguments(column_args, arguments):
-    for i in range(len(column_args)):
-        if isinstance(column_args[i], (list, tuple)):
-            if isinstance(column_args[i][0], ArgValue):
-                column_args[i][0] = arguments[column_args[i][0].arg]
-            format_column_arguments(column_args[i][1], arguments)
-        elif isinstance(column_args[i], str):
-            column_args[i] = column_args[i].format(**arguments)
-        elif isinstance(column_args[i], ArgValue):
-            column_args[i] = arguments[column_args[i].arg]
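format_column_arguments rewrites the argument list in place, replacing ArgValue placeholders as it recurses into nested calls. A self-contained walk-through using a stand-in ArgValue (the real class lives earlier in this module), applied to count_at_least's argument shape:

```python
class ArgValueStub:
    def __init__(self, arg):
        self.arg = arg


def format_args(column_args, arguments):
    # Same logic as format_column_arguments above, using the stub.
    for i in range(len(column_args)):
        if isinstance(column_args[i], (list, tuple)):
            if isinstance(column_args[i][0], ArgValueStub):
                column_args[i][0] = arguments[column_args[i][0].arg]
            format_args(column_args[i][1], arguments)
        elif isinstance(column_args[i], str):
            column_args[i] = column_args[i].format(**arguments)
        elif isinstance(column_args[i], ArgValueStub):
            column_args[i] = arguments[column_args[i].arg]


args = [["greaterOrEquals", [ArgValueStub("column"), ArgValueStub("threshold")]]]
format_args(args, {"column": "transaction.duration", "threshold": 300})
print(args)  # [['greaterOrEquals', ['transaction.duration', 300]]]
```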
-
-
-def parse_arguments(function, columns):
-    """
-    The to_other function takes a quoted string for one of its arguments
-    that may contain commas, so it requires special handling.
-    """
-    if function != "to_other":
-        return [c.strip() for c in columns.split(",") if len(c.strip()) > 0]
-
-    args = []
-
-    quoted = False
-    escaped = False
-
-    i, j = 0, 0
-
-    while j < len(columns):
-        if i == j and columns[j] == '"':
-            # when we see a quote at the beginning of
-            # an argument, then this is a quoted string
-            quoted = True
-        elif quoted and not escaped and columns[j] == "\\":
-            # when we see a slash inside a quoted string,
-            # the next character is an escape character
-            escaped = True
-        elif quoted and not escaped and columns[j] == '"':
-            # when we see a non-escaped quote while inside
-            # of a quoted string, we should end it
-            quoted = False
-        elif quoted and escaped:
-            # when we are inside a quoted string and have
-            # begun an escape character, we should end it
-            escaped = False
-        elif quoted and columns[j] == ",":
-            # when we are inside a quoted string and see
-            # a comma, it should not be considered an
-            # argument separator
-            pass
-        elif columns[j] == ",":
-            # when we see a comma outside of a quoted string
-            # it is an argument separator
-            args.append(columns[i:j].strip())
-            i = j + 1
-        j += 1
-
-    if i != j:
-        # add in the last argument if any
-        args.append(columns[i:].strip())
-
-    return [arg for arg in args if arg]
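A usage sketch, assuming parse_arguments above is in scope. One subtlety traced from the state machine: quoting only starts when the quote is the very first character of an argument, so there must be no space after the separating comma:

```python
assert parse_arguments("percentile", "transaction.duration, 0.95") == [
    "transaction.duration",
    "0.95",
]
# Inside to_other, a quoted argument may contain commas without splitting:
assert parse_arguments("to_other", 'release,"beta, 1.0"') == ['release', '"beta, 1.0"']
```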
-
-
-def parse_function(field, match=None, err_msg=None):
-    if not match:
-        match = is_function(field)
-
-    if not match or match.group("function") not in FUNCTIONS:
-        if err_msg is None:
-            err_msg = f"{field} is not a valid function"
-        raise InvalidSearchQuery(err_msg)
-
-    function = match.group("function")
-    return (
-        function,
-        parse_arguments(function, match.group("columns")),
-        match.group("alias"),
-    )
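A usage sketch, assuming parse_function and the FUNCTION_PATTERN defined earlier in this module are in scope:

```python
# A known function parses into (name, raw argument list, optional alias):
assert parse_function("p95(measurements.fcp)") == ("p95", ["measurements.fcp"], None)

# An unknown function name is rejected:
try:
    parse_function("foo()")
except InvalidSearchQuery as exc:
    print(exc)  # foo() is not a valid function
```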
-
-
-FunctionDetails = namedtuple("FunctionDetails", "field instance arguments")
-ResolvedFunction = namedtuple("ResolvedFunction", "details column aggregate")
-
-
-def resolve_function(field, match=None, params=None, functions_acl=False):
-    if params is not None and field in params.get("aliases", {}):
-        alias = params["aliases"][field]
-        return ResolvedFunction(
-            FunctionDetails(field, FUNCTIONS["percentage"], []),
-            None,
-            alias.aggregate,
-        )
-    function_name, columns, alias = parse_function(field, match)
-    function = FUNCTIONS[function_name]
-    if not function.is_accessible(functions_acl):
-        raise InvalidSearchQuery(f"{function.name}: no access to private function")
-
-    arguments = function.format_as_arguments(field, columns, params)
-    details = FunctionDetails(field, function, arguments)
-
-    if function.transform is not None:
-        snuba_string = function.transform.format(**arguments)
-        if alias is None:
-            alias = get_function_alias_with_columns(function.name, columns)
-        return ResolvedFunction(
-            details,
-            None,
-            [snuba_string, None, alias],
-        )
-    elif function.aggregate is not None:
-        aggregate = deepcopy(function.aggregate)
-
-        aggregate[0] = aggregate[0].format(**arguments)
-        if isinstance(aggregate[1], (list, tuple)):
-            format_column_arguments(aggregate[1], arguments)
-        elif isinstance(aggregate[1], ArgValue):
-            arg = aggregate[1].arg
-            # The aggregate function has only a single argument; however, that
-            # argument is an expression, so we have to make sure to nest it so
-            # it doesn't get treated as a list of arguments by snuba.
-            if isinstance(arguments[arg], (list, tuple)):
-                aggregate[1] = [arguments[arg]]
-            else:
-                aggregate[1] = arguments[arg]
-
-        if alias is not None:
-            aggregate[2] = alias
-        elif aggregate[2] is None:
-            aggregate[2] = get_function_alias_with_columns(function.name, columns)
-
-        return ResolvedFunction(details, None, aggregate)
-    elif function.column is not None:
-        # These can be very nested functions, so we need to iterate through all the layers
-        addition = deepcopy(function.column)
-        addition[0] = addition[0].format(**arguments)
-        if isinstance(addition[1], (list, tuple)):
-            format_column_arguments(addition[1], arguments)
-        if len(addition) < 3:
-            if alias is not None:
-                addition.append(alias)
-            else:
-                addition.append(get_function_alias_with_columns(function.name, columns))
-        elif len(addition) == 3:
-            if alias is not None:
-                addition[2] = alias
-            elif addition[2] is None:
-                addition[2] = get_function_alias_with_columns(function.name, columns)
-            else:
-                addition[2] = addition[2].format(**arguments)
-        return ResolvedFunction(details, addition, None)
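resolve_function returns one of three shapes: a transform string, an aggregate triple, or a column expression. A hand-traced sketch of the aggregate branch, assuming the module's names are in scope:

```python
details, column, aggregate = resolve_function("p95()")
assert column is None
# The column falls back to with_default("transaction.duration", ...) and the
# alias falls back to get_function_alias_with_columns("p95", []).
assert aggregate == ["quantile(0.95)", "transaction.duration", "p95"]
```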
-
-
-def resolve_orderby(orderby, fields, aggregations):
-    """
-    We accept column names, aggregate functions, and aliases as order by
-    values. Aggregates and field aliases need to be resolved/validated.
-
-    TODO(mark) Once we're no longer using the dataset selection function,
-    this should allow all non-tag fields to be used as sort clauses,
-    instead of only those that are currently selected.
-    """
-    orderby = orderby if isinstance(orderby, (list, tuple)) else [orderby]
-    validated = []
-    for column in orderby:
-        bare_column = column.lstrip("-")
-
-        if bare_column in fields:
-            validated.append(column)
-            continue
-
-        if is_function(bare_column):
-            bare_column = get_function_alias(bare_column)
-
-        found = [agg[2] for agg in aggregations if agg[2] == bare_column]
-        if found:
-            prefix = "-" if column.startswith("-") else ""
-            validated.append(prefix + bare_column)
-            continue
-
-        if (
-            bare_column in FIELD_ALIASES
-            and FIELD_ALIASES[bare_column].alias
-            and bare_column != PROJECT_ALIAS
-        ):
-            prefix = "-" if column.startswith("-") else ""
-            validated.append(prefix + FIELD_ALIASES[bare_column].alias)
-            continue
-
-        found = [
-            col[2]
-            for col in fields
-            if isinstance(col, (list, tuple)) and col[2].strip("`") == bare_column
-        ]
-        if found:
-            prefix = "-" if column.startswith("-") else ""
-            validated.append(prefix + bare_column)
-
-    if len(validated) == len(orderby):
-        return validated
-
-    raise InvalidSearchQuery("Cannot order by a field that is not selected.")
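A usage sketch, assuming resolve_orderby above is in scope: an aggregate's alias is orderable once the aggregate is selected, while an unselected field raises:

```python
assert resolve_orderby("-count", ["title"], [["count", None, "count"]]) == ["-count"]

try:
    resolve_orderby("message", ["title"], [])
except InvalidSearchQuery as exc:
    print(exc)  # Cannot order by a field that is not selected.
```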
-
-
-def get_aggregate_alias(match):
-    column = match.group("column").replace(".", "_")
-    return "{}_{}".format(match.group("function"), column).rstrip("_")
-
-
-def resolve_field(field, params=None, functions_acl=None):
-    if not isinstance(field, str):
-        raise InvalidSearchQuery("Field names must be strings")
-
-    match = is_function(field)
-    if match:
-        return resolve_function(field, match, params, functions_acl)
-
-    if field in FIELD_ALIASES:
-        special_field = FIELD_ALIASES[field]
-        return ResolvedFunction(None, special_field.get_field(params), None)
-
-    tag_match = TAG_KEY_RE.search(field)
-    tag_field = tag_match.group("tag") if tag_match else field
-
-    if VALID_FIELD_PATTERN.match(tag_field):
-        return ResolvedFunction(None, field, None)
-    else:
-        raise InvalidSearchQuery(f"Invalid characters in field {field}")
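A usage sketch, assuming resolve_field above is in scope; plain fields and tags[...] keys pass through as columns, while function calls resolve to aggregates:

```python
assert resolve_field("tags[color]") == ResolvedFunction(None, "tags[color]", None)
assert resolve_field("count()").aggregate == ["count", None, "count"]
```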
-
-
-def resolve_field_list(
-    fields, snuba_filter, auto_fields=True, auto_aggregations=False, functions_acl=None
-):
-    """
-    Expand a list of fields based on aliases and aggregate functions.
-
-    Returns a dict of aggregations, selected_columns, and
-    groupby that can be merged into the result of get_snuba_query_args()
-    to build a more complete snuba query based on event search conventions.
-
-    Auto aggregates are aggregates that will be automatically added to the
-    list of aggregations when they're used in a condition, so that a condition
-    can reference them without the caller manually adding the aggregate to the
-    field list.
-    """
-    aggregations = []
-    aggregate_fields = defaultdict(set)
-    columns = []
-    groupby = []
-    project_key = ""
-    functions = {}
-
-    # If project is requested, we need to map ids to their names since snuba only has ids
-    if "project" in fields:
-        fields.remove("project")
-        project_key = "project"
-    # Since project.name is more specific, if both are included use project.name instead of project
-    if PROJECT_NAME_ALIAS in fields:
-        fields.remove(PROJECT_NAME_ALIAS)
-        project_key = PROJECT_NAME_ALIAS
-    if project_key:
-        if "project.id" not in fields:
-            fields.append("project.id")
-
-    for field in fields:
-        if isinstance(field, str) and field.strip() == "":
-            continue
-        function = resolve_field(field, snuba_filter.params, functions_acl)
-        if function.column is not None and function.column not in columns:
-            columns.append(function.column)
-            if function.details is not None and isinstance(function.column, (list, tuple)):
-                functions[function.column[-1]] = function.details
-        elif function.aggregate is not None:
-            aggregations.append(function.aggregate)
-            if function.details is not None and isinstance(function.aggregate, (list, tuple)):
-                functions[function.aggregate[-1]] = function.details
-                if function.details.instance.redundant_grouping:
-                    aggregate_fields[function.aggregate[1]].add(field)
-
-    # Only auto-aggregate when at least one other aggregate is present, so the group by is not unexpectedly changed
-    if auto_aggregations and snuba_filter.having and len(aggregations) > 0:
-        for agg in snuba_filter.condition_aggregates:
-            if agg not in snuba_filter.aliases:
-                function = resolve_field(agg, snuba_filter.params, functions_acl)
-                if function.aggregate is not None and function.aggregate not in aggregations:
-                    aggregations.append(function.aggregate)
-                    if function.details is not None and isinstance(
-                        function.aggregate, (list, tuple)
-                    ):
-                        functions[function.aggregate[-1]] = function.details
-
-                        if function.details.instance.redundant_grouping:
-                            aggregate_fields[function.aggregate[1]].add(field)
-
-    rollup = snuba_filter.rollup
-    if not rollup and auto_fields:
-        # Ensure fields we require to build a functioning interface
-        # are present. We don't add fields when using a rollup as the additional fields
-        # would be aggregated away.
-        if not aggregations and "id" not in columns:
-            columns.append("id")
-        if "id" in columns and "project.id" not in columns:
-            columns.append("project.id")
-            project_key = PROJECT_NAME_ALIAS
-
-    if project_key:
-        # Check to see if there's a condition on project ID already, to avoid unnecessary lookups
-        filtered_project_ids = None
-        if snuba_filter.conditions:
-            for cond in snuba_filter.conditions:
-                if cond[0] == "project_id":
-                    filtered_project_ids = [cond[2]] if cond[1] == "=" else cond[2]
-
-        project_ids = filtered_project_ids or snuba_filter.filter_keys.get("project_id", [])
-        projects = Project.objects.filter(id__in=project_ids).values("slug", "id")
-        # Clickhouse gets confused when the column contains a period
-        # This is specifically for project.name and should be removed once we can stop supporting it
-        if "." in project_key:
-            project_key = f"`{project_key}`"
-        columns.append(
-            [
-                "transform",
-                [
-                    # This is a workaround since the column by itself is currently treated as a function
-                    ["toString", ["project_id"]],
-                    ["array", ["'{}'".format(project["id"]) for project in projects]],
-                    ["array", ["'{}'".format(project["slug"]) for project in projects]],
-                    # Default case: what to do if a project id without a slug is found
-                    "''",
-                ],
-                project_key,
-            ]
-        )
-
-    if rollup and columns and not aggregations:
-        raise InvalidSearchQuery("You cannot use rollup without an aggregate field.")
-
-    orderby = snuba_filter.orderby
-    # Only sort if there are columns. When there are only aggregates there's no need to sort
-    if orderby and len(columns) > 0:
-        orderby = resolve_orderby(orderby, columns, aggregations)
-    else:
-        orderby = None
-
-    # If aggregations are present, all columns need to be added
-    # to the group by so that the query is valid.
-    if aggregations:
-        for column in columns:
-            if isinstance(column, (list, tuple)):
-                if column[0] == "transform":
-                    # When there's a project transform, we already group by project_id
-                    continue
-                if column[2] == USER_DISPLAY_ALIAS:
-                    # user.display needs to be grouped by its coalesce function
-                    groupby.append(column)
-                    continue
-                groupby.append(column[2])
-            else:
-                if column in aggregate_fields:
-                    conflicting_functions = list(aggregate_fields[column])
-                    raise InvalidSearchQuery(
-                        "A single field cannot be used both inside and outside a function in the same query. To use {field} you must first remove the function(s): {function_msg}".format(
-                            field=column,
-                            function_msg=", ".join(conflicting_functions[:2])
-                            + (
-                                f" and {len(conflicting_functions) - 2} more."
-                                if len(conflicting_functions) > 2
-                                else ""
-                            ),
-                        )
-                    )
-                groupby.append(column)
-
-    return {
-        "selected_columns": columns,
-        "aggregations": aggregations,
-        "groupby": groupby,
-        "orderby": orderby,
-        "functions": functions,
-    }
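resolve_field_list reads only a handful of attributes off snuba_filter, so a minimal stand-in is enough to see the shape of the result. A hypothetical sketch (real callers pass the filter object built by this module's search-query machinery):

```python
from types import SimpleNamespace

snuba_filter = SimpleNamespace(
    params={}, rollup=None, orderby=None, conditions=[],
    filter_keys={}, having=[], condition_aggregates=[], aliases={},
)
resolved = resolve_field_list(["title", "count()"], snuba_filter)
assert resolved["selected_columns"] == ["title"]
assert resolved["aggregations"] == [["count", None, "count"]]
# With an aggregate present, every plain column is pushed into the group by:
assert resolved["groupby"] == ["title"]
```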
-
-
-TAG_KEY_RE = re.compile(r"^tags\[(?P<tag>.*)\]$")
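For example, the pattern extracts the raw tag name from the bracketed syntax:

```python
match = TAG_KEY_RE.search("tags[browser.name]")
assert match is not None and match.group("tag") == "browser.name"
```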