query.py 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. """Dynamic query parsing library."""
  2. from typing import Any, List, Optional, Tuple, Union
  3. from rest_framework.exceptions import ParseError
  4. from snuba_sdk import Column, Condition, Function, Identifier, Lambda, Op
  5. from snuba_sdk.conditions import And, Or
  6. from snuba_sdk.expressions import Expression
  7. from snuba_sdk.orderby import Direction, OrderBy
  8. from sentry.api.event_search import ParenExpression, SearchFilter
  9. # Interface.
# Lookup table from the operator strings carried by parsed search filters to the matching
# snuba_sdk ``Op`` member. Field.deserialize_operator reports any string that is missing from
# this table as "Operator not found."
OPERATOR_MAP = {
    "=": Op.EQ,
    "!=": Op.NEQ,
    ">": Op.GT,
    ">=": Op.GTE,
    "<": Op.LT,
    "<=": Op.LTE,
    "IN": Op.IN,
    "NOT IN": Op.NOT_IN,
}
  20. class Field:
  21. def __init__(
  22. self,
  23. name: Optional[str] = None,
  24. field_alias: Optional[str] = None,
  25. query_alias: Optional[str] = None,
  26. is_filterable: bool = True,
  27. is_sortable: bool = True,
  28. operators: Optional[list] = None,
  29. validators: Optional[list] = None,
  30. ) -> None:
  31. self.attribute_name = None
  32. self.field_alias = field_alias or name
  33. self.query_alias = query_alias or name
  34. self.is_filterable = is_filterable
  35. self.is_sortable = is_sortable
  36. self.operators = operators or self._operators
  37. self.validators = validators or []
  38. def deserialize_operator(self, operator: str) -> Tuple[Any, List[str]]:
  39. op = OPERATOR_MAP.get(operator)
  40. if op is None:
  41. return None, ["Operator not found."]
  42. elif op not in self.operators:
  43. return None, ["Operator not permitted."]
  44. else:
  45. return op, []
  46. def deserialize_values(self, values: List[str]) -> Tuple[List[Any], List[str]]:
  47. parsed_values = []
  48. for value in values:
  49. parsed_value, errors = self.deserialize_value(value)
  50. if errors:
  51. return None, errors
  52. parsed_values.append(parsed_value)
  53. return parsed_values, []
  54. def deserialize_value(self, value: Union[List[str], str]) -> Tuple[Any, List[str]]:
  55. if isinstance(value, list):
  56. return self.deserialize_values(value)
  57. try:
  58. typed_value = self._python_type(value)
  59. except ValueError:
  60. return None, ["Invalid value specified."]
  61. for validator in self.validators:
  62. error = validator(typed_value)
  63. if error:
  64. return None, [error]
  65. return typed_value, []
  66. def as_condition(
  67. self,
  68. field_alias: str,
  69. operator: Op,
  70. value: Union[List[str], str],
  71. ) -> Condition:
  72. return Condition(Column(self.query_alias or self.attribute_name), operator, value)
  73. class String(Field):
  74. _operators = [Op.EQ, Op.NEQ, Op.IN, Op.NOT_IN]
  75. _python_type = str
  76. class Number(Field):
  77. _operators = [Op.EQ, Op.NEQ, Op.GT, Op.GTE, Op.LT, Op.LTE, Op.IN, Op.NOT_IN]
  78. _python_type = int
  79. class Tag(Field):
  80. _operators = [Op.EQ, Op.NEQ, Op.IN, Op.NOT_IN]
  81. _negation_map = [False, True, False, True]
  82. _python_type = str
  83. def __init__(self, **kwargs):
  84. kwargs.pop("operators", None)
  85. return super().__init__(**kwargs)
  86. def deserialize_operator(self, operator: str) -> Tuple[Op, List[str]]:
  87. op = OPERATOR_MAP.get(operator)
  88. if op is None:
  89. return None, ["Operator not found."]
  90. elif op not in self._operators:
  91. return None, ["Operator not permitted."]
  92. else:
  93. return op, []
  94. def as_condition(
  95. self,
  96. field_alias: str,
  97. operator: Op,
  98. value: Union[List[str], str],
  99. ) -> Condition:
  100. negated = operator not in (Op.EQ, Op.IN)
  101. return filter_tag_by_value(
  102. key=field_alias,
  103. values=value,
  104. negated=negated,
  105. )
  106. class QueryConfig:
  107. def __init__(self, only: Optional[Tuple[str]] = None) -> None:
  108. self.fields = {}
  109. for field_name in only or self.__class__.__dict__:
  110. field = getattr(self, field_name)
  111. if isinstance(field, Field):
  112. field.attribute_name = field_name
  113. self.insert(field_name, field)
  114. self.insert(field.field_alias, field)
  115. def get(self, field_name: str, default=None) -> Field:
  116. return self.fields.get(field_name, default)
  117. def insert(self, field_name: Optional[str], value: Field) -> None:
  118. if field_name is None:
  119. return None
  120. elif field_name in self.fields:
  121. raise KeyError(f"Field already exists: {field_name}")
  122. else:
  123. self.fields[field_name] = value
  124. # Implementation.
  125. def generate_valid_conditions(
  126. query: List[Union[SearchFilter, ParenExpression, str]], query_config: QueryConfig
  127. ) -> List[Expression]:
  128. """Convert search filters to snuba conditions."""
  129. result: List[Expression] = []
  130. look_back = None
  131. for search_filter in query:
  132. # SearchFilters are appended to the result set. If they are top level filters they are
  133. # implicitly And'ed in the WHERE/HAVING clause.
  134. if isinstance(search_filter, SearchFilter):
  135. condition = filter_to_condition(search_filter, query_config)
  136. if look_back == "AND":
  137. look_back = None
  138. attempt_compressed_condition(result, condition, And)
  139. elif look_back == "OR":
  140. look_back = None
  141. attempt_compressed_condition(result, condition, Or)
  142. else:
  143. result.append(condition)
  144. # ParenExpressions are recursively computed. If more than one condition is returned then
  145. # those conditions are And'ed.
  146. elif isinstance(search_filter, ParenExpression):
  147. conditions = generate_valid_conditions(search_filter.children, query_config)
  148. if len(conditions) < 2:
  149. result.extend(conditions)
  150. else:
  151. result.append(And(conditions))
  152. # String types are limited to AND and OR... I think? In the case where its not a valid
  153. # look-back it is implicitly ignored.
  154. elif isinstance(search_filter, str):
  155. look_back = search_filter
  156. return result
  157. def filter_to_condition(search_filter: SearchFilter, query_config: QueryConfig) -> Condition:
  158. """Coerce SearchFilter syntax to snuba Condition syntax."""
  159. # Validate field exists and is filterable.
  160. field_alias = search_filter.key.name
  161. field = query_config.get(field_alias) or query_config.get("*")
  162. if field is None:
  163. raise ParseError(f"Invalid field specified: {field_alias}.")
  164. if not field.is_filterable:
  165. raise ParseError(f'"{field_alias}" is not filterable.')
  166. # Validate strategy is correct.
  167. query_operator = search_filter.operator
  168. operator, errors = field.deserialize_operator(query_operator)
  169. if errors:
  170. raise ParseError(f"Invalid operator specified: {field_alias}.")
  171. # Deserialize value to its correct type or error.
  172. query_value = search_filter.value.value
  173. value, errors = field.deserialize_value(query_value)
  174. if errors:
  175. raise ParseError(f"Invalid value specified: {field_alias}.")
  176. return field.as_condition(field_alias, operator, value)
  177. def attempt_compressed_condition(
  178. result: List[Expression],
  179. condition: Condition,
  180. condition_type: Union[And, Or],
  181. ):
  182. """Unnecessary query optimization.
  183. Improves legibility for query debugging. Clickhouse would flatten these nested OR statements
  184. internally anyway.
  185. (block OR block) OR block => (block OR block OR block)
  186. """
  187. if isinstance(result[-1], condition_type):
  188. result[-1].conditions.append(condition)
  189. else:
  190. result.append(condition_type([result.pop(), condition]))
  191. def get_valid_sort_commands(
  192. sort: Optional[str],
  193. default: OrderBy,
  194. query_config: QueryConfig,
  195. ) -> List[OrderBy]:
  196. if not sort:
  197. return [default]
  198. if sort.startswith("-"):
  199. strategy = Direction.DESC
  200. field_name = sort[1:]
  201. else:
  202. strategy = Direction.ASC
  203. field_name = sort
  204. field = query_config.get(field_name)
  205. if not field:
  206. raise ParseError(f"Invalid field specified: {field_name}.")
  207. else:
  208. return [OrderBy(Column(field.query_alias or field.attribute_name), strategy)]
  209. # Tag filtering behavior.
  210. def filter_tag_by_value(
  211. key: str,
  212. values: Union[List[str], str],
  213. negated: bool = False,
  214. ) -> Condition:
  215. """Helper function that allows filtering a tag by multiple values."""
  216. function = "hasAny" if isinstance(values, list) else "has"
  217. expected = 0 if negated else 1
  218. return Condition(
  219. Function(function, parameters=[_all_values_for_tag_key(key), values]),
  220. Op.EQ,
  221. expected,
  222. )
  223. def _all_values_for_tag_key(key: str) -> Function:
  224. return Function(
  225. "arrayFilter",
  226. parameters=[
  227. Lambda(
  228. ["key", "mask"],
  229. Function("equals", parameters=[Identifier("mask"), 1]),
  230. ),
  231. Column("tv"),
  232. _bitmask_on_tag_key(key),
  233. ],
  234. )
  235. def _bitmask_on_tag_key(key: str) -> Function:
  236. """Create a bit mask.
  237. Returns an array where the integer 1 represents a match.
  238. e.g.: [0, 0, 1, 0, 1, 0]
  239. """
  240. return Function(
  241. "arrayMap",
  242. parameters=[
  243. Lambda(
  244. ["index", "key"],
  245. Function("equals", parameters=[Identifier("key"), key]),
  246. ),
  247. Function("arrayEnumerate", parameters=[Column("tk")]),
  248. Column("tk"),
  249. ],
  250. )