from __future__ import annotations

import hashlib
import logging
from dataclasses import dataclass
from enum import Enum
from typing import (
    Any,
    Callable,
    Dict,
    List,
    Literal,
    Optional,
    Sequence,
    Tuple,
    Type,
    TypedDict,
    TypeVar,
    Union,
    cast,
)

import sentry_sdk
from django.utils.functional import cached_property
from typing_extensions import NotRequired

from sentry.api import event_search
from sentry.api.event_search import (
    AggregateFilter,
    ParenExpression,
    QueryOp,
    QueryToken,
    SearchFilter,
    SearchKey,
    SearchValue,
)
from sentry.constants import APDEX_THRESHOLD_DEFAULT, DataCategory
from sentry.discover.arithmetic import is_equation
from sentry.exceptions import InvalidSearchQuery
from sentry.models.project import Project
from sentry.models.transaction_threshold import ProjectTransactionThreshold, TransactionMetric
from sentry.search.events import fields
from sentry.search.events.builder import UnresolvedQuery
from sentry.search.events.constants import VITAL_THRESHOLDS
from sentry.snuba.dataset import Dataset
from sentry.snuba.metrics.naming_layer.mri import ParsedMRI, parse_mri
from sentry.snuba.metrics.utils import MetricOperationType
from sentry.utils.snuba import is_measurement, is_span_op_breakdown, resolve_column

logger = logging.getLogger(__name__)

# Name component of MRIs used for custom alert metrics.
CUSTOM_ALERT_METRIC_NAME = "transactions/on_demand"
QUERY_HASH_KEY = "query_hash"

# Base type for conditions to evaluate on payloads.
# TODO: Streamline with dynamic sampling.
RuleCondition = Union["LogicalRuleCondition", "ComparingRuleCondition", "NotRuleCondition"]

# Maps from Discover's field names to event protocol paths. See Relay's
# ``Getter`` implementation for ``Event`` for supported fields. All fields need to be prefixed
# with "event.".
# The list of UI-supported search fields is defined in sentry/static/app/utils/fields/index.ts
_SEARCH_TO_PROTOCOL_FIELDS = {
    # Top-level fields
    "release": "release",
    "dist": "dist",
    "environment": "environment",
    "transaction": "transaction",
    "platform": "platform",
    "platform.name": "platform",
    "level": "level",
    "logger": "logger",
    # Top-level structures ("interfaces")
    "user.email": "user.email",
    "user.id": "user.id",
    "user.ip": "user.ip_address",
    "user.username": "user.name",
    "user.segment": "user.segment",
    "geo.city": "user.geo.city",
    "geo.country_code": "user.geo.country_code",
    "geo.region": "user.geo.region",
    "geo.subdivision": "user.geo.subdivision",
    "http.method": "request.method",
    "http.url": "request.url",
    "http.referer": "request.headers.Referer",
    "transaction.source": "transaction.source",
    # url is a tag extracted by Sentry itself; on Relay it's received as `request.url`
    "url": "request.url",
    "sdk.name": "sdk.name",
    "sdk.version": "sdk.version",
    # Subset of context fields
    "app.in_foreground": "contexts.app.in_foreground",
    "device": "contexts.device.model",
    "device.arch": "contexts.device.arch",
    "device.battery_level": "contexts.device.battery_level",
    "device.brand": "contexts.device.brand",
    "device.charging": "contexts.device.charging",
    "device.family": "contexts.device.family",
    "device.locale": "contexts.device.locale",
    "device.online": "contexts.device.online",
    "device.orientation": "contexts.device.orientation",
    "device.name": "contexts.device.name",
    "device.screen_density": "contexts.device.screen_density",
    "device.screen_dpi": "contexts.device.screen_dpi",
    "device.screen_width_pixels": "contexts.device.screen_width_pixels",
    "device.screen_height_pixels": "contexts.device.screen_height_pixels",
    "device.simulator": "contexts.device.simulator",
    "os.build": "contexts.os.build",
    "os.kernel_version": "contexts.os.kernel_version",
    "os.name": "contexts.os.name",
    "os.version": "contexts.os.version",
    "browser.name": "contexts.browser.name",
    "device.uuid": "contexts.device.uuid",
    "transaction.status": "contexts.trace.status",
    "transaction.op": "contexts.trace.op",
    "http.status_code": "contexts.response.status_code",
    "unreal.crash_type": "contexts.unreal.crash_type",
    "profile.id": "contexts.profile.profile_id",
    # Computed fields
    "transaction.duration": "duration",
    "release.build": "release.build",
    "release.package": "release.package",
    "release.version": "release.version.short",
    # Known tags that have to be mapped to fields that Relay can extract
    "tags[level]": "level",
    "tags[logger]": "logger",
    "tags[environment]": "environment",
    "tags[transaction]": "transaction",
    "tags[release]": "release",
    "tags[dist]": "dist",
    # These match the mapping in sentry/interfaces/contexts.py
    "tags[app.device]": "device_app_hash",
    "tags[device]": "device.model",
    "tags[device.family]": "device.family",
    "tags[runtime]": "runtime",
    "tags[runtime.name]": "runtime.name",
    "tags[browser]": "browser",
    "tags[browser.name]": "browser.name",
    "tags[os]": "os",
    "tags[os.name]": "os.name",
    "tags[os.rooted]": "os.rooted",
    "tags[gpu.vendor]": "gpu.vendor_name",
    "tags[gpu.name]": "gpu.name",
    "tags[monitor.id]": "monitor.id",
    "tags[monitor.slug]": "monitor.slug",
    # Tags, measurements, and breakdowns are mapped by the converter
}

# Maps from Discover's syntax to Relay rule condition operators.
_SEARCH_TO_RELAY_OPERATORS: Dict[str, CompareOp] = {
    "=": "eq",
    "!=": "eq",  # combined with external negation
    "<": "lt",
    "<=": "lte",
    ">": "gt",
    ">=": "gte",
    "IN": "eq",
    "NOT IN": "eq",  # combined with external negation
}

# Maps from parsed count_if condition args to Relay rule condition operators.
_COUNTIF_TO_RELAY_OPERATORS: Dict[str, CompareOp] = {
    "equals": "eq",
    "notEquals": "eq",
    "less": "lt",
    "greater": "gt",
    "lessOrEquals": "lte",
    "greaterOrEquals": "gte",
}

# Maps plain Discover functions to metric aggregation functions.
_SEARCH_TO_METRIC_AGGREGATES: Dict[str, MetricOperationType] = {
    "count": "sum",
    "count_if": "sum",
    "avg": "avg",
    "min": "min",
    "max": "max",
    "p50": "p50",
    "p75": "p75",
    "p90": "p90",
    "p95": "p95",
    "p99": "p99",
    # p100 is not supported in the metrics layer, so we convert it to max, which is equivalent.
    "p100": "max",
    # The generic percentile function is not supported by the metrics layer.
}

# Maps plain Discover functions to derived metric functions which are understood by the metrics layer.
_SEARCH_TO_DERIVED_METRIC_AGGREGATES: Dict[str, MetricOperationType] = {
    "failure_count": "on_demand_failure_count",
    "failure_rate": "on_demand_failure_rate",
    "apdex": "on_demand_apdex",
    "count_web_vitals": "on_demand_count_web_vitals",
    "epm": "on_demand_epm",
    "eps": "on_demand_eps",
    "user_misery": "on_demand_user_misery",
}

# Mapping to infer the metric type from a Discover function.
_AGGREGATE_TO_METRIC_TYPE = {
    "count": "c",
    "count_if": "c",
    "avg": "d",
    "max": "d",
    "p50": "d",
    "p75": "d",
    "p90": "d",
    "p95": "d",
    "p99": "d",
    "p100": "d",
    "percentile": "d",
    # With on-demand metrics, evaluated metrics are actually stored, thus we have to choose a concrete metric type.
    "failure_count": "c",
    "failure_rate": "c",
    "count_web_vitals": "c",
    "apdex": "c",
    "epm": "c",
    "eps": "c",
    "user_misery": "s",
}

_NO_ARG_METRICS = [
    "on_demand_epm",
    "on_demand_eps",
    "on_demand_failure_count",
    "on_demand_failure_rate",
]
_MULTIPLE_ARGS_METRICS = ["on_demand_apdex", "on_demand_count_web_vitals", "on_demand_user_misery"]

# Query fields that on their own do not require on-demand metric extraction, but that will be
# converted to metric extraction conditions if present in an on-demand query.
_STANDARD_METRIC_FIELDS = [
    "release",
    "dist",
    "environment",
    "transaction",
    "platform",
    "transaction.status",
    "transaction.op",
    "http.method",
    "http.status_code",
    "browser.name",
    "os.name",
    "geo.country_code",
]

# Query fields that we do not consider for extraction since they are not needed.
_BLACKLISTED_METRIC_FIELDS = ["event.type", "project"]

# Operators used in ``ComparingRuleCondition``.
CompareOp = Literal["eq", "gt", "gte", "lt", "lte", "glob"]

Variables = Dict[str, Any]

query_builder = UnresolvedQuery(
    dataset=Dataset.Transactions, params={}
)  # Workaround to get all updated discover functions instead of using the deprecated events fields.


class ComparingRuleCondition(TypedDict):
    """RuleCondition that compares a named field to a reference value."""

    op: CompareOp
    name: str
    value: Any


class LogicalRuleCondition(TypedDict):
    """RuleCondition that applies a logical operator to a sequence of conditions."""

    op: Literal["and", "or"]
    inner: List[RuleCondition]


class NotRuleCondition(TypedDict):
    """RuleCondition that negates an inner condition."""

    op: Literal["not"]
    inner: RuleCondition
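

# Illustrative sketch (not from the original source): a nested ``RuleCondition`` built from the
# TypedDicts above, matching events in the "prod" environment whose trace status is not "ok":
#
#     {
#         "op": "and",
#         "inner": [
#             {"op": "eq", "name": "event.environment", "value": "prod"},
#             {"op": "not", "inner": {"op": "eq", "name": "event.contexts.trace.status", "value": "ok"}},
#         ],
#     }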


class TagSpec(TypedDict):
    """
    Configuration for a tag to add to a metric.

    Tag values can be static if defined through `value`, or dynamically queried
    from the payload if defined through `field`. These two options are mutually
    exclusive; behavior is undefined if both are specified.
    """

    key: str
    field: NotRequired[str]
    value: NotRequired[str]
    condition: NotRequired[RuleCondition]


class MetricSpec(TypedDict):
    """
    Specification for a metric to extract from some data.

    The metric type is given as part of the MRI (metric reference identifier),
    which must follow the form: `<type>:<namespace>/<name>@<unit>`.

    How the metric's value is obtained depends on the metric type:

    - Counter metrics are a special case, since the default product counters do
      not count any specific field but rather the occurrence of the event. As
      such, there is no value expression, and the field is set to `None`.
      Semantics of specifying a field remain undefined at this point.
    - Distribution metrics require a numeric value.
    - Set metrics require a string value, which is then emitted into the set as
      a unique value. Insertion of numbers and other types is undefined.
    """

    category: Literal["transaction"]
    mri: str
    field: NotRequired[Optional[str]]
    condition: NotRequired[RuleCondition]
    tags: NotRequired[Sequence[TagSpec]]
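

# Illustrative sketch (not from the original source): a counter MetricSpec as Relay would receive
# it. The ``query_hash`` value is made up for the example; it is normally computed by
# ``OnDemandMetricSpec.query_hash`` below.
#
#     {
#         "category": "transaction",
#         "mri": "c:transactions/on_demand@none",
#         "field": None,
#         "condition": {"op": "gte", "name": "event.duration", "value": 1000.0},
#         "tags": [{"key": "query_hash", "value": "f1353b0f"}],
#     }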


def _check_event_type_transaction(
    query: Sequence[QueryToken], is_top_level_call: bool = True
) -> bool:
    transaction_filter = False

    for token in query:
        if isinstance(token, SearchFilter):
            if token.key.name == "event.type" and token.value.value == "transaction":
                transaction_filter = True
                break
        elif isinstance(token, ParenExpression):
            contains_transaction = _check_event_type_transaction(
                token.children, is_top_level_call=False
            )
            if contains_transaction:
                transaction_filter = True
                break

    # Only the top-level call raises if no transaction filter was found; in a nested expression,
    # not finding one here does not mean the query as a whole lacks it.
    if is_top_level_call and not transaction_filter:
        raise ValueError("event.type:transaction not found in the query")

    return transaction_filter


def _transform_search_filter(search_filter: SearchFilter) -> SearchFilter:
    # If we have `message:something`, we convert it to `message:*something*` since we want to
    # perform `contains` matching exactly the way Discover does it.
    if search_filter.key.name == "message":
        return SearchFilter(
            key=SearchKey(name=search_filter.key.name),
            operator=search_filter.operator,
            value=SearchValue(raw_value=f"*{search_filter.value.raw_value}*"),
        )

    # If we have `transaction.status:unknown_error`, we convert it to `transaction.status:unknown`
    # since we need to be backward compatible.
    if (
        search_filter.key.name == "transaction.status"
        and search_filter.value.raw_value == "unknown_error"
    ):
        return SearchFilter(
            key=SearchKey(name=search_filter.key.name),
            operator=search_filter.operator,
            value=SearchValue(raw_value="unknown"),
        )

    return search_filter


def _transform_search_query(query: Sequence[QueryToken]) -> Sequence[QueryToken]:
    transformed_query: List[QueryToken] = []

    for token in query:
        if isinstance(token, SearchFilter):
            transformed_query.append(_transform_search_filter(token))
        elif isinstance(token, ParenExpression):
            transformed_query.append(ParenExpression(_transform_search_query(token.children)))
        else:
            transformed_query.append(token)

    return transformed_query


def parse_search_query(
    query: Optional[str],
    removed_blacklisted: bool = False,
    force_transaction_event_type: bool = False,
) -> Sequence[QueryToken]:
    """
    Parses a search query with the Discover grammar and performs some transformations on the AST
    in order to account for edge cases.
    """
    tokens = cast(Sequence[QueryToken], event_search.parse_search_query(query))

    # We might want to force `event.type:transaction` to be in the query, as a validation step.
    if force_transaction_event_type:
        _check_event_type_transaction(tokens)

    # As a first step, we transform the search query by applying basic transformations.
    tokens = _transform_search_query(tokens)

    # As a second step, if enabled, we remove elements from the query which are blacklisted.
    if removed_blacklisted:
        tokens = cleanup_search_query(_remove_blacklisted_search_filters(tokens))

    return tokens


def cleanup_search_query(tokens: Sequence[QueryToken]) -> Sequence[QueryToken]:
    """
    Recreates a valid query from an original query that has had on-demand search filters removed.

    Removing filters from a query can leave it invalid. For example, removing the on-demand
    filters from "transaction.duration:>=1s OR browser.version:1 AND environment:dev" results in
    "OR AND environment:dev", which is not a valid query; it should be cleaned to "environment:dev".
    Similarly, "release:internal and browser.version:1 or os.name:android" becomes
    "release:internal or and os.name:android", which is cleaned to
    "release:internal or os.name:android".
    """
    tokens = list(tokens)

    # remove empty parens
    removed_empty_parens: List[QueryToken] = []
    for token in tokens:
        if not isinstance(token, ParenExpression):
            removed_empty_parens.append(token)
        else:
            children = cleanup_search_query(token.children)
            if len(children) > 0:
                removed_empty_parens.append(ParenExpression(children))

    # remove AND and OR operators at the start of the query
    while len(removed_empty_parens) > 0 and isinstance(removed_empty_parens[0], str):
        removed_empty_parens.pop(0)

    # remove AND and OR operators at the end of the query
    while len(removed_empty_parens) > 0 and isinstance(removed_empty_parens[-1], str):
        removed_empty_parens.pop()

    # remove AND and OR operators that are next to each other
    ret_val = []
    previous_token: Optional[QueryToken] = None

    for token in removed_empty_parens:
        # this loop takes care of removing consecutive AND/OR operators (keeping only one of them)
        if isinstance(token, str) and isinstance(previous_token, str):
            token = cast(QueryOp, token.upper())
            # this handles two AND/OR operators next to each other; we must drop one of them
            # if we have an AND, do nothing (AND will be merged into the previous token, see below)
            # if we have an OR, the resulting operator will be an OR
            # AND OR => OR
            # OR OR => OR
            # OR AND => OR
            # AND AND => AND
            if token == "OR":
                previous_token = "OR"
            continue
        elif previous_token is not None:
            ret_val.append(previous_token)
        previous_token = token

    # take care of the last token (if any)
    if previous_token is not None:
        ret_val.append(previous_token)

    return ret_val


def _parse_function(aggregate: str) -> Tuple[str, List[str], str]:
    """
    Parses an aggregate and returns its components.

    This function is a slightly modified version of the `parse_function` method of the query builders.
    """
    match = fields.is_function(aggregate)
    if not match:
        raise InvalidSearchQuery(f"Invalid characters in field {aggregate}")

    function = match.group("function")
    arguments = fields.parse_arguments(function, match.group("columns"))
    alias = match.group("alias")

    if alias is None:
        alias = fields.get_function_alias_with_columns(function, arguments)

    return function, arguments, alias
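

# Illustrative behaviour of ``_parse_function`` (a sketch; the exact alias string is produced by
# ``fields.get_function_alias_with_columns`` and is assumed here):
#
#     _parse_function("p75(measurements.lcp)")
#     # -> ("p75", ["measurements.lcp"], "p75_measurements_lcp")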


@dataclass(frozen=True)
class SupportedBy:
    """Result of a check for standard and on-demand metric support."""

    standard_metrics: bool
    on_demand_metrics: bool

    @classmethod
    def neither(cls):
        return cls(standard_metrics=False, on_demand_metrics=False)

    @classmethod
    def both(cls):
        return cls(standard_metrics=True, on_demand_metrics=True)

    @classmethod
    def combine(cls, *supported_by):
        return cls(
            standard_metrics=all(s.standard_metrics for s in supported_by),
            on_demand_metrics=all(s.on_demand_metrics for s in supported_by),
        )


def should_use_on_demand_metrics(
    dataset: Optional[Union[str, Dataset]],
    aggregate: str,
    query: Optional[str],
    groupbys: Optional[Sequence[str]] = None,
    prefilling: bool = False,
) -> bool:
    """On-demand metrics are used if the aggregate and query are supported by on-demand metrics but not by standard metrics."""
    groupbys = groupbys or []
    supported_datasets = [Dataset.PerformanceMetrics]
    # In case we are running a prefill, we also want to support transactions, since our goal is to
    # start extracting metrics that will be needed after a query is converted from using
    # transactions to metrics.
    if prefilling:
        supported_datasets.append(Dataset.Transactions)

    if not dataset or Dataset(dataset) not in supported_datasets:
        return False

    components = _extract_aggregate_components(aggregate)
    if components is None:
        return False

    function, args = components
    mri_aggregate = _extract_mri(args)
    if mri_aggregate is not None:
        # For now, we do not support MRIs in on-demand metrics.
        return False

    aggregate_supported_by = _get_aggregate_supported_by(function, args)
    query_supported_by = _get_query_supported_by(query)
    groupbys_supported_by = _get_groupbys_support(groupbys)

    supported_by = SupportedBy.combine(
        aggregate_supported_by, query_supported_by, groupbys_supported_by
    )

    return not supported_by.standard_metrics and supported_by.on_demand_metrics
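

# Illustrative usage of ``should_use_on_demand_metrics`` (a sketch based on the checks above):
#
#     should_use_on_demand_metrics(
#         Dataset.PerformanceMetrics, "count()", "transaction.duration:>=1s"
#     )  # -> True: the duration filter can only be extracted on demand
#     should_use_on_demand_metrics(
#         Dataset.PerformanceMetrics, "count()", "environment:prod"
#     )  # -> False: standard metrics already cover this query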


def _extract_aggregate_components(aggregate: str) -> Optional[Tuple[str, List[str]]]:
    try:
        if is_equation(aggregate):
            return None

        function, args, _ = _parse_function(aggregate)
        return function, args
    except InvalidSearchQuery:
        logger.exception("Failed to parse aggregate: %s", aggregate)

    return None


def _extract_mri(args: List[str]) -> Optional[ParsedMRI]:
    if len(args) == 0:
        return None

    return parse_mri(args[0])


def _get_aggregate_supported_by(function: str, args: List[str]) -> SupportedBy:
    function_support = _get_function_support(function, args)
    args_support = _get_args_support(args, function)

    return SupportedBy.combine(function_support, args_support)


def _get_function_support(function: str, args: Sequence[str]) -> SupportedBy:
    if function == "percentile":
        return _get_percentile_support(args)

    return SupportedBy(
        standard_metrics=True,
        on_demand_metrics=(
            function in _SEARCH_TO_METRIC_AGGREGATES
            or function in _SEARCH_TO_DERIVED_METRIC_AGGREGATES
        )
        and function in _AGGREGATE_TO_METRIC_TYPE,
    )


def _get_percentile_support(args: Sequence[str]) -> SupportedBy:
    if len(args) != 2:
        return SupportedBy.neither()

    if not _get_percentile_op(args):
        return SupportedBy.neither()

    return SupportedBy.both()


def _get_percentile_op(args: Sequence[str]) -> Optional[MetricOperationType]:
    if len(args) != 2:
        raise ValueError("Percentile function should have 2 arguments")

    percentile = args[1]

    if percentile in ["0.5", "0.50"]:
        return "p50"
    if percentile == "0.75":
        return "p75"
    if percentile in ["0.9", "0.90"]:
        return "p90"
    if percentile == "0.95":
        return "p95"
    if percentile == "0.99":
        return "p99"
    if percentile in ["1", "1.0"]:
        return "p100"

    return None


def _get_field_support(field: str) -> SupportedBy:
    standard_metrics = _is_standard_metrics_field(field)
    on_demand_metrics = _is_on_demand_supported_field(field)
    return SupportedBy(standard_metrics=standard_metrics, on_demand_metrics=on_demand_metrics)


def _get_args_support(fields: Sequence[str], used_in_function: Optional[str] = None) -> SupportedBy:
    if len(fields) == 0:
        return SupportedBy.both()

    if used_in_function == "apdex":
        # apdex can have two variations, either apdex() or apdex(value).
        return SupportedBy(on_demand_metrics=True, standard_metrics=False)

    arg = fields[0]
    return _get_field_support(arg)


def _get_groupbys_support(groupbys: Sequence[str]) -> SupportedBy:
    if len(groupbys) == 0:
        return SupportedBy.both()

    return SupportedBy.combine(*[_get_field_support(groupby) for groupby in groupbys])


def _get_query_supported_by(query: Optional[str]) -> SupportedBy:
    try:
        parsed_query = parse_search_query(query=query, removed_blacklisted=False)

        standard_metrics = _is_standard_metrics_query(parsed_query)
        on_demand_metrics = _is_on_demand_supported_query(parsed_query)

        return SupportedBy(standard_metrics=standard_metrics, on_demand_metrics=on_demand_metrics)
    except InvalidSearchQuery:
        logger.exception("Failed to parse search query: %s", query)
        return SupportedBy.neither()


def _is_standard_metrics_query(tokens: Sequence[QueryToken]) -> bool:
    """
    Recursively checks whether any of the supplied tokens contain search filters that can't be handled by standard metrics.
    """
    for token in tokens:
        if not _is_standard_metrics_search_filter(token):
            return False

    return True


def _is_standard_metrics_search_filter(token: QueryToken) -> bool:
    if isinstance(token, SearchFilter):
        return _is_standard_metrics_search_term(token.key.name)

    if isinstance(token, ParenExpression):
        return _is_standard_metrics_query(token.children)

    return True


def _is_on_demand_supported_query(tokens: Sequence[QueryToken]) -> bool:
    """
    Recursively checks whether any of the supplied tokens contain search filters that can't be handled by on-demand metrics.
    """
    for token in tokens:
        if not _is_on_demand_supported_search_filter(token):
            return False

    return True


def _is_on_demand_supported_search_filter(token: QueryToken) -> bool:
    if isinstance(token, AggregateFilter):
        return False

    if isinstance(token, SearchFilter):
        if not _SEARCH_TO_RELAY_OPERATORS.get(token.operator):
            return False

        return (
            not _is_excluding_transactions(token)
            and not _is_error_field(token.key.name)
            and _is_on_demand_supported_field(token.key.name)
        )

    if isinstance(token, ParenExpression):
        return _is_on_demand_supported_query(token.children)

    return True


def _is_excluding_transactions(token: SearchFilter) -> bool:
    if token.key.name != "event.type":
        return False

    is_not_transaction = token.operator == "!=" and token.value.raw_value == "transaction"
    is_error_or_default = token.operator == "=" and token.value.raw_value in ["error", "default"]

    return is_not_transaction or is_error_or_default


def _is_standard_metrics_field(field: str) -> bool:
    return (
        _is_standard_metrics_search_term(field)
        or is_measurement(field)
        or is_span_op_breakdown(field)
        or field == "transaction.duration"
    )


def _is_error_field(token: str) -> bool:
    return token.startswith("error.")


def _is_standard_metrics_search_term(field: str) -> bool:
    return field in _STANDARD_METRIC_FIELDS


def _is_on_demand_supported_field(field: str) -> bool:
    # If it's a blacklisted field, we consider it compatible with on demand.
    if field in _BLACKLISTED_METRIC_FIELDS:
        return True

    try:
        _map_field_name(field)
        return True
    except ValueError:
        return False


def to_standard_metrics_query(query: str) -> str:
    """
    Converts a query containing on-demand search fields to a query that can be
    run using only standard metrics.

    This is done by removing conditions requiring on-demand metrics.

    NOTE: This does **NOT** create an equivalent query. It only creates the best
    approximation available using only standard metrics. It is used for approximating
    the volume of an on-demand metrics query using a combination of indexed and metrics data.

    Examples:
        "environment:dev AND transaction.duration:>=1s" -> "environment:dev"
        "environment:dev OR transaction.duration:>=1s" -> "environment:dev"
        "transaction.duration:>=1s OR browser.version:1" -> ""
        "transaction.duration:>=1s AND browser.version:1" -> ""
    """
    try:
        tokens = parse_search_query(query=query, removed_blacklisted=False)
    except InvalidSearchQuery:
        logger.exception("Failed to parse search query: %s", query)
        raise

    cleaned_query = to_standard_metrics_tokens(tokens)
    return query_tokens_to_string(cleaned_query)


def to_standard_metrics_tokens(tokens: Sequence[QueryToken]) -> Sequence[QueryToken]:
    """
    Converts a query in token form containing on-demand search fields to a query
    that has all on-demand filters removed and can be run using only standard metrics.
    """
    remaining_tokens = _remove_on_demand_search_filters(tokens)
    cleaned_query = cleanup_search_query(remaining_tokens)
    return cleaned_query


def query_tokens_to_string(tokens: Sequence[QueryToken]) -> str:
    """
    Converts a list of tokens into a query string.
    """
    ret_val = ""
    for token in tokens:
        if isinstance(token, str):
            ret_val += f" {token}"
        else:
            ret_val += f" {token.to_query_string()}"
    return ret_val.strip()


def _remove_on_demand_search_filters(tokens: Sequence[QueryToken]) -> Sequence[QueryToken]:
    """
    Removes tokens that contain filters that can only be handled by on-demand metrics.
    """
    ret_val: List[QueryToken] = []
    for token in tokens:
        if isinstance(token, SearchFilter):
            if _is_standard_metrics_search_filter(token):
                ret_val.append(token)
        elif isinstance(token, ParenExpression):
            ret_val.append(ParenExpression(_remove_on_demand_search_filters(token.children)))
        else:
            ret_val.append(token)

    return ret_val


def _remove_blacklisted_search_filters(tokens: Sequence[QueryToken]) -> Sequence[QueryToken]:
    """
    Removes tokens that contain filters that are blacklisted.
    """
    ret_val: List[QueryToken] = []
    for token in tokens:
        if isinstance(token, SearchFilter):
            if token.key.name not in _BLACKLISTED_METRIC_FIELDS:
                ret_val.append(token)
        elif isinstance(token, ParenExpression):
            ret_val.append(ParenExpression(_remove_blacklisted_search_filters(token.children)))
        else:
            ret_val.append(token)

    return ret_val


def _remove_redundant_parentheses(tokens: Sequence[QueryToken]) -> Sequence[QueryToken]:
    """
    Removes redundant parentheses of the form (((expr))), since they are not needed and might lead
    to parsing issues down the line.
    """
    if len(tokens) == 1 and isinstance(tokens[0], ParenExpression):
        return _remove_redundant_parentheses(tokens[0].children)

    return tokens


def _deep_sorted(value: Union[Any, Dict[Any, Any]]) -> Union[Any, Dict[Any, Any]]:
    if isinstance(value, dict):
        return {key: _deep_sorted(value) for key, value in sorted(value.items())}
    else:
        return value


TagsSpecsGenerator = Callable[[Project, Optional[Sequence[str]]], List[TagSpec]]


def _get_threshold(arguments: Optional[Sequence[str]]) -> float:
    if not arguments:
        raise Exception("Threshold parameter required.")

    return float(arguments[0])


def failure_tag_spec(_1: Project, _2: Optional[Sequence[str]]) -> List[TagSpec]:
    """This specification tags transactions with a boolean saying whether they failed."""
    return [
        {
            "key": "failure",
            "value": "true",
            "condition": {
                "inner": {
                    "name": "event.contexts.trace.status",
                    "op": "eq",
                    "value": ["ok", "cancelled", "unknown"],
                },
                "op": "not",
            },
        }
    ]


def apdex_tag_spec(project: Project, arguments: Optional[Sequence[str]]) -> list[TagSpec]:
    apdex_threshold = _get_threshold(arguments)
    field = _map_field_name(_get_satisfactory_threshold_and_metric(project)[1])

    return [
        {
            "key": "satisfaction",
            "value": "satisfactory",
            "condition": {"name": field, "op": "lte", "value": apdex_threshold},
        },
        {
            "key": "satisfaction",
            "value": "tolerable",
            "condition": {
                "inner": [
                    {"name": field, "op": "gt", "value": apdex_threshold},
                    {"name": field, "op": "lte", "value": apdex_threshold * 4},
                ],
                "op": "and",
            },
        },
        {
            "key": "satisfaction",
            "value": "frustrated",
            "condition": {"name": field, "op": "gt", "value": apdex_threshold * 4},
        },
    ]


def count_web_vitals_spec(project: Project, arguments: Optional[Sequence[str]]) -> list[TagSpec]:
    if not arguments:
        raise Exception("count_web_vitals requires arguments")

    if len(arguments) != 2:
        raise Exception("count_web_vitals requires a vital name and a vital rating")

    measurement, measurement_rating = arguments

    field = _map_field_name(measurement)
    _, vital = measurement.split(".")

    thresholds = VITAL_THRESHOLDS[vital]

    if measurement_rating == "good":
        return [
            {
                "key": "measurement_rating",
                "value": "matches_hash",
                "condition": {"name": field, "op": "lt", "value": thresholds["meh"]},
            }
        ]
    elif measurement_rating == "meh":
        return [
            {
                "key": "measurement_rating",
                "value": "matches_hash",
                "condition": {
                    "inner": [
                        {"name": field, "op": "gte", "value": thresholds["meh"]},
                        {"name": field, "op": "lt", "value": thresholds["poor"]},
                    ],
                    "op": "and",
                },
            }
        ]
    elif measurement_rating == "poor":
        return [
            {
                "key": "measurement_rating",
                "value": "matches_hash",
                "condition": {"name": field, "op": "gte", "value": thresholds["poor"]},
            }
        ]
    return [
        # 'any' measurement_rating
        {
            "key": "measurement_rating",
            "value": "matches_hash",
            "condition": {"name": field, "op": "gte", "value": 0},
        }
    ]


def user_misery_tag_spec(project: Project, arguments: Optional[Sequence[str]]) -> List[TagSpec]:
    """A metric that counts the number of unique users who were frustrated; "frustration" is
    measured as a response time four times the satisfactory response time threshold (in milliseconds).
    It highlights transactions that have the highest impact on users."""
    threshold = _get_threshold(arguments)
    field = _map_field_name(_get_satisfactory_threshold_and_metric(project)[1])

    return [
        {
            "key": "satisfaction",
            "value": "frustrated",
            "condition": {"name": field, "op": "gt", "value": threshold * 4},
        }
    ]


# This maps a derived metric to the function which generates its tag specification.
_DERIVED_METRICS: Dict[MetricOperationType, TagsSpecsGenerator | None] = {
    "on_demand_failure_count": failure_tag_spec,
    "on_demand_failure_rate": failure_tag_spec,
    "on_demand_apdex": apdex_tag_spec,
    "on_demand_epm": None,
    "on_demand_eps": None,
    "on_demand_count_web_vitals": count_web_vitals_spec,
    "on_demand_user_misery": user_misery_tag_spec,
}


@dataclass(frozen=True)
class FieldParsingResult:
    function: str
    arguments: Sequence[str]
    alias: str


@dataclass(frozen=True)
class QueryParsingResult:
    conditions: Sequence[QueryToken]

    def is_empty(self) -> bool:
        return len(self.conditions) == 0


class MetricSpecType(Enum):
    # Encodes the environment into the query hash; does not support group-by on environment.
    SIMPLE_QUERY = "simple_query"
    # Omits the environment from the query hash; supports group-by on environment for dynamic
    # switching between environments.
    DYNAMIC_QUERY = "dynamic_query"


@dataclass
class OnDemandMetricSpec:
    """
    Contains the information required to query or extract an on-demand metric.
    """

    # Base fields from outside.
    field: str
    query: str
    groupbys: Sequence[str]
    spec_type: MetricSpecType

    # Public fields.
    op: MetricOperationType

    # Private fields.
    _metric_type: str
    _arguments: Sequence[str]

    def __init__(
        self,
        field: str,
        query: str,
        environment: Optional[str] = None,
        groupbys: Optional[Sequence[str]] = None,
        spec_type: MetricSpecType = MetricSpecType.SIMPLE_QUERY,
        use_updated_env_logic: bool = True,
    ):
        self.field = field
        self.query = query
        self.spec_type = spec_type
        self.use_updated_env_logic = use_updated_env_logic
        # Removes the field if it was passed in selected_columns.
        self.groupbys = [groupby for groupby in groupbys or () if groupby != field]
        # For now, we just support the environment as extra, but in the future we might need more
        # complex ways to combine extra values that are outside the query string.
        self.environment = environment
        self._arguments = []
        self._eager_process()

    def _eager_process(self):
        op, metric_type, arguments = self._process_field()

        self.op = op
        self._metric_type = metric_type
        self._arguments = arguments or []

        sentry_sdk.start_span(
            op="OnDemandMetricSpec.spec_type", description=self.spec_type
        ).finish()

    @property
    def field_to_extract(self):
        if self.op in ("on_demand_apdex", "on_demand_count_web_vitals"):
            return None

        if self.op in ("on_demand_user_misery",):
            return _map_field_name("user.id")

        if not self._arguments:
            return None

        return self._arguments[0]

    @property
    def metric_type(self) -> str:
        """Returns c, d or s, representing whether it's a counter, distribution or set."""
        return self._metric_type

    @cached_property
    def mri(self) -> str:
        """The unique identifier of the on-demand metric."""
        return f"{self._metric_type}:{CUSTOM_ALERT_METRIC_NAME}@none"

    @cached_property
    def _query_str_for_hash(self) -> str:
        """Returns the string of query and field that is hashed to build the unique identifier for the on-demand metric."""
        str_to_hash = f"{self._field_for_hash()};{self._query_for_hash()}"
        if self.groupbys:
            # For compatibility with existing deployed metrics, leave the existing hash untouched
            # unless group-bys are now included in the spec.
            return f"{str_to_hash};{self._groupbys_for_hash()}"
        return str_to_hash

    @cached_property
    def query_hash(self) -> str:
        str_to_hash = self._query_str_for_hash
        hash = hashlib.shake_128(bytes(str_to_hash, encoding="ascii")).hexdigest(4)
        with sentry_sdk.start_span(op="OnDemandMetricSpec.query_hash", description=hash) as span:
            span.set_tag("str_to_hash", str_to_hash)
        return hash

    def _field_for_hash(self) -> Optional[str]:
        # Since derived metrics are a special case, we want to make sure that the hashing is
        # different from the other metrics.
        #
        # More specifically, the hashing implementation depends on the derived metric type:
        # - failure count & rate -> hash the op
        # - apdex -> hash the op + value
        #
        # The rationale for the different hashing is complex to explain, but the main idea is that
        # if we hashed the argument and the conditions, `count()` with condition `f` could end up
        # with the same hash as `apdex()` with condition `f`. That would be a problem: we might
        # already have data for `count()`, and when `apdex()` is created in the UI we would reuse
        # that metric even though it never extracted the tags that the apdex calculation needs,
        # effectively corrupting the data.
        if self.op in _NO_ARG_METRICS:
            return self.op
        elif self.op in _MULTIPLE_ARGS_METRICS:
            ret_val = f"{self.op}"
            for arg in self._arguments:
                ret_val += f":{arg}"
            return ret_val

        if not self._arguments:
            return None

        return self._arguments[0]

    def _query_for_hash(self) -> str:
        # In order to reduce the number of metrics extracted, we sort the conditions tree. This
        # heuristic de-duplicates metrics for semantically identical queries.
        #
        # In case we have a `None` condition, the string "None" is used for hashing as a sentinel
        # value.
        return str(_deep_sorted(self.condition))

    def _groupbys_for_hash(self):
        # A sorted list of group-bys for the hash, since group-bys will be unique per on-demand metric.
        return str(sorted(self.groupbys))

    @cached_property
    def condition(self) -> Optional[RuleCondition]:
        """Returns a parent condition containing a list of other conditions which determine whether or not the metric
        is extracted."""
        return self._process_query()

    def tags_conditions(self, project: Project) -> List[TagSpec]:
        """Returns a list of tag conditions that specify how tags are injected into metrics by Relay."""
        tags_specs_generator = _DERIVED_METRICS.get(self.op)
        if tags_specs_generator is None:
            return []

        return tags_specs_generator(project, self._arguments)

    def _tag_for_field(self, groupby: str) -> TagSpec:
        """Returns a TagSpec for a field, e.g. a group-by."""
        field = _map_field_name(groupby)

        return {
            "key": groupby,
            "field": field,
        }

    def tags_groupbys(self, groupbys: Sequence[str]) -> List[TagSpec]:
        """Returns a list of tag specs generated for the added group-bys, as they need to be stored separately for queries to work."""
        return [self._tag_for_field(groupby) for groupby in groupbys]

    def to_metric_spec(self, project: Project) -> MetricSpec:
        """Converts the OnDemandMetricSpec into a MetricSpec that Relay can understand."""
        # Tag conditions are always computed based on the project.
        extended_tags_conditions = self.tags_conditions(project).copy()
        extended_tags_conditions.append({"key": QUERY_HASH_KEY, "value": self.query_hash})

        tag_from_groupbys = self.tags_groupbys(self.groupbys)
        extended_tags_conditions.extend(tag_from_groupbys)

        if self.spec_type == MetricSpecType.DYNAMIC_QUERY:
            extended_tags_conditions.append(self._tag_for_field("environment"))

        metric_spec: MetricSpec = {
            "category": DataCategory.TRANSACTION.api_name(),
            "mri": self.mri,
            "field": self.field_to_extract,
            "tags": extended_tags_conditions,
        }

        condition = self.condition
        if condition is not None:
            metric_spec["condition"] = condition

        return metric_spec

    def _process_field(self) -> Tuple[MetricOperationType, str, Optional[Sequence[str]]]:
        parsed_field = self._parse_field(self.field)

        op = self._get_op(parsed_field.function, parsed_field.arguments)
        metric_type = self._get_metric_type(parsed_field.function)

        return op, metric_type, self._parse_arguments(op, metric_type, parsed_field)

    def _process_query(self) -> Optional[RuleCondition]:
        # The first step is to parse the query string into our internal AST format.
        parsed_query = self._parse_query(self.query)

        # We extend the parsed query with other conditions that we want to inject externally from
        # the query. If it is a simple query, we encode the environment in the query hash instead
        # of emitting it as a tag of the metric.
        if self.spec_type == MetricSpecType.SIMPLE_QUERY:
            parsed_query = self._extend_parsed_query(parsed_query)

        # The second step is to extract the conditions that might be present in the aggregate
        # function (e.g. count_if).
        parsed_field = self._parse_field(self.field)
        aggregate_conditions = self._aggregate_conditions(parsed_field)

        # In case we have an empty query but some conditions from the aggregate, we can just
        # return them.
        if parsed_query.is_empty() and aggregate_conditions:
            return aggregate_conditions

        try:
            # The third step is to generate the actual Relay rule that contains all rules nested.
            # We assume that the query being passed here can be satisfied ONLY by on-demand
            # metrics.
            rule_condition = SearchQueryConverter(parsed_query.conditions).convert()
        except Exception:
            if not parsed_query.is_empty():
                logger.exception("Error while converting search query '%s'", self.query)

            return None

        # If we don't have to merge the aggregate, we can just return the parsed rules.
        if not aggregate_conditions:
            return rule_condition

        # In case we have a top-level rule which is not an "and", we have to wrap it.
        if rule_condition["op"] != "and":
            return {"op": "and", "inner": [rule_condition, aggregate_conditions]}

        # Otherwise, we can just flatten the conditions.
        rule_condition["inner"].append(aggregate_conditions)
        return rule_condition

    def _extend_parsed_query(self, parsed_query_result: QueryParsingResult) -> QueryParsingResult:
        conditions = cast(List[QueryToken], parsed_query_result.conditions)

        new_conditions: List[QueryToken] = []
        if self.environment is not None:
            new_conditions.append(
                SearchFilter(
                    key=SearchKey(name="environment"),
                    operator="=",
                    value=SearchValue(raw_value=self.environment),
                )
            )

        extended_conditions = conditions
        if new_conditions:
            if self.use_updated_env_logic:
                conditions = [ParenExpression(children=conditions)] if conditions else []
                # This transformation is equivalent to (new_conditions) AND (conditions).
                extended_conditions = [ParenExpression(children=new_conditions)] + conditions
            else:
                # This transformation does not behave correctly, since it can violate precedence
                # rules. Because we use an AND condition for the environment, it binds with higher
                # priority than an OR specified in the user query, effectively resulting in the
                # wrong condition (e.g., (X AND Y) OR Z != X AND (Y OR Z)).
                #
                # This transformation is equivalent to new_conditions AND conditions.
                extended_conditions = new_conditions + conditions

        return QueryParsingResult(conditions=extended_conditions)

    @staticmethod
    def _aggregate_conditions(parsed_field: FieldParsingResult) -> Optional[RuleCondition]:
        # We have to handle the special case of the "count_if" function; it may be better to build
        # some better-abstracted code to handle third-party rule condition injection.
        if parsed_field.function == "count_if":
            key, op, value = parsed_field.arguments
            return _convert_countif_filter(key, op, value)

        return None

    @staticmethod
    def _parse_arguments(
        op: MetricOperationType, metric_type: str, parsed_field: FieldParsingResult
    ) -> Optional[Sequence[str]]:
        requires_arguments = metric_type in ["s", "d"] or op in _MULTIPLE_ARGS_METRICS
        if not requires_arguments:
            return None

        if len(parsed_field.arguments) == 0:
            raise Exception(f"The operation {op} supports one or more parameters")

        arguments = parsed_field.arguments
        return [_map_field_name(arguments[0])] if op not in _MULTIPLE_ARGS_METRICS else arguments

    @staticmethod
    def _get_op(function: str, args: Sequence[str]) -> MetricOperationType:
        if function == "percentile":
            percentile_op = _get_percentile_op(args)
            if percentile_op is not None:
                function = cast(str, percentile_op)

        op = _SEARCH_TO_METRIC_AGGREGATES.get(function) or _SEARCH_TO_DERIVED_METRIC_AGGREGATES.get(
            function
        )
        if op is not None:
            return op

        raise Exception(f"Unsupported aggregate function {function}")

    @staticmethod
    def _get_metric_type(function: str) -> str:
        metric_type = _AGGREGATE_TO_METRIC_TYPE.get(function)
        if metric_type is not None:
            return metric_type

        raise Exception(f"Unsupported aggregate function {function}")

    @staticmethod
    def _parse_field(value: str) -> FieldParsingResult:
        try:
            function, arguments, alias = _parse_function(value)
            if function:
                return FieldParsingResult(function=function, arguments=arguments, alias=alias)

            # TODO: why is this here?
            column = query_builder.resolve_column(value)
            return column
        except InvalidSearchQuery as e:
            raise Exception(f"Unable to parse the field '{value}' in on demand spec: {e}")

    @staticmethod
    def _parse_query(value: str) -> QueryParsingResult:
        """Parse the query string into our internal AST format."""
        try:
            conditions = parse_search_query(query=value, removed_blacklisted=True)

            # We want to remove any redundant parentheses, since they could lead to a different
            # conditions tree. That currently doesn't happen here, because SearchQueryConverter
            # optimizes that case, but it could easily slip in through other edge cases.
            conditions = _remove_redundant_parentheses(conditions)

            return QueryParsingResult(conditions=conditions)
        except InvalidSearchQuery as e:
            raise Exception(f"Invalid search query '{value}' in on demand spec: {e}")


def _convert_countif_filter(key: str, op: str, value: str) -> RuleCondition:
    """Maps ``count_if`` arguments to a ``RuleCondition``."""
    assert op in _COUNTIF_TO_RELAY_OPERATORS, f"Unsupported `count_if` operator {op}"

    condition: RuleCondition = {
        "op": _COUNTIF_TO_RELAY_OPERATORS[op],
        "name": _map_field_name(key),
        "value": fields.normalize_count_if_value({"column": key, "value": value}),
    }

    if op == "notEquals":
        condition = {"op": "not", "inner": condition}

    return condition


def _map_field_name(search_key: str) -> str:
    """
    Maps the name of a field in a search query to the event protocol path.

    Raises an exception if the field is not supported.
    """
    # Map known fields using a static mapping.
    if field := _SEARCH_TO_PROTOCOL_FIELDS.get(search_key):
        return f"event.{field}"

    # Measurements support generic access.
    if search_key.startswith("measurements."):
        return f"event.{search_key}.value"

    # Run a schema-aware check for tags. Always use the resolver output,
    # since it accounts for passing `tags[foo]` as a key.
    resolved = (resolve_column(Dataset.Transactions))(search_key)
    if resolved == "transaction_name":
        transaction_field = _SEARCH_TO_PROTOCOL_FIELDS.get("transaction")
        return f"event.{transaction_field}"
    if resolved.startswith("tags["):
        return f"event.tags.{resolved[5:-1]}"

    raise ValueError(f"Unsupported query field {search_key}")


def _get_satisfactory_threshold_and_metric(project: Project) -> Tuple[int, str]:
    """Returns the satisfactory response time threshold for the project and
    the associated metric ("transaction.duration" or "measurements.lcp")."""
    result = ProjectTransactionThreshold.filter(
        organization_id=project.organization.id,
        project_ids=[project.id],
        order_by=[],
        value_list=["threshold", "metric"],
    )

    if len(result) == 0:
        # We use the default threshold shown in the UI.
        threshold = APDEX_THRESHOLD_DEFAULT
        metric = TransactionMetric.DURATION.value
    else:
        # We technically don't use this threshold, since we extract it from the apdex(x) field
        # where x is the threshold, but we still return it in case a fallback is needed.
        threshold, metric = result[0]

    if metric == TransactionMetric.DURATION.value:
        metric_field = "transaction.duration"
    elif metric == TransactionMetric.LCP.value:
        # We assume it's LCP, since the enumerator contains only two possibilities.
        metric_field = "measurements.lcp"
    else:
        raise Exception("Invalid metric for project transaction threshold")

    return threshold, metric_field


T = TypeVar("T")


class SearchQueryConverter:
    """
    A converter from a search query token stream to rule conditions.

    Pass a token stream obtained from `parse_search_query` to the constructor.
    The converter can be used exactly once.
    """

    def __init__(self, tokens: Sequence[QueryToken]):
        self._tokens = tokens
        self._position = 0

    def convert(self) -> RuleCondition:
        """
        Converts the token stream into a rule condition.

        This function can raise an exception if the token stream is structurally
        invalid or contains fields that are not supported by the rule engine.
        """
        condition = self._expr()

        if self._position < len(self._tokens):
            raise ValueError("Unexpected trailing tokens")

        return condition

    def _peek(self) -> Optional[QueryToken]:
        """Returns the next token without consuming it."""
        if self._position < len(self._tokens):
            return self._tokens[self._position]
        else:
            return None

    def _consume(self, pattern: Union[str, Type[T]]) -> Optional[T]:
        """
        Consumes the next token if it matches the given pattern.

        The pattern can be:
        - a literal string, in which case the token must be equal to the string
        - a type, in which case the token must be an instance of the type

        Returns the token if it matches, or ``None`` otherwise.
        """
        token = self._peek()

        if isinstance(pattern, str) and token != pattern:
            return None
        elif isinstance(pattern, type) and not isinstance(token, pattern):
            return None

        self._position += 1
        return cast(T, token)

    def _expr(self) -> RuleCondition:
        terms = [self._term()]

        while self._consume("OR") is not None:
            terms.append(self._term())

        if len(terms) == 1:
            return terms[0]
        else:
            return {"op": "or", "inner": terms}

    def _term(self) -> RuleCondition:
        factors = [self._factor()]

        while self._peek() not in ("OR", None):
            self._consume("AND")  # AND is optional and implicit, ignore it if present.
            factors.append(self._factor())

        if len(factors) == 1:
            return factors[0]
        else:
            return {"op": "and", "inner": factors}

    def _factor(self) -> RuleCondition:
        if filt := self._consume(SearchFilter):
            return self._filter(filt)
        elif paren := self._consume(ParenExpression):
            return SearchQueryConverter(paren.children).convert()
        elif token := self._peek():
            raise ValueError(f"Unexpected token {token}")
        else:
            raise ValueError("Unexpected end of query")

    def _filter(self, token: SearchFilter) -> RuleCondition:
        operator = _SEARCH_TO_RELAY_OPERATORS.get(token.operator)
        if not operator:
            raise ValueError(f"Unsupported operator {token.operator}")

        # We propagate the filter in order to give as output a better error message with more context.
        key: str = token.key.name
        value: Any = token.value.raw_value
        if operator == "eq" and token.value.is_wildcard():
            condition: RuleCondition = {
                "op": "glob",
                "name": _map_field_name(key),
                "value": [value],
            }
        else:
            # Special case for the `has` and `!has` operators, which are parsed as follows:
            # - `has:x` -> `x != ""`
            # - `!has:x` -> `x = ""`
            # They need to be translated to `x not eq null` and `x eq null`, respectively.
            if token.operator in ("!=", "=") and value == "":
                value = None

            if isinstance(value, str):
                value = event_search.translate_escape_sequences(value)

            condition = {
                "op": operator,
                "name": _map_field_name(key),
                "value": value,
            }

        # In case we have negation operators, we have to wrap them in the `not` condition.
        if token.operator in ("!=", "NOT IN"):
            condition = {"op": "not", "inner": condition}

        return condition
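

# Illustrative usage of ``SearchQueryConverter`` (a sketch; both fields map through
# ``_SEARCH_TO_PROTOCOL_FIELDS`` above):
#
#     tokens = parse_search_query("environment:prod os.name:android")
#     SearchQueryConverter(tokens).convert()
#     # -> {
#     #     "op": "and",
#     #     "inner": [
#     #         {"op": "eq", "name": "event.environment", "value": "prod"},
#     #         {"op": "eq", "name": "event.contexts.os.name", "value": "android"},
#     #     ],
#     # }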