# test_builder.py

import datetime
import math
import re
from typing import List
from unittest import mock

import pytest
from django.utils import timezone
from snuba_sdk.aliased_expression import AliasedExpression
from snuba_sdk.column import Column
from snuba_sdk.conditions import Condition, Op, Or
from snuba_sdk.function import Function
from snuba_sdk.orderby import Direction, LimitBy, OrderBy

from sentry.exceptions import IncompatibleMetricsQuery, InvalidSearchQuery
from sentry.search.events import constants
from sentry.search.events.builder import (
    HistogramMetricQueryBuilder,
    MetricsQueryBuilder,
    QueryBuilder,
    TimeseriesMetricQueryBuilder,
)
from sentry.search.events.types import HistogramParams
from sentry.sentry_metrics import indexer
from sentry.testutils.cases import MetricsEnhancedPerformanceTestCase, TestCase
from sentry.utils.snuba import Dataset, QueryOutsideRetentionError


class QueryBuilderTest(TestCase):
    def setUp(self):
        self.start = datetime.datetime.now(tz=timezone.utc).replace(
            hour=10, minute=15, second=0, microsecond=0
        ) - datetime.timedelta(days=2)
        self.end = self.start + datetime.timedelta(days=1)
        self.projects = [1, 2, 3]
        self.params = {
            "project_id": self.projects,
            "start": self.start,
            "end": self.end,
        }
        # These conditions should always be on a query when self.params is passed
        self.default_conditions = [
            Condition(Column("timestamp"), Op.GTE, self.start),
            Condition(Column("timestamp"), Op.LT, self.end),
            Condition(Column("project_id"), Op.IN, self.projects),
        ]

    def test_simple_query(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "user.email:foo@example.com release:1.2.1",
            ["user.email", "release"],
        )
        self.assertCountEqual(
            query.where,
            [
                Condition(Column("email"), Op.EQ, "foo@example.com"),
                Condition(Column("release"), Op.IN, ["1.2.1"]),
                *self.default_conditions,
            ],
        )
        self.assertCountEqual(
            query.columns,
            [
                AliasedExpression(Column("email"), "user.email"),
                Column("release"),
            ],
        )
        query.get_snql_query().validate()

    def test_simple_orderby(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            selected_columns=["user.email", "release"],
            orderby=["user.email"],
        )
        self.assertCountEqual(query.where, self.default_conditions)
        self.assertCountEqual(
            query.orderby,
            [OrderBy(Column("email"), Direction.ASC)],
        )
        query.get_snql_query().validate()

        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            selected_columns=["user.email", "release"],
            orderby=["-user.email"],
        )
        self.assertCountEqual(query.where, self.default_conditions)
        self.assertCountEqual(
            query.orderby,
            [OrderBy(Column("email"), Direction.DESC)],
        )
        query.get_snql_query().validate()

    def test_orderby_duplicate_columns(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            selected_columns=["user.email", "user.email"],
            orderby=["user.email"],
        )
        self.assertCountEqual(
            query.orderby,
            [OrderBy(Column("email"), Direction.ASC)],
        )

    def test_simple_limitby(self):
        query = QueryBuilder(
            dataset=Dataset.Discover,
            params=self.params,
            query="",
            selected_columns=["message"],
            orderby="message",
            limitby=("message", 1),
            limit=4,
        )
        assert query.limitby == LimitBy([Column("message")], 1)

    def test_environment_filter(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "environment:prod",
            ["environment"],
        )
        self.assertCountEqual(
            query.where,
            [
                Condition(Column("environment"), Op.EQ, "prod"),
                *self.default_conditions,
            ],
        )
        query.get_snql_query().validate()

        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "environment:[dev, prod]",
            ["environment"],
        )
        self.assertCountEqual(
            query.where,
            [
                Condition(Column("environment"), Op.IN, ["dev", "prod"]),
                *self.default_conditions,
            ],
        )
        query.get_snql_query().validate()

    def test_environment_param(self):
        self.params["environment"] = ["", "prod"]
        query = QueryBuilder(Dataset.Discover, self.params, selected_columns=["environment"])
        self.assertCountEqual(
            query.where,
            [
                *self.default_conditions,
                Or(
                    [
                        Condition(Column("environment"), Op.IS_NULL),
                        Condition(Column("environment"), Op.EQ, "prod"),
                    ]
                ),
            ],
        )
        query.get_snql_query().validate()

        self.params["environment"] = ["dev", "prod"]
        query = QueryBuilder(Dataset.Discover, self.params, selected_columns=["environment"])
        self.assertCountEqual(
            query.where,
            [
                *self.default_conditions,
                Condition(Column("environment"), Op.IN, ["dev", "prod"]),
            ],
        )
        query.get_snql_query().validate()

    def test_project_in_condition_filters(self):
        # TODO(snql-boolean): Update this to match the corresponding test in test_filter
        project1 = self.create_project()
        project2 = self.create_project()
        self.params["project_id"] = [project1.id, project2.id]
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            f"project:{project1.slug}",
            selected_columns=["environment"],
        )
        self.assertCountEqual(
            query.where,
            [
                # generated by the search query on project
                Condition(Column("project_id"), Op.EQ, project1.id),
                Condition(Column("timestamp"), Op.GTE, self.start),
                Condition(Column("timestamp"), Op.LT, self.end),
                # default project filter from the params
                Condition(Column("project_id"), Op.IN, [project1.id, project2.id]),
            ],
        )

    def test_project_in_condition_filters_not_in_project_filter(self):
        # TODO(snql-boolean): Update this to match the corresponding test in test_filter
        project1 = self.create_project()
        project2 = self.create_project()
        # params is assumed to be validated at this point, so this query should be invalid
        self.params["project_id"] = [project2.id]
        with self.assertRaisesRegex(
            InvalidSearchQuery,
            re.escape(
                f"Invalid query. Project(s) {str(project1.slug)} do not exist or are not actively selected."
            ),
        ):
            QueryBuilder(
                Dataset.Discover,
                self.params,
                f"project:{project1.slug}",
                selected_columns=["environment"],
            )

    def test_project_alias_column(self):
        # TODO(snql-boolean): Update this to match the corresponding test in test_filter
        project1 = self.create_project()
        project2 = self.create_project()
        self.params["project_id"] = [project1.id, project2.id]
        query = QueryBuilder(Dataset.Discover, self.params, selected_columns=["project"])
        self.assertCountEqual(
            query.where,
            [
                Condition(Column("project_id"), Op.IN, [project1.id, project2.id]),
                Condition(Column("timestamp"), Op.GTE, self.start),
                Condition(Column("timestamp"), Op.LT, self.end),
            ],
        )
        self.assertCountEqual(
            query.columns,
            [
                Function(
                    "transform",
                    [
                        Column("project_id"),
                        [project1.id, project2.id],
                        [project1.slug, project2.slug],
                        "",
                    ],
                    "project",
                )
            ],
        )

    def test_project_alias_column_with_project_condition(self):
        project1 = self.create_project()
        project2 = self.create_project()
        self.params["project_id"] = [project1.id, project2.id]
        query = QueryBuilder(
            Dataset.Discover, self.params, f"project:{project1.slug}", selected_columns=["project"]
        )
        self.assertCountEqual(
            query.where,
            [
                # generated by the search query on project
                Condition(Column("project_id"), Op.EQ, project1.id),
                Condition(Column("timestamp"), Op.GTE, self.start),
                Condition(Column("timestamp"), Op.LT, self.end),
                # default project filter from the params
                Condition(Column("project_id"), Op.IN, [project1.id, project2.id]),
            ],
        )
        # Because of the condition on project there should only be 1 project in the transform
        self.assertCountEqual(
            query.columns,
            [
                Function(
                    "transform",
                    [
                        Column("project_id"),
                        [project1.id],
                        [project1.slug],
                        "",
                    ],
                    "project",
                )
            ],
        )

    def test_count_if(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "",
            selected_columns=[
                "count_if(event.type,equals,transaction)",
                'count_if(event.type,notEquals,"transaction")',
            ],
        )
        self.assertCountEqual(query.where, self.default_conditions)
        self.assertCountEqual(
            query.aggregates,
            [
                Function(
                    "countIf",
                    [
                        Function("equals", [Column("type"), "transaction"]),
                    ],
                    "count_if_event_type_equals_transaction",
                ),
                Function(
                    "countIf",
                    [
                        Function("notEquals", [Column("type"), "transaction"]),
                    ],
                    "count_if_event_type_notEquals__transaction",
                ),
            ],
        )

    def test_count_if_with_tags(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "",
            selected_columns=[
                "count_if(foo,equals,bar)",
                'count_if(foo,notEquals,"baz")',
            ],
        )
        self.assertCountEqual(query.where, self.default_conditions)
        self.assertCountEqual(
            query.aggregates,
            [
                Function(
                    "countIf",
                    [
                        Function("equals", [Column("tags[foo]"), "bar"]),
                    ],
                    "count_if_foo_equals_bar",
                ),
                Function(
                    "countIf",
                    [
                        Function("notEquals", [Column("tags[foo]"), "baz"]),
                    ],
                    "count_if_foo_notEquals__baz",
                ),
            ],
        )

    def test_array_join(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "",
            selected_columns=["array_join(measurements_key)", "count()"],
            functions_acl=["array_join"],
        )
        array_join_column = Function(
            "arrayJoin",
            [Column("measurements.key")],
            "array_join_measurements_key",
        )
        self.assertCountEqual(query.columns, [array_join_column, Function("count", [], "count")])
        # make sure the array join columns are present in groupby
        self.assertCountEqual(query.groupby, [array_join_column])

    def test_retention(self):
        old_start = datetime.datetime(2015, 5, 18, 10, 15, 1, tzinfo=timezone.utc)
        old_end = datetime.datetime(2015, 5, 19, 10, 15, 1, tzinfo=timezone.utc)
        old_params = {**self.params, "start": old_start, "end": old_end}
        with self.options({"system.event-retention-days": 10}):
            with self.assertRaises(QueryOutsideRetentionError):
                QueryBuilder(
                    Dataset.Discover,
                    old_params,
                    "",
                    selected_columns=[],
                )

    def test_array_combinator(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "",
            selected_columns=["sumArray(measurements_value)"],
            functions_acl=["sumArray"],
        )
        self.assertCountEqual(
            query.columns,
            [
                Function(
                    "sum",
                    [Function("arrayJoin", [Column("measurements.value")])],
                    "sumArray_measurements_value",
                )
            ],
        )

    def test_array_combinator_is_private(self):
        with self.assertRaisesRegex(InvalidSearchQuery, "sum: no access to private function"):
            QueryBuilder(
                Dataset.Discover,
                self.params,
                "",
                selected_columns=["sumArray(measurements_value)"],
            )

    def test_array_combinator_with_non_array_arg(self):
        with self.assertRaisesRegex(InvalidSearchQuery, "stuff is not a valid array column"):
            QueryBuilder(
                Dataset.Discover,
                self.params,
                "",
                selected_columns=["sumArray(stuff)"],
                functions_acl=["sumArray"],
            )

    def test_spans_columns(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "",
            selected_columns=[
                "array_join(spans_op)",
                "array_join(spans_group)",
                "sumArray(spans_exclusive_time)",
            ],
            functions_acl=["array_join", "sumArray"],
        )
        self.assertCountEqual(
            query.columns,
            [
                Function("arrayJoin", [Column("spans.op")], "array_join_spans_op"),
                Function("arrayJoin", [Column("spans.group")], "array_join_spans_group"),
                Function(
                    "sum",
                    [Function("arrayJoin", [Column("spans.exclusive_time")])],
                    "sumArray_spans_exclusive_time",
                ),
            ],
        )

    def test_array_join_clause(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "",
            selected_columns=[
                "spans_op",
                "count()",
            ],
            array_join="spans_op",
        )
        self.assertCountEqual(
            query.columns,
            [
                AliasedExpression(Column("spans.op"), "spans_op"),
                Function("count", [], "count"),
            ],
        )
        assert query.array_join == [Column("spans.op")]
        query.get_snql_query().validate()

    def test_sample_rate(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "",
            selected_columns=[
                "count()",
            ],
            sample_rate=0.1,
        )
        assert query.sample_rate == 0.1
        snql_query = query.get_snql_query().query
        snql_query.validate()
        assert snql_query.match.sample == 0.1

    def test_turbo(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "",
            selected_columns=[
                "count()",
            ],
            turbo=True,
        )
        assert query.turbo
        snql_query = query.get_snql_query()
        snql_query.validate()
        assert snql_query.flags.turbo

    def test_auto_aggregation(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "count_unique(user):>10",
            selected_columns=[
                "count()",
            ],
            auto_aggregations=True,
            use_aggregate_conditions=True,
        )
        snql_query = query.get_snql_query().query
        snql_query.validate()
        self.assertCountEqual(
            snql_query.having,
            [
                Condition(Function("uniq", [Column("user")], "count_unique_user"), Op.GT, 10),
            ],
        )
        self.assertCountEqual(
            snql_query.select,
            [
                Function("uniq", [Column("user")], "count_unique_user"),
                Function("count", [], "count"),
            ],
        )

    def test_auto_aggregation_with_boolean(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            # Nonsense query but doesn't matter
            "count_unique(user):>10 OR count_unique(user):<10",
            selected_columns=[
                "count()",
            ],
            auto_aggregations=True,
            use_aggregate_conditions=True,
        )
        snql_query = query.get_snql_query().query
        snql_query.validate()
        self.assertCountEqual(
            snql_query.having,
            [
                Or(
                    [
                        Condition(
                            Function("uniq", [Column("user")], "count_unique_user"), Op.GT, 10
                        ),
                        Condition(
                            Function("uniq", [Column("user")], "count_unique_user"), Op.LT, 10
                        ),
                    ]
                )
            ],
        )
        self.assertCountEqual(
            snql_query.select,
            [
                Function("uniq", [Column("user")], "count_unique_user"),
                Function("count", [], "count"),
            ],
        )

    def test_disable_auto_aggregation(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "count_unique(user):>10",
            selected_columns=[
                "count()",
            ],
            auto_aggregations=False,
            use_aggregate_conditions=True,
        )
        # With count_unique only in a condition and no auto_aggregations this should raise an invalid search query
        with self.assertRaises(InvalidSearchQuery):
            query.get_snql_query()

    def test_query_chained_or_tip(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "field:a OR field:b OR field:c",
            selected_columns=[
                "field",
            ],
        )
        assert constants.QUERY_TIPS["CHAINED_OR"] in query.tips["query"]

    def test_chained_or_with_different_terms(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "field:a or field:b or event.type:transaction or transaction:foo",
            selected_columns=[
                "field",
            ],
        )
        # This query becomes something roughly like:
        # field:a or (field:b or (event.type:transaction or transaction:foo))
        assert constants.QUERY_TIPS["CHAINED_OR"] in query.tips["query"]

        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "event.type:transaction or transaction:foo or field:a or field:b",
            selected_columns=[
                "field",
            ],
        )
        assert constants.QUERY_TIPS["CHAINED_OR"] in query.tips["query"]

    def test_chained_or_with_different_terms_with_and(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            # There's an implicit and between field:b, and event.type:transaction
            "field:a or field:b event.type:transaction",
            selected_columns=[
                "field",
            ],
        )
        # This query becomes something roughly like:
        # field:a or (field:b and event.type:transaction)
        assert constants.QUERY_TIPS["CHAINED_OR"] not in query.tips["query"]

        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            # There's an implicit and between event.type:transaction, and field:a
            "event.type:transaction field:a or field:b",
            selected_columns=[
                "field",
            ],
        )
        # This query becomes something roughly like:
        # field:a or (field:b and event.type:transaction)
        assert constants.QUERY_TIPS["CHAINED_OR"] not in query.tips["query"]


def _metric_percentile_definition(
    org_id, quantile, field="transaction.duration", alias=None
) -> Function:
    if alias is None:
        alias = f"p{quantile}_{field.replace('.', '_')}"
    return Function(
        "arrayElement",
        [
            Function(
                f"quantilesIf(0.{quantile.rstrip('0')})",
                [
                    Column("value"),
                    Function(
                        "equals",
                        [
                            Column("metric_id"),
                            indexer.resolve(org_id, constants.METRICS_MAP[field]),
                        ],
                    ),
                ],
            ),
            1,
        ],
        alias,
    )
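
# Illustrative note (this exact expansion isn't asserted anywhere by itself):
# _metric_percentile_definition(org_id, "75") builds
#     arrayElement(quantilesIf(0.75)(value, equals(metric_id, <resolved id>)), 1)
# with the alias "p75_transaction_duration"; the rstrip("0") turns quantile "50"
# into quantilesIf(0.5).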


def _metric_conditions(org_id, metrics) -> List[Condition]:
    return [
        Condition(
            Column("metric_id"),
            Op.IN,
            sorted(indexer.resolve(org_id, constants.METRICS_MAP[metric]) for metric in metrics),
        )
    ]
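
# e.g. _metric_conditions(org_id, ["transaction.duration"]) yields a single
# Condition(metric_id IN [<resolved id>]); the resolved ids are sorted so the
# tests can compare against a deterministic ordering.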


class MetricBuilderBaseTest(MetricsEnhancedPerformanceTestCase):
    METRIC_STRINGS = [
        "foo_transaction",
        "bar_transaction",
        "baz_transaction",
    ]
    DEFAULT_METRIC_TIMESTAMP = datetime.datetime(
        2015, 1, 1, 10, 15, 0, tzinfo=timezone.utc
    ) + datetime.timedelta(minutes=1)

    def setUp(self):
        super().setUp()
        self.start = datetime.datetime.now(tz=timezone.utc).replace(
            hour=10, minute=15, second=0, microsecond=0
        ) - datetime.timedelta(days=18)
        self.end = datetime.datetime.now(tz=timezone.utc).replace(
            hour=10, minute=15, second=0, microsecond=0
        )
        self.projects = [self.project.id]
        self.params = {
            "organization_id": self.organization.id,
            "project_id": self.projects,
            "start": self.start,
            "end": self.end,
        }
        # These conditions should always be on a query when self.params is passed
        self.default_conditions = [
            Condition(Column("timestamp"), Op.GTE, self.start),
            Condition(Column("timestamp"), Op.LT, self.end),
            Condition(Column("project_id"), Op.IN, self.projects),
            Condition(Column("org_id"), Op.EQ, self.organization.id),
        ]
        for string in self.METRIC_STRINGS:
            indexer.record(self.organization.id, string)
        indexer.record(self.organization.id, "transaction")

    def setup_orderby_data(self):
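        """Seed fixture metrics for the orderby tests below.

        What this stores (readable off the calls that follow): foo_transaction gets
        one duration of 100 and one unique user; bar_transaction gets one duration
        of 50 and two unique users.
        """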
        self.store_metric(
            100,
            tags={"transaction": "foo_transaction"},
            timestamp=self.start + datetime.timedelta(minutes=5),
        )
        self.store_metric(
            1,
            metric="user",
            tags={"transaction": "foo_transaction"},
            timestamp=self.start + datetime.timedelta(minutes=5),
        )
        self.store_metric(
            50,
            tags={"transaction": "bar_transaction"},
            timestamp=self.start + datetime.timedelta(minutes=5),
        )
        self.store_metric(
            1,
            metric="user",
            tags={"transaction": "bar_transaction"},
            timestamp=self.start + datetime.timedelta(minutes=5),
        )
        self.store_metric(
            2,
            metric="user",
            tags={"transaction": "bar_transaction"},
            timestamp=self.start + datetime.timedelta(minutes=5),
        )


class MetricQueryBuilderTest(MetricBuilderBaseTest):
    def test_default_conditions(self):
        query = MetricsQueryBuilder(self.params, "", selected_columns=[])
        self.assertCountEqual(query.where, self.default_conditions)

    def test_column_resolution(self):
        query = MetricsQueryBuilder(
            self.params,
            "",
            selected_columns=["tags[transaction]", "transaction"],
        )
        self.assertCountEqual(
            query.columns,
            [
                AliasedExpression(
                    Column(f"tags[{indexer.resolve(self.organization.id, 'transaction')}]"),
                    "tags[transaction]",
                ),
                AliasedExpression(
                    Column(f"tags[{indexer.resolve(self.organization.id, 'transaction')}]"),
                    "transaction",
                ),
            ],
        )

    def test_simple_aggregates(self):
        query = MetricsQueryBuilder(
            self.params,
            "",
            selected_columns=[
                "p50(transaction.duration)",
                "p75(measurements.lcp)",
                "p90(measurements.fcp)",
                "p95(measurements.cls)",
                "p99(measurements.fid)",
            ],
        )
        self.assertCountEqual(
            query.where,
            [
                *self.default_conditions,
                *_metric_conditions(
                    self.organization.id,
                    [
                        "transaction.duration",
                        "measurements.lcp",
                        "measurements.fcp",
                        "measurements.cls",
                        "measurements.fid",
                    ],
                ),
            ],
        )
        self.assertCountEqual(
            query.distributions,
            [
                _metric_percentile_definition(self.organization.id, "50"),
                _metric_percentile_definition(self.organization.id, "75", "measurements.lcp"),
                _metric_percentile_definition(self.organization.id, "90", "measurements.fcp"),
                _metric_percentile_definition(self.organization.id, "95", "measurements.cls"),
                _metric_percentile_definition(self.organization.id, "99", "measurements.fid"),
            ],
        )

    def test_custom_percentile_throws_error(self):
        with self.assertRaises(IncompatibleMetricsQuery):
            MetricsQueryBuilder(
                self.params,
                "",
                selected_columns=[
                    "percentile(transaction.duration, 0.11)",
                ],
            )

    def test_percentile_function(self):
        self.maxDiff = None
        query = MetricsQueryBuilder(
            self.params,
            "",
            selected_columns=[
                "percentile(transaction.duration, 0.75)",
            ],
        )
        self.assertCountEqual(
            query.where,
            [
                *self.default_conditions,
                *_metric_conditions(
                    self.organization.id,
                    [
                        "transaction.duration",
                    ],
                ),
            ],
        )
        self.assertCountEqual(
            query.distributions,
            [
                Function(
                    "arrayElement",
                    [
                        Function(
                            "quantilesIf(0.75)",
                            [
                                Column("value"),
                                Function(
                                    "equals",
                                    [
                                        Column("metric_id"),
                                        indexer.resolve(
                                            self.organization.id,
                                            constants.METRICS_MAP["transaction.duration"],
                                        ),
                                    ],
                                ),
                            ],
                        ),
                        1,
                    ],
                    "percentile_transaction_duration_0_75",
                )
            ],
        )

    def test_metric_condition_dedupe(self):
        org_id = 1
        query = MetricsQueryBuilder(
            self.params,
            "",
            selected_columns=[
                "p50(transaction.duration)",
                "p75(transaction.duration)",
                "p90(transaction.duration)",
                "p95(transaction.duration)",
                "p99(transaction.duration)",
            ],
        )
        self.assertCountEqual(
            query.where,
            [
                *self.default_conditions,
                *_metric_conditions(org_id, ["transaction.duration"]),
            ],
        )

    def test_p100(self):
        """While p100 isn't an actual quantile in the distributions table, it's equivalent to max"""
        query = MetricsQueryBuilder(
            self.params,
            "",
            selected_columns=[
                "p100(transaction.duration)",
            ],
        )
        self.assertCountEqual(
            query.where,
            [
                *self.default_conditions,
                *_metric_conditions(
                    self.organization.id,
                    [
                        "transaction.duration",
                    ],
                ),
            ],
        )
        self.assertCountEqual(
            query.distributions,
            [
                Function(
                    "maxIf",
                    [
                        Column("value"),
                        Function(
                            "equals",
                            [
                                Column("metric_id"),
                                indexer.resolve(
                                    self.organization.id,
                                    constants.METRICS_MAP["transaction.duration"],
                                ),
                            ],
                        ),
                    ],
                    "p100_transaction_duration",
                )
            ],
        )

    def test_grouping(self):
        query = MetricsQueryBuilder(
            self.params,
            "",
            selected_columns=["transaction", "project", "p95(transaction.duration)"],
        )
        self.assertCountEqual(
            query.where,
            [
                *self.default_conditions,
                *_metric_conditions(self.organization.id, ["transaction.duration"]),
            ],
        )
        transaction_index = indexer.resolve(self.organization.id, "transaction")
        transaction = AliasedExpression(
            Column(f"tags[{transaction_index}]"),
            "transaction",
        )
        project = Function(
            "transform",
            [
                Column("project_id"),
                [self.project.id],
                [self.project.slug],
                "",
            ],
            "project",
        )
        self.assertCountEqual(
            query.groupby,
            [
                transaction,
                project,
            ],
        )
        self.assertCountEqual(
            query.distributions, [_metric_percentile_definition(self.organization.id, "95")]
        )

    def test_transaction_filter(self):
        query = MetricsQueryBuilder(
            self.params,
            "transaction:foo_transaction",
            selected_columns=["transaction", "project", "p95(transaction.duration)"],
        )
        transaction_index = indexer.resolve(self.organization.id, "transaction")
        transaction_name = indexer.resolve(self.organization.id, "foo_transaction")
        transaction = Column(f"tags[{transaction_index}]")
        self.assertCountEqual(
            query.where,
            [
                *self.default_conditions,
                *_metric_conditions(self.organization.id, ["transaction.duration"]),
                Condition(transaction, Op.EQ, transaction_name),
            ],
        )

    def test_transaction_in_filter(self):
        query = MetricsQueryBuilder(
            self.params,
            "transaction:[foo_transaction, bar_transaction]",
            selected_columns=["transaction", "project", "p95(transaction.duration)"],
        )
        transaction_index = indexer.resolve(self.organization.id, "transaction")
        transaction_name1 = indexer.resolve(self.organization.id, "foo_transaction")
        transaction_name2 = indexer.resolve(self.organization.id, "bar_transaction")
        transaction = Column(f"tags[{transaction_index}]")
        self.assertCountEqual(
            query.where,
            [
                *self.default_conditions,
                *_metric_conditions(self.organization.id, ["transaction.duration"]),
                Condition(transaction, Op.IN, [transaction_name1, transaction_name2]),
            ],
        )

    def test_missing_transaction_index(self):
        with self.assertRaisesRegex(
            InvalidSearchQuery,
            re.escape("Tag value was not found"),
        ):
            MetricsQueryBuilder(
                self.params,
                "transaction:something_else",
                selected_columns=["transaction", "project", "p95(transaction.duration)"],
            )

    def test_missing_transaction_index_in_filter(self):
        with self.assertRaisesRegex(
            InvalidSearchQuery,
            re.escape("Tag value was not found"),
        ):
            MetricsQueryBuilder(
                self.params,
                "transaction:[something_else, something_else2]",
                selected_columns=["transaction", "project", "p95(transaction.duration)"],
            )

    def test_incorrect_parameter_for_metrics(self):
        with self.assertRaises(IncompatibleMetricsQuery):
            MetricsQueryBuilder(
                self.params,
                f"project:{self.project.slug}",
                selected_columns=["transaction", "count_unique(test)"],
            )

    def test_project_filter(self):
        query = MetricsQueryBuilder(
            self.params,
            f"project:{self.project.slug}",
            selected_columns=["transaction", "project", "p95(transaction.duration)"],
        )
        self.assertCountEqual(
            query.where,
            [
                *self.default_conditions,
                *_metric_conditions(self.organization.id, ["transaction.duration"]),
                Condition(Column("project_id"), Op.EQ, self.project.id),
            ],
        )

    def test_limit_validation(self):
        # 51 is ok
        MetricsQueryBuilder(self.params, limit=51)
        # None is ok, defaults to 50
        query = MetricsQueryBuilder(self.params)
        assert query.limit.limit == 50
        # anything higher should throw an error
        with self.assertRaises(IncompatibleMetricsQuery):
            MetricsQueryBuilder(self.params, limit=10_000)

    def test_granularity(self):
        # Need to pick granularity based on the period
        def get_granularity(start, end):
            params = {
                "organization_id": self.organization.id,
                "project_id": self.projects,
                "start": start,
                "end": end,
            }
            query = MetricsQueryBuilder(params)
            return query.granularity.granularity
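
        # Summary of the expectations below (as encoded by the asserts): day-aligned
        # multi-day windows use the day bucket (86400), windows on or near hour
        # boundaries use the hour bucket (3600), everything else falls back to 60.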

        # If we're doing at least a day and it's midnight we should use the daily bucket
        start = datetime.datetime(2015, 5, 18, 0, 0, 0, tzinfo=timezone.utc)
        end = datetime.datetime(2015, 5, 19, 0, 0, 0, tzinfo=timezone.utc)
        assert get_granularity(start, end) == 86400, "A day at midnight"

        # If we're doing several days, allow more range
        start = datetime.datetime(2015, 5, 18, 0, 10, 0, tzinfo=timezone.utc)
        end = datetime.datetime(2015, 5, 28, 23, 59, 0, tzinfo=timezone.utc)
        assert get_granularity(start, end) == 86400, "Several days"

        # We're doing a long period, use the biggest granularity
        start = datetime.datetime(2015, 5, 18, 12, 33, 0, tzinfo=timezone.utc)
        end = datetime.datetime(2015, 7, 28, 17, 22, 0, tzinfo=timezone.utc)
        assert get_granularity(start, end) == 86400, "Big range"

        # If we're on the start of the hour we should use the hour granularity
        start = datetime.datetime(2015, 5, 18, 23, 0, 0, tzinfo=timezone.utc)
        end = datetime.datetime(2015, 5, 20, 1, 0, 0, tzinfo=timezone.utc)
        assert get_granularity(start, end) == 3600, "On the hour"

        # If we're close to the start of the hour we should use the hour granularity
        start = datetime.datetime(2015, 5, 18, 23, 3, 0, tzinfo=timezone.utc)
        end = datetime.datetime(2015, 5, 21, 1, 57, 0, tzinfo=timezone.utc)
        assert get_granularity(start, end) == 3600, "On the hour, close"

        # A decently long period but not close to hour ends, still use hour bucket
        start = datetime.datetime(2015, 5, 18, 23, 3, 0, tzinfo=timezone.utc)
        end = datetime.datetime(2015, 5, 28, 1, 57, 0, tzinfo=timezone.utc)
        assert get_granularity(start, end) == 3600, "On the hour, long period"

        # Even a several-hour window uses minute granularity when it starts at a
        # random minute in the middle of the day
        start = datetime.datetime(2015, 5, 18, 10, 15, 1, tzinfo=timezone.utc)
        end = datetime.datetime(2015, 5, 18, 18, 15, 1, tzinfo=timezone.utc)
        assert get_granularity(start, end) == 60, "A few hours, but random minute"

        # Less than a minute, no reason to work hard for such a small window, just use a minute
        start = datetime.datetime(2015, 5, 18, 10, 15, 1, tzinfo=timezone.utc)
        end = datetime.datetime(2015, 5, 18, 10, 15, 34, tzinfo=timezone.utc)
        assert get_granularity(start, end) == 60, "less than a minute"

    def test_granularity_boundaries(self):
        # Need to pick granularity based on the period
        def get_granularity(start, end):
            params = {
                "organization_id": self.organization.id,
                "project_id": self.projects,
                "start": start,
                "end": end,
            }
            query = MetricsQueryBuilder(params)
            return query.granularity.granularity

        # See resolve_granularity on the MetricsQueryBuilder to see what these boundaries are

        # Exactly 30d, at the 30 minute boundary
        start = datetime.datetime(2015, 5, 1, 0, 30, 0, tzinfo=timezone.utc)
        end = datetime.datetime(2015, 5, 31, 0, 30, 0, tzinfo=timezone.utc)
        assert get_granularity(start, end) == 86400, "30d at boundary"

        # Near 30d, but 1 hour before the boundary for end
        start = datetime.datetime(2015, 5, 1, 0, 30, 0, tzinfo=timezone.utc)
        end = datetime.datetime(2015, 5, 30, 23, 29, 0, tzinfo=timezone.utc)
        assert get_granularity(start, end) == 3600, "near 30d, but 1 hour before boundary for end"

        # Near 30d, but 1 hour after the boundary for start
        start = datetime.datetime(2015, 5, 1, 1, 30, 0, tzinfo=timezone.utc)
        end = datetime.datetime(2015, 5, 31, 0, 30, 0, tzinfo=timezone.utc)
        assert get_granularity(start, end) == 3600, "near 30d, but 1 hour after boundary for start"

        # Exactly 3d
        start = datetime.datetime(2015, 5, 1, 0, 30, 0, tzinfo=timezone.utc)
        end = datetime.datetime(2015, 5, 4, 0, 30, 0, tzinfo=timezone.utc)
        assert get_granularity(start, end) == 86400, "3d at boundary"

        # Near 3d, but 1 hour before the boundary for end
        start = datetime.datetime(2015, 5, 1, 0, 30, 0, tzinfo=timezone.utc)
        end = datetime.datetime(2015, 5, 3, 23, 29, 0, tzinfo=timezone.utc)
        assert get_granularity(start, end) == 3600, "near 3d, but 1 hour before boundary for end"

        # Near 3d, but 1 hour after the boundary for start
        start = datetime.datetime(2015, 5, 1, 1, 30, 0, tzinfo=timezone.utc)
        end = datetime.datetime(2015, 5, 4, 0, 30, 0, tzinfo=timezone.utc)
        assert get_granularity(start, end) == 3600, "near 3d, but 1 hour after boundary for start"

        # Exactly 12 hours
        start = datetime.datetime(2015, 5, 1, 0, 15, 0, tzinfo=timezone.utc)
        end = datetime.datetime(2015, 5, 1, 12, 15, 0, tzinfo=timezone.utc)
        assert get_granularity(start, end) == 3600, "12h at boundary"

        # Near 12h, but 15 minutes before the boundary for end
        start = datetime.datetime(2015, 5, 1, 0, 15, 0, tzinfo=timezone.utc)
        end = datetime.datetime(2015, 5, 1, 12, 0, 0, tzinfo=timezone.utc)
        assert (
            get_granularity(start, end) == 60
        ), "12h at boundary, but 15 min before the boundary for end"

        # Near 12h, but 15 minutes after the boundary for start
        start = datetime.datetime(2015, 5, 1, 0, 30, 0, tzinfo=timezone.utc)
        end = datetime.datetime(2015, 5, 1, 12, 15, 0, tzinfo=timezone.utc)
        assert (
            get_granularity(start, end) == 60
        ), "12h at boundary, but 15 min after the boundary for start"

    def test_get_snql_query(self):
        query = MetricsQueryBuilder(self.params, "", selected_columns=["p90(transaction.duration)"])
        snql_request = query.get_snql_query()
        assert snql_request.dataset == "metrics"
        snql_query = snql_request.query
        self.assertCountEqual(
            snql_query.select,
            [
                _metric_percentile_definition(self.organization.id, "90"),
            ],
        )
        self.assertCountEqual(
            query.where,
            [
                *self.default_conditions,
                *_metric_conditions(self.organization.id, ["transaction.duration"]),
            ],
        )

    def test_get_snql_query_errors_with_multiple_dataset(self):
        query = MetricsQueryBuilder(
            self.params, "", selected_columns=["p90(transaction.duration)", "count_unique(user)"]
        )
        with self.assertRaises(NotImplementedError):
            query.get_snql_query()

    def test_get_snql_query_errors_with_no_functions(self):
        query = MetricsQueryBuilder(self.params, "", selected_columns=["project"])
        with self.assertRaises(IncompatibleMetricsQuery):
            query.get_snql_query()

    def test_run_query(self):
        self.store_metric(
            100,
            tags={"transaction": "foo_transaction"},
            timestamp=self.start + datetime.timedelta(minutes=5),
        )
        self.store_metric(
            100,
            metric="measurements.lcp",
            tags={"transaction": "foo_transaction"},
            timestamp=self.start + datetime.timedelta(minutes=5),
        )
        self.store_metric(
            1000,
            metric="measurements.lcp",
            tags={"transaction": "foo_transaction"},
            timestamp=self.start + datetime.timedelta(minutes=5),
        )
        query = MetricsQueryBuilder(
            self.params,
            f"project:{self.project.slug}",
            selected_columns=[
                "transaction",
                "p95(transaction.duration)",
                "p100(measurements.lcp)",
            ],
        )
        result = query.run_query("test_query")
        assert len(result["data"]) == 1
        assert result["data"][0] == {
            "transaction": indexer.resolve(self.organization.id, "foo_transaction"),
            "p95_transaction_duration": 100,
            "p100_measurements_lcp": 1000,
        }
        self.assertCountEqual(
            result["meta"],
            [
                {"name": "transaction", "type": "UInt64"},
                {"name": "p95_transaction_duration", "type": "Float64"},
                {"name": "p100_measurements_lcp", "type": "Float64"},
            ],
        )

    def test_run_query_multiple_tables(self):
        self.store_metric(
            100,
            tags={"transaction": "foo_transaction"},
            timestamp=self.start + datetime.timedelta(minutes=5),
        )
        self.store_metric(
            1,
            metric="user",
            tags={"transaction": "foo_transaction"},
            timestamp=self.start + datetime.timedelta(minutes=5),
        )
        query = MetricsQueryBuilder(
            self.params,
            f"project:{self.project.slug}",
            selected_columns=[
                "transaction",
                "p95(transaction.duration)",
                "count_unique(user)",
            ],
        )
        result = query.run_query("test_query")
        assert len(result["data"]) == 1
        assert result["data"][0] == {
            "transaction": indexer.resolve(self.organization.id, "foo_transaction"),
            "p95_transaction_duration": 100,
            "count_unique_user": 1,
        }
        self.assertCountEqual(
            result["meta"],
            [
                {"name": "transaction", "type": "UInt64"},
                {"name": "p95_transaction_duration", "type": "Float64"},
                {"name": "count_unique_user", "type": "UInt64"},
            ],
        )

    def test_run_query_with_multiple_groupby_orderby_distribution(self):
        self.setup_orderby_data()
        query = MetricsQueryBuilder(
            self.params,
            f"project:{self.project.slug}",
            selected_columns=[
                "transaction",
                "project",
                "p95(transaction.duration)",
                "count_unique(user)",
            ],
            orderby="-p95(transaction.duration)",
        )
        result = query.run_query("test_query")
        assert len(result["data"]) == 2
        assert result["data"][0] == {
            "transaction": indexer.resolve(self.organization.id, "foo_transaction"),
            "project": self.project.slug,
            "p95_transaction_duration": 100,
            "count_unique_user": 1,
        }
        assert result["data"][1] == {
            "transaction": indexer.resolve(self.organization.id, "bar_transaction"),
            "project": self.project.slug,
            "p95_transaction_duration": 50,
            "count_unique_user": 2,
        }
        self.assertCountEqual(
            result["meta"],
            [
                {"name": "transaction", "type": "UInt64"},
                {"name": "project", "type": "String"},
                {"name": "p95_transaction_duration", "type": "Float64"},
                {"name": "count_unique_user", "type": "UInt64"},
            ],
        )

    def test_run_query_with_multiple_groupby_orderby_set(self):
        self.setup_orderby_data()
        query = MetricsQueryBuilder(
            self.params,
            f"project:{self.project.slug}",
            selected_columns=[
                "transaction",
                "project",
                "p95(transaction.duration)",
                "count_unique(user)",
            ],
            orderby="-count_unique(user)",
        )
        result = query.run_query("test_query")
        assert len(result["data"]) == 2
        assert result["data"][0] == {
            "transaction": indexer.resolve(self.organization.id, "bar_transaction"),
            "project": self.project.slug,
            "p95_transaction_duration": 50,
            "count_unique_user": 2,
        }
        assert result["data"][1] == {
            "transaction": indexer.resolve(self.organization.id, "foo_transaction"),
            "project": self.project.slug,
            "p95_transaction_duration": 100,
            "count_unique_user": 1,
        }
        self.assertCountEqual(
            result["meta"],
            [
                {"name": "transaction", "type": "UInt64"},
                {"name": "project", "type": "String"},
                {"name": "p95_transaction_duration", "type": "Float64"},
                {"name": "count_unique_user", "type": "UInt64"},
            ],
        )

    def test_run_query_with_project_orderby(self):
        project_1 = self.create_project(slug="aaaaaa")
        project_2 = self.create_project(slug="zzzzzz")
        for project in [project_1, project_2]:
            self.store_metric(
                100,
                tags={"transaction": "foo_transaction"},
                project=project.id,
                timestamp=self.start + datetime.timedelta(minutes=5),
            )
        self.params["project_id"] = [project_1.id, project_2.id]

        query = MetricsQueryBuilder(
            self.params,
            selected_columns=[
                "transaction",
                "project",
                "p95(transaction.duration)",
            ],
            orderby="project",
        )
        result = query.run_query("test_query")
        assert len(result["data"]) == 2
        assert result["data"][0] == {
            "transaction": indexer.resolve(self.organization.id, "foo_transaction"),
            "project": project_1.slug,
            "p95_transaction_duration": 100,
        }
        assert result["data"][1] == {
            "transaction": indexer.resolve(self.organization.id, "foo_transaction"),
            "project": project_2.slug,
            "p95_transaction_duration": 100,
        }

        query = MetricsQueryBuilder(
            self.params,
            selected_columns=[
                "transaction",
                "project",
                "p95(transaction.duration)",
            ],
            orderby="-project",
        )
        result = query.run_query("test_query")
        assert len(result["data"]) == 2
        assert result["data"][0] == {
            "transaction": indexer.resolve(self.organization.id, "foo_transaction"),
            "project": project_2.slug,
            "p95_transaction_duration": 100,
        }
        assert result["data"][1] == {
            "transaction": indexer.resolve(self.organization.id, "foo_transaction"),
            "project": project_1.slug,
            "p95_transaction_duration": 100,
        }

    def test_run_query_with_tag_orderby(self):
        with self.assertRaises(IncompatibleMetricsQuery):
            query = MetricsQueryBuilder(
                self.params,
                selected_columns=[
                    "transaction",
                    "project",
                    "p95(transaction.duration)",
                ],
                orderby="transaction",
            )
            query.run_query("test_query")

    # TODO: multiple groupby with counter

    def test_run_query_with_events_per_aggregates(self):
        for i in range(5):
            self.store_metric(100, timestamp=self.start + datetime.timedelta(minutes=i * 15))
        query = MetricsQueryBuilder(
            self.params,
            "",
            selected_columns=[
                "eps()",
                "epm()",
                "tps()",
                "tpm()",
            ],
        )
        result = query.run_query("test_query")
        data = result["data"][0]
        # Check the aliases are correct
        assert data["epm"] == data["tpm"]
        assert data["eps"] == data["tps"]
        # Check the values are correct
        assert data["tpm"] == 5 / ((self.end - self.start).total_seconds() / 60)
        assert data["tpm"] / 60 == data["tps"]
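        # Worked arithmetic for the window above: self.start is 18 days before self.end,
        # i.e. 18 * 24 * 60 = 25920 minutes, so tpm == 5 / 25920 and tps == tpm / 60.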

    def test_count(self):
        for _ in range(3):
            self.store_metric(
                150,
                timestamp=self.start + datetime.timedelta(minutes=5),
            )
            self.store_metric(
                50,
                timestamp=self.start + datetime.timedelta(minutes=5),
            )
        query = MetricsQueryBuilder(
            self.params,
            "",
            selected_columns=[
                "count()",
            ],
        )
        result = query.run_query("test_query")
        data = result["data"][0]
        assert data["count"] == 6

    def test_avg_duration(self):
        for _ in range(3):
            self.store_metric(
                150,
                timestamp=self.start + datetime.timedelta(minutes=5),
            )
            self.store_metric(
                50,
                timestamp=self.start + datetime.timedelta(minutes=5),
            )
        query = MetricsQueryBuilder(
            self.params,
            "",
            selected_columns=[
                "avg(transaction.duration)",
            ],
        )
        result = query.run_query("test_query")
        data = result["data"][0]
        assert data["avg_transaction_duration"] == 100

    def test_avg_span_http(self):
        for _ in range(3):
            self.store_metric(
                150,
                metric="spans.http",
                timestamp=self.start + datetime.timedelta(minutes=5),
            )
            self.store_metric(
                50,
                metric="spans.http",
                timestamp=self.start + datetime.timedelta(minutes=5),
            )
        query = MetricsQueryBuilder(
            self.params,
            "",
            selected_columns=[
                "avg(spans.http)",
            ],
        )
        result = query.run_query("test_query")
        data = result["data"][0]
        assert data["avg_spans_http"] == 100

    def test_failure_rate(self):
        for _ in range(3):
            self.store_metric(
                100,
                tags={"transaction.status": "internal_error"},
                timestamp=self.start + datetime.timedelta(minutes=5),
            )
            self.store_metric(
                100,
                tags={"transaction.status": "ok"},
                timestamp=self.start + datetime.timedelta(minutes=5),
            )
        query = MetricsQueryBuilder(
            self.params,
            "",
            selected_columns=[
                "failure_rate()",
                "failure_count()",
            ],
        )
        result = query.run_query("test_query")
        data = result["data"][0]
        assert data["failure_rate"] == 0.5
        assert data["failure_count"] == 3

    def test_run_query_with_multiple_groupby_orderby_null_values_in_second_entity(self):
        """Since the null value is on count_unique(user) we will still get baz_transaction, because
        we query distributions first (which contain it) and then simply find no unique count in the
        second entity."""
        self.setup_orderby_data()
        self.store_metric(
            200,
            tags={"transaction": "baz_transaction"},
            timestamp=self.start + datetime.timedelta(minutes=5),
        )
        query = MetricsQueryBuilder(
            self.params,
            f"project:{self.project.slug}",
            selected_columns=[
                "transaction",
                "project",
                "p95(transaction.duration)",
                "count_unique(user)",
            ],
            orderby="p95(transaction.duration)",
        )
        result = query.run_query("test_query")
        assert len(result["data"]) == 3
        assert result["data"][0] == {
            "transaction": indexer.resolve(self.organization.id, "bar_transaction"),
            "project": self.project.slug,
            "p95_transaction_duration": 50,
            "count_unique_user": 2,
        }
        assert result["data"][1] == {
            "transaction": indexer.resolve(self.organization.id, "foo_transaction"),
            "project": self.project.slug,
            "p95_transaction_duration": 100,
            "count_unique_user": 1,
        }
        assert result["data"][2] == {
            "transaction": indexer.resolve(self.organization.id, "baz_transaction"),
            "project": self.project.slug,
            "p95_transaction_duration": 200,
            "count_unique_user": 0,
        }
        self.assertCountEqual(
            result["meta"],
            [
                {"name": "transaction", "type": "UInt64"},
                {"name": "project", "type": "String"},
                {"name": "p95_transaction_duration", "type": "Float64"},
                {"name": "count_unique_user", "type": "UInt64"},
            ],
        )

    @pytest.mark.skip(
        reason="Currently cannot handle the case where null values are in the first entity"
    )
    def test_run_query_with_multiple_groupby_orderby_null_values_in_first_entity(self):
        """But if the null value is in the first entity, it won't show up in the groupby values,
        which means the transaction will be missing."""
        self.setup_orderby_data()
        self.store_metric(200, tags={"transaction": "baz_transaction"})
        query = MetricsQueryBuilder(
            self.params,
            f"project:{self.project.slug}",
            selected_columns=[
                "transaction",
                "project",
                "p95(transaction.duration)",
                "count_unique(user)",
            ],
            orderby="count_unique(user)",
        )
        result = query.run_query("test_query")
        assert len(result["data"]) == 3
        assert result["data"][0] == {
            "transaction": indexer.resolve(self.organization.id, "baz_transaction"),
            "project": self.project.slug,
            "p95_transaction_duration": 200,
        }
        assert result["data"][1] == {
            "transaction": indexer.resolve(self.organization.id, "foo_transaction"),
            "project": self.project.slug,
            "p95_transaction_duration": 100,
            "count_unique_user": 1,
        }
        assert result["data"][2] == {
            "transaction": indexer.resolve(self.organization.id, "bar_transaction"),
            "project": self.project.slug,
            "p95_transaction_duration": 50,
            "count_unique_user": 2,
        }
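
    # The next three tests exercise queries that span multiple metrics entities (distributions
    # for p95, sets for count_unique); the builder is expected to raise IncompatibleMetricsQuery
    # rather than attempt an orderby or aggregate filter it cannot resolve to a single entity.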
    def test_multiple_entity_orderby_fails(self):
        with self.assertRaises(IncompatibleMetricsQuery):
            query = MetricsQueryBuilder(
                self.params,
                f"project:{self.project.slug}",
                selected_columns=[
                    "transaction",
                    "project",
                    "p95(transaction.duration)",
                    "count_unique(user)",
                ],
                orderby=["-count_unique(user)", "p95(transaction.duration)"],
            )
            query.run_query("test_query")

    def test_multiple_entity_query_fails(self):
        with self.assertRaises(IncompatibleMetricsQuery):
            query = MetricsQueryBuilder(
                self.params,
                "p95(transaction.duration):>5s AND count_unique(user):>0",
                selected_columns=[
                    "transaction",
                    "project",
                    "p95(transaction.duration)",
                    "count_unique(user)",
                ],
                use_aggregate_conditions=True,
            )
            query.run_query("test_query")

    def test_query_entity_does_not_match_orderby(self):
        with self.assertRaises(IncompatibleMetricsQuery):
            query = MetricsQueryBuilder(
                self.params,
                "count_unique(user):>0",
                selected_columns=[
                    "transaction",
                    "project",
                    "p95(transaction.duration)",
                    "count_unique(user)",
                ],
                orderby=["p95(transaction.duration)"],
                use_aggregate_conditions=True,
            )
            query.run_query("test_query")

    def test_aggregate_query_with_multiple_entities_without_orderby(self):
        self.store_metric(
            200,
            tags={"transaction": "baz_transaction"},
            timestamp=self.start + datetime.timedelta(minutes=5),
        )
        self.store_metric(
            1,
            metric="user",
            tags={"transaction": "bar_transaction"},
            timestamp=self.start + datetime.timedelta(minutes=5),
        )
        self.store_metric(
            1,
            metric="user",
            tags={"transaction": "baz_transaction"},
            timestamp=self.start + datetime.timedelta(minutes=5),
        )
        self.store_metric(
            2,
            metric="user",
            tags={"transaction": "baz_transaction"},
            timestamp=self.start + datetime.timedelta(minutes=5),
        )
        # This will query both sets & distributions because of the selected columns
        query = MetricsQueryBuilder(
            self.params,
            # Filter by count_unique since the default primary is distributions without an orderby
            "count_unique(user):>1",
            selected_columns=[
                "transaction",
                "project",
                "p95(transaction.duration)",
                "count_unique(user)",
            ],
            allow_metric_aggregates=True,
            use_aggregate_conditions=True,
        )
        result = query.run_query("test_query")
        assert len(result["data"]) == 1
        assert result["data"][0] == {
            "transaction": indexer.resolve(self.organization.id, "baz_transaction"),
            "project": self.project.slug,
            "p95_transaction_duration": 200,
            "count_unique_user": 2,
        }
        self.assertCountEqual(
            result["meta"],
            [
                {"name": "transaction", "type": "UInt64"},
                {"name": "project", "type": "String"},
                {"name": "p95_transaction_duration", "type": "Float64"},
                {"name": "count_unique_user", "type": "UInt64"},
            ],
        )

    def test_aggregate_query_with_multiple_entities_with_orderby(self):
        self.store_metric(
            200,
            tags={"transaction": "baz_transaction"},
            timestamp=self.start + datetime.timedelta(minutes=5),
        )
        self.store_metric(
            1,
            tags={"transaction": "bar_transaction"},
            timestamp=self.start + datetime.timedelta(minutes=5),
        )
        self.store_metric(
            1,
            metric="user",
            tags={"transaction": "baz_transaction"},
            timestamp=self.start + datetime.timedelta(minutes=5),
        )
        # This will query both sets & distributions because of the selected columns
        query = MetricsQueryBuilder(
            self.params,
            "p95(transaction.duration):>100",
            selected_columns=[
                "transaction",
                "project",
                "p95(transaction.duration)",
                "count_unique(user)",
            ],
            orderby=["p95(transaction.duration)"],
            allow_metric_aggregates=True,
            use_aggregate_conditions=True,
        )
        result = query.run_query("test_query")
        assert len(result["data"]) == 1
        assert result["data"][0] == {
            "transaction": indexer.resolve(self.organization.id, "baz_transaction"),
            "project": self.project.slug,
            "p95_transaction_duration": 200,
            "count_unique_user": 1,
        }
        self.assertCountEqual(
            result["meta"],
            [
                {"name": "transaction", "type": "UInt64"},
                {"name": "project", "type": "String"},
                {"name": "p95_transaction_duration", "type": "Float64"},
                {"name": "count_unique_user", "type": "UInt64"},
            ],
        )

    def test_invalid_column_arg(self):
        for function in [
            "count_unique(transaction.duration)",
            "count_miserable(measurements.fcp)",
            "p75(user)",
            "count_web_vitals(user, poor)",
        ]:
            with self.assertRaises(IncompatibleMetricsQuery):
                MetricsQueryBuilder(
                    self.params,
                    "",
                    selected_columns=[function],
                )

    def test_orderby_field_alias(self):
        query = MetricsQueryBuilder(
            self.params,
            selected_columns=[
                "transaction",
                "p95()",
            ],
            orderby=["p95"],
        )
        assert len(query.orderby) == 1
        assert query.orderby[0].exp == _metric_percentile_definition(
            self.organization.id, "95", "transaction.duration", "p95"
        )

        query = MetricsQueryBuilder(
            self.params,
            selected_columns=[
                "transaction",
                "p95() as test",
            ],
            orderby=["test"],
        )
        assert len(query.orderby) == 1
        assert query.orderby[0].exp == _metric_percentile_definition(
            self.organization.id, "95", "transaction.duration", "test"
        )

    def test_error_if_aggregates_disallowed(self):
        def run_query(query, use_aggregate_conditions):
            with self.assertRaises(IncompatibleMetricsQuery):
                MetricsQueryBuilder(
                    self.params,
                    selected_columns=[
                        "transaction",
                        "p95()",
                        "count_unique(user)",
                    ],
                    query=query,
                    allow_metric_aggregates=False,
                    use_aggregate_conditions=use_aggregate_conditions,
                )

        queries = [
            "p95():>5s",
            "count_unique(user):>0",
            "transaction:foo_transaction AND (!transaction:bar_transaction OR p95():>5s)",
        ]
        for query in queries:
            for use_aggregate_conditions in [True, False]:
                run_query(query, use_aggregate_conditions)

    def test_no_error_if_aggregates_disallowed_but_no_aggregates_included(self):
        MetricsQueryBuilder(
            self.params,
            selected_columns=[
                "transaction",
                "p95()",
                "count_unique(user)",
            ],
            query="transaction:foo_transaction",
            allow_metric_aggregates=False,
            use_aggregate_conditions=True,
        )

        MetricsQueryBuilder(
            self.params,
            selected_columns=[
                "transaction",
                "p95()",
                "count_unique(user)",
            ],
            query="transaction:foo_transaction",
            allow_metric_aggregates=False,
            use_aggregate_conditions=False,
        )

    def test_multiple_dataset_but_no_data(self):
        """When there's no data from the primary dataset we shouldn't error out"""
        result = MetricsQueryBuilder(
            self.params,
            selected_columns=[
                "p50()",
                "count_unique(user)",
            ],
            allow_metric_aggregates=False,
            use_aggregate_conditions=True,
        ).run_query("test")
        assert len(result["data"]) == 1
        data = result["data"][0]
        assert data["count_unique_user"] == 0
        # Handled by the discover transform later so it's fine that this is NaN
        assert math.isnan(data["p50"])
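
    # The mocked indexer returns -1 for every lookup, so any real resolution attempt (or a real
    # Snuba query) during the dry run would be visible through the mocks below.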
    @mock.patch("sentry.search.events.builder.raw_snql_query")
    @mock.patch("sentry.search.events.builder.indexer.resolve", return_value=-1)
    def test_dry_run_does_not_hit_indexer_or_clickhouse(self, mock_indexer, mock_query):
        query = MetricsQueryBuilder(
            self.params,
            # Include a tag:value search as well since that resolves differently
            f"project:{self.project.slug} transaction:foo_transaction",
            selected_columns=[
                "transaction",
                "p95(transaction.duration)",
                "p100(measurements.lcp)",
                "apdex()",
                "count_web_vitals(measurements.lcp, good)",
            ],
            dry_run=True,
        )
        query.run_query("test_query")
        assert not mock_indexer.called
        assert not mock_query.called

    @mock.patch("sentry.search.events.builder.indexer.resolve", return_value=-1)
    def test_multiple_references_only_resolve_index_once(self, mock_indexer):
        MetricsQueryBuilder(
            self.params,
            f"project:{self.project.slug} transaction:foo_transaction transaction:foo_transaction",
            selected_columns=[
                "transaction",
                "count_web_vitals(measurements.lcp, good)",
                "count_web_vitals(measurements.lcp, good)",
                "count_web_vitals(measurements.lcp, good)",
                "count_web_vitals(measurements.lcp, good)",
                "count_web_vitals(measurements.lcp, good)",
            ],
        )
        self.assertCountEqual(
            mock_indexer.mock_calls,
            [
                mock.call(self.organization.id, "transaction"),
                mock.call(self.organization.id, "foo_transaction"),
                mock.call(self.organization.id, constants.METRICS_MAP["measurements.lcp"]),
                mock.call(self.organization.id, "measurement_rating"),
                mock.call(self.organization.id, "good"),
            ],
        )


class TimeseriesMetricQueryBuilderTest(MetricBuilderBaseTest):
    def test_get_query(self):
        query = TimeseriesMetricQueryBuilder(
            self.params, interval=900, query="", selected_columns=["p50(transaction.duration)"]
        )
        snql_query = query.get_snql_query()
        assert len(snql_query) == 1
        query = snql_query[0].query
        assert query.where == [
            *self.default_conditions,
            *_metric_conditions(self.organization.id, ["transaction.duration"]),
        ]
        assert query.select == [_metric_percentile_definition(self.organization.id, "50")]
        assert query.match.name == "metrics_distributions"
        assert query.granularity.granularity == 60

    def test_default_conditions(self):
        query = TimeseriesMetricQueryBuilder(
            self.params, interval=900, query="", selected_columns=[]
        )
        self.assertCountEqual(query.where, self.default_conditions)

    def test_granularity(self):
        # Need to pick granularity based on the period and interval for timeseries
        def get_granularity(start, end, interval):
            params = {
                "organization_id": self.organization.id,
                "project_id": self.projects,
                "start": start,
                "end": end,
            }
            query = TimeseriesMetricQueryBuilder(params, interval=interval)
            return query.granularity.granularity
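
        # In every scenario below the resulting granularity ends up being the largest of the
        # fixed buckets (10s, 60s, 3600s) that is strictly smaller than the requested interval;
        # the differing start/end boundaries don't change the outcome for these cases.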
        # If we're doing at least a day and it's midnight we should use the daily bucket
        start = datetime.datetime(2015, 5, 18, 0, 0, 0, tzinfo=timezone.utc)
        end = datetime.datetime(2015, 5, 19, 0, 0, 0, tzinfo=timezone.utc)
        assert get_granularity(start, end, 30) == 10, "A day at midnight, 30s interval"
        assert get_granularity(start, end, 900) == 60, "A day at midnight, 15min interval"
        assert get_granularity(start, end, 3600) == 60, "A day at midnight, 1hr interval"
        assert get_granularity(start, end, 86400) == 3600, "A day at midnight, 1d interval"

        # If we're on the start of the hour we should use the hour granularity
        start = datetime.datetime(2015, 5, 18, 23, 0, 0, tzinfo=timezone.utc)
        end = datetime.datetime(2015, 5, 20, 1, 0, 0, tzinfo=timezone.utc)
        assert get_granularity(start, end, 30) == 10, "On the hour, 30s interval"
        assert get_granularity(start, end, 900) == 60, "On the hour, 15min interval"
        assert get_granularity(start, end, 3600) == 60, "On the hour, 1hr interval"
        assert get_granularity(start, end, 86400) == 3600, "On the hour, 1d interval"

        # Even though this is >24h of data, because it's a random hour in the middle of the day
        # to the next we use minute granularity
        start = datetime.datetime(2015, 5, 18, 10, 15, 1, tzinfo=timezone.utc)
        end = datetime.datetime(2015, 5, 19, 15, 15, 1, tzinfo=timezone.utc)
        assert get_granularity(start, end, 30) == 10, "A few hours, but random minute, 30s interval"
        assert (
            get_granularity(start, end, 900) == 60
        ), "A few hours, but random minute, 15min interval"
        assert (
            get_granularity(start, end, 3600) == 60
        ), "A few hours, but random minute, 1hr interval"
        assert (
            get_granularity(start, end, 86400) == 3600
        ), "A few hours, but random minute, 1d interval"

        # Less than a minute, no reason to work hard for such a small window, just use a minute
        start = datetime.datetime(2015, 5, 18, 10, 15, 1, tzinfo=timezone.utc)
        end = datetime.datetime(2015, 5, 18, 10, 15, 34, tzinfo=timezone.utc)
        assert get_granularity(start, end, 30) == 10, "less than a minute, 30s interval"
        assert get_granularity(start, end, 900) == 60, "less than a minute, 15min interval"
        assert get_granularity(start, end, 3600) == 60, "less than a minute, 1hr interval"
        assert get_granularity(start, end, 86400) == 3600, "less than a minute, 1d interval"

    def test_transaction_in_filter(self):
        query = TimeseriesMetricQueryBuilder(
            self.params,
            interval=900,
            query="transaction:[foo_transaction, bar_transaction]",
            selected_columns=["p95(transaction.duration)"],
        )
        transaction_index = indexer.resolve(self.organization.id, "transaction")
        transaction_name1 = indexer.resolve(self.organization.id, "foo_transaction")
        transaction_name2 = indexer.resolve(self.organization.id, "bar_transaction")
        transaction = Column(f"tags[{transaction_index}]")
        self.assertCountEqual(
            query.where,
            [
                *self.default_conditions,
                *_metric_conditions(self.organization.id, ["transaction.duration"]),
                Condition(transaction, Op.IN, [transaction_name1, transaction_name2]),
            ],
        )

    def test_missing_transaction_index(self):
        with self.assertRaisesRegex(
            InvalidSearchQuery,
            re.escape("Tag value was not found"),
        ):
            TimeseriesMetricQueryBuilder(
                self.params,
                interval=900,
                query="transaction:something_else",
                selected_columns=["project", "p95(transaction.duration)"],
            )

    def test_missing_transaction_index_in_filter(self):
        with self.assertRaisesRegex(
            InvalidSearchQuery,
            re.escape("Tag value was not found"),
        ):
            TimeseriesMetricQueryBuilder(
                self.params,
                interval=900,
                query="transaction:[something_else, something_else2]",
                selected_columns=["p95(transaction.duration)"],
            )

    def test_project_filter(self):
        query = TimeseriesMetricQueryBuilder(
            self.params,
            interval=900,
            query=f"project:{self.project.slug}",
            selected_columns=["p95(transaction.duration)"],
        )
        self.assertCountEqual(
            query.where,
            [
                *self.default_conditions,
                *_metric_conditions(self.organization.id, ["transaction.duration"]),
                Condition(Column("project_id"), Op.EQ, self.project.id),
            ],
        )

    def test_meta(self):
        query = TimeseriesMetricQueryBuilder(
            self.params,
            interval=900,
            selected_columns=["p50(transaction.duration)", "count_unique(user)"],
        )
        result = query.run_query("test_query")
        self.assertCountEqual(
            result["meta"],
            [
                {"name": "time", "type": "DateTime('Universal')"},
                {"name": "p50_transaction_duration", "type": "Float64"},
                {"name": "count_unique_user", "type": "UInt64"},
            ],
        )

    def test_with_aggregate_filter(self):
        query = TimeseriesMetricQueryBuilder(
            self.params,
            interval=900,
            query="p50(transaction.duration):>100",
            selected_columns=["p50(transaction.duration)", "count_unique(user)"],
            allow_metric_aggregates=True,
        )
        # Aggregate conditions should be dropped
        assert query.having == []

    def test_run_query(self):
        for i in range(5):
            self.store_metric(100, timestamp=self.start + datetime.timedelta(minutes=i * 15))
            self.store_metric(
                1,
                metric="user",
                timestamp=self.start + datetime.timedelta(minutes=i * 15),
            )
        query = TimeseriesMetricQueryBuilder(
            self.params,
            interval=900,
            query="",
            selected_columns=["p50(transaction.duration)", "count_unique(user)"],
        )
        result = query.run_query("test_query")
        assert result["data"] == [
            {
                "time": self.start.isoformat(),
                "p50_transaction_duration": 100.0,
                "count_unique_user": 1,
            },
            {
                "time": (self.start + datetime.timedelta(minutes=15)).isoformat(),
                "p50_transaction_duration": 100.0,
                "count_unique_user": 1,
            },
            {
                "time": (self.start + datetime.timedelta(minutes=30)).isoformat(),
                "p50_transaction_duration": 100.0,
                "count_unique_user": 1,
            },
            {
                "time": (self.start + datetime.timedelta(minutes=45)).isoformat(),
                "p50_transaction_duration": 100.0,
                "count_unique_user": 1,
            },
            {
                "time": (self.start + datetime.timedelta(minutes=60)).isoformat(),
                "p50_transaction_duration": 100.0,
                "count_unique_user": 1,
            },
        ]
        self.assertCountEqual(
            result["meta"],
            [
                {"name": "time", "type": "DateTime('Universal')"},
                {"name": "p50_transaction_duration", "type": "Float64"},
                {"name": "count_unique_user", "type": "UInt64"},
            ],
        )

    def test_run_query_with_hour_interval(self):
        # See comment on resolve_time_column for explanation of this test
        self.start = datetime.datetime.now(timezone.utc).replace(
            hour=15, minute=30, second=0, microsecond=0
        )
        self.end = datetime.datetime.fromtimestamp(self.start.timestamp() + 86400, timezone.utc)
        self.params = {
            "organization_id": self.organization.id,
            "project_id": self.projects,
            "start": self.start,
            "end": self.end,
        }
        for i in range(5):
            self.store_metric(
                100,
                timestamp=self.start + datetime.timedelta(minutes=i * 15),
            )
        query = TimeseriesMetricQueryBuilder(
            self.params,
            interval=3600,
            query="",
            selected_columns=["epm(3600)"],
        )
        result = query.run_query("test_query")
        date_prefix = self.start.strftime("%Y-%m-%dT")
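        # The five events land at 15:30, 15:45, 16:00, 16:15 and 16:30, so two fall into the
        # 15:00 hour bucket and three into the 16:00 bucket.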
        assert result["data"] == [
            {"time": f"{date_prefix}15:00:00+00:00", "epm_3600": 2 / (3600 / 60)},
            {"time": f"{date_prefix}16:00:00+00:00", "epm_3600": 3 / (3600 / 60)},
        ]
        self.assertCountEqual(
            result["meta"],
            [
                {"name": "time", "type": "DateTime('Universal')"},
                {"name": "epm_3600", "type": "Float64"},
            ],
        )

    def test_run_query_with_granularity_larger_than_interval(self):
        """The base MetricsQueryBuilder with a perfect 1d query will try to use granularity 86400,
        which is larger than the interval of 3600; in this case we want to make sure a smaller
        granularity is used so we get the correct result."""
        self.start = datetime.datetime.now(timezone.utc).replace(
            hour=0, minute=0, second=0, microsecond=0
        )
        self.end = datetime.datetime.fromtimestamp(self.start.timestamp() + 86400, timezone.utc)
        self.params = {
            "organization_id": self.organization.id,
            "project_id": self.projects,
            "start": self.start,
            "end": self.end,
        }
        for i in range(1, 5):
            self.store_metric(
                100,
                timestamp=self.start + datetime.timedelta(minutes=i * 15),
            )
        query = TimeseriesMetricQueryBuilder(
            self.params,
            interval=3600,
            query="",
            selected_columns=["epm(3600)"],
        )
        result = query.run_query("test_query")
        date_prefix = self.start.strftime("%Y-%m-%dT")
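        # The four events land at 00:15, 00:30, 00:45 and 01:00, so three fall into the first
        # hour bucket and one into the second.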
        assert result["data"] == [
            {"time": f"{date_prefix}00:00:00+00:00", "epm_3600": 3 / (3600 / 60)},
            {"time": f"{date_prefix}01:00:00+00:00", "epm_3600": 1 / (3600 / 60)},
        ]
        self.assertCountEqual(
            result["meta"],
            [
                {"name": "time", "type": "DateTime('Universal')"},
                {"name": "epm_3600", "type": "Float64"},
            ],
        )

    def test_run_query_with_filter(self):
        for i in range(5):
            self.store_metric(
                100,
                tags={"transaction": "foo_transaction"},
                timestamp=self.start + datetime.timedelta(minutes=i * 15),
            )
            self.store_metric(
                200,
                tags={"transaction": "bar_transaction"},
                timestamp=self.start + datetime.timedelta(minutes=i * 15),
            )
        query = TimeseriesMetricQueryBuilder(
            self.params,
            interval=900,
            query="transaction:foo_transaction",
            selected_columns=["p50(transaction.duration)"],
        )
        result = query.run_query("test_query")
        assert result["data"] == [
            {"time": self.start.isoformat(), "p50_transaction_duration": 100.0},
            {
                "time": (self.start + datetime.timedelta(minutes=15)).isoformat(),
                "p50_transaction_duration": 100.0,
            },
            {
                "time": (self.start + datetime.timedelta(minutes=30)).isoformat(),
                "p50_transaction_duration": 100.0,
            },
            {
                "time": (self.start + datetime.timedelta(minutes=45)).isoformat(),
                "p50_transaction_duration": 100.0,
            },
            {
                "time": (self.start + datetime.timedelta(minutes=60)).isoformat(),
                "p50_transaction_duration": 100.0,
            },
        ]
        self.assertCountEqual(
            result["meta"],
            [
                {"name": "time", "type": "DateTime('Universal')"},
                {"name": "p50_transaction_duration", "type": "Float64"},
            ],
        )

    def test_error_if_aggregates_disallowed(self):
        def run_query(query):
            with self.assertRaises(IncompatibleMetricsQuery):
                TimeseriesMetricQueryBuilder(
                    self.params,
                    interval=900,
                    query=query,
                    selected_columns=["p50(transaction.duration)"],
                    allow_metric_aggregates=False,
                )

        queries = [
            "p95():>5s",
            "count_unique(user):>0",
            "transaction:foo_transaction AND (!transaction:bar_transaction OR p95():>5s)",
        ]
        for query in queries:
            run_query(query)

    def test_no_error_if_aggregates_disallowed_but_no_aggregates_included(self):
        TimeseriesMetricQueryBuilder(
            self.params,
            interval=900,
            selected_columns=["p50(transaction.duration)"],
            query="transaction:foo_transaction",
            allow_metric_aggregates=False,
        )

    def test_invalid_semver_filter(self):
        with self.assertRaises(InvalidSearchQuery):
            QueryBuilder(
                Dataset.Discover,
                self.params,
                "user.email:foo@example.com release.build:[1.2.1]",
                ["user.email", "release"],
            )


class HistogramMetricQueryBuilderTest(MetricBuilderBaseTest):
    def test_histogram_columns_set_on_builder(self):
        builder = HistogramMetricQueryBuilder(
            params=self.params,
            query="",
            selected_columns=[
                "histogram(transaction.duration)",
                "histogram(measurements.lcp)",
                "histogram(measurements.fcp) as test",
            ],
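            # The positional HistogramParams arguments are assumed here to be
            # (num_buckets, bucket_size, start_offset, multiplier).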
            histogram_params=HistogramParams(
                5,
                100,
                0,
                1,  # not used by Metrics
            ),
        )
        self.assertCountEqual(
            builder.histogram_aliases,
            [
                "histogram_transaction_duration",
                "histogram_measurements_lcp",
                "test",
            ],
        )

    def test_get_query(self):
        self.store_metric(
            100,
            tags={"transaction": "foo_transaction"},
            timestamp=self.start + datetime.timedelta(minutes=5),
        )
        self.store_metric(
            100,
            tags={"transaction": "foo_transaction"},
            timestamp=self.start + datetime.timedelta(minutes=5),
        )
        self.store_metric(
            450,
            tags={"transaction": "foo_transaction"},
            timestamp=self.start + datetime.timedelta(minutes=5),
        )
        query = HistogramMetricQueryBuilder(
            params=self.params,
            query="",
            selected_columns=["histogram(transaction.duration)"],
            histogram_params=HistogramParams(
                5,
                100,
                0,
                1,  # not used by Metrics
            ),
        )
        snql_query = query.run_query("test_query")
        assert len(snql_query["data"]) == 1
        # This data is interpolated via rebucket_histogram
        assert snql_query["data"][0]["histogram_transaction_duration"] == [
            (0.0, 100.0, 0),
            (100.0, 200.0, 2),
            (200.0, 300.0, 1),
            (300.0, 400.0, 1),
            (400.0, 500.0, 1),
        ]

    def test_query_normal_distribution(self):
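        # Store a symmetric, roughly normal distribution: (5 - abs(i - 2)) ** 2 yields
        # 9, 16, 25, 16 and 9 values at 50, 150, 250, 350 and 450 respectively.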
        for i in range(5):
            for _ in range((5 - abs(i - 2)) ** 2):
                self.store_metric(
                    100 * i + 50,
                    tags={"transaction": "foo_transaction"},
                    timestamp=self.start + datetime.timedelta(minutes=5),
                )
        query = HistogramMetricQueryBuilder(
            params=self.params,
            query="",
            selected_columns=["histogram(transaction.duration)"],
            histogram_params=HistogramParams(
                5,
                100,
                0,
                1,  # not used by Metrics
            ),
        )
        snql_query = query.run_query("test_query")
        assert len(snql_query["data"]) == 1
        # This data is interpolated via rebucket_histogram
        assert snql_query["data"][0]["histogram_transaction_duration"] == [
            (0.0, 100.0, 10),
            (100.0, 200.0, 17),
            (200.0, 300.0, 23),
            (300.0, 400.0, 17),
            (400.0, 500.0, 10),
        ]