# test_discover_timeseries_query.py

from datetime import timedelta
from unittest.mock import patch

import pytest

from sentry.exceptions import InvalidSearchQuery
from sentry.models.transaction_threshold import ProjectTransactionThreshold, TransactionMetric
from sentry.snuba import discover
from sentry.snuba.dataset import Dataset
from sentry.testutils.cases import SnubaTestCase, TestCase
from sentry.testutils.helpers.datetime import before_now, iso_format
from sentry.utils.samples import load_data

ARRAY_COLUMNS = ["measurements", "span_op_breakdowns"]
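

# Fixture note: setUp() stores three events relative to `day_ago` (10:00 on the
# previous day). Events "a" and "b" fall into the 11:00 hourly bucket and "c"
# into the 12:00 bucket, so hourly rollups over this window see counts of [2, 1].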
class TimeseriesBase(SnubaTestCase, TestCase):
    def setUp(self):
        super().setUp()

        self.one_min_ago = before_now(minutes=1)
        self.day_ago = before_now(days=1).replace(hour=10, minute=0, second=0, microsecond=0)

        self.store_event(
            data={
                "event_id": "a" * 32,
                "message": "very bad",
                "timestamp": iso_format(self.day_ago + timedelta(hours=1)),
                "fingerprint": ["group1"],
                "tags": {"important": "yes"},
                "user": {"id": 1},
            },
            project_id=self.project.id,
        )
        self.store_event(
            data={
                "event_id": "b" * 32,
                "message": "oh my",
                "timestamp": iso_format(self.day_ago + timedelta(hours=1, minutes=1)),
                "fingerprint": ["group2"],
                "tags": {"important": "no"},
            },
            project_id=self.project.id,
        )
        self.store_event(
            data={
                "event_id": "c" * 32,
                "message": "very bad",
                "timestamp": iso_format(self.day_ago + timedelta(hours=2, minutes=1)),
                "fingerprint": ["group2"],
                "tags": {"important": "yes"},
            },
            project_id=self.project.id,
        )


class DiscoverTimeseriesQueryTest(TimeseriesBase):
    def test_invalid_field_in_function(self):
        with pytest.raises(InvalidSearchQuery):
            discover.timeseries_query(
                selected_columns=["min(transaction)"],
                query="transaction:api.issue.delete",
                referrer="test_discover_query",
                params={"project_id": [self.project.id]},
                rollup=1800,
            )

    def test_missing_start_and_end(self):
        with pytest.raises(InvalidSearchQuery):
            discover.timeseries_query(
                selected_columns=["count()"],
                query="transaction:api.issue.delete",
                referrer="test_discover_query",
                params={"project_id": [self.project.id]},
                rollup=1800,
            )

    def test_no_aggregations(self):
        with pytest.raises(InvalidSearchQuery):
            discover.timeseries_query(
                selected_columns=["transaction", "title"],
                query="transaction:api.issue.delete",
                referrer="test_discover_query",
                params={
                    "start": self.day_ago,
                    "end": self.day_ago + timedelta(hours=2),
                    "project_id": [self.project.id],
                },
                rollup=1800,
            )

    def test_field_alias(self):
        result = discover.timeseries_query(
            selected_columns=["p95()"],
            query="event.type:transaction transaction:api.issue.delete",
            referrer="test_discover_query",
            params={
                "start": self.day_ago,
                "end": self.day_ago + timedelta(hours=2),
                "project_id": [self.project.id],
            },
            rollup=3600,
        )
        assert len(result.data["data"]) == 3

    def test_failure_rate_field_alias(self):
        result = discover.timeseries_query(
            selected_columns=["failure_rate()"],
            query="event.type:transaction transaction:api.issue.delete",
            referrer="test_discover_query",
            params={
                "start": self.day_ago,
                "end": self.day_ago + timedelta(hours=2),
                "project_id": [self.project.id],
            },
            rollup=3600,
        )
        assert len(result.data["data"]) == 3

    def test_aggregate_function(self):
        result = discover.timeseries_query(
            selected_columns=["count()"],
            query="",
            referrer="test_discover_query",
            params={
                "start": self.day_ago,
                "end": self.day_ago + timedelta(hours=2),
                "project_id": [self.project.id],
            },
            rollup=3600,
        )
        assert len(result.data["data"]) == 3
        assert [2] == [val["count"] for val in result.data["data"] if "count" in val]

        result = discover.timeseries_query(
            selected_columns=["count_unique(user)"],
            query="",
            referrer="test_discover_query",
            params={
                "start": self.day_ago,
                "end": self.day_ago + timedelta(hours=2),
                "project_id": [self.project.id],
            },
            rollup=3600,
        )
        assert len(result.data["data"]) == 3
        keys = set()
        for row in result.data["data"]:
            keys.update(list(row.keys()))
        assert "count_unique_user" in keys
        assert "time" in keys

    def test_comparison_aggregate_function_invalid(self):
        with pytest.raises(
            InvalidSearchQuery, match="Only one column can be selected for comparison queries"
        ):
            discover.timeseries_query(
                selected_columns=["count()", "count_unique(user)"],
                query="",
                referrer="test_discover_query",
                params={
                    "start": self.day_ago,
                    "end": self.day_ago + timedelta(hours=2),
                    "project_id": [self.project.id],
                },
                rollup=3600,
                comparison_delta=timedelta(days=1),
            )
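
    # comparison_delta runs a second query over the window shifted back by the
    # given delta; each shifted bucket's value is attached to the current bucket
    # as "comparisonCount", as the assertions below exercise.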

    def test_comparison_aggregate_function(self):
        self.store_event(
            data={
                "timestamp": iso_format(self.day_ago + timedelta(hours=1)),
                "user": {"id": 1},
            },
            project_id=self.project.id,
        )

        result = discover.timeseries_query(
            selected_columns=["count()"],
            query="",
            referrer="test_discover_query",
            params={
                "start": self.day_ago,
                "end": self.day_ago + timedelta(hours=2),
                "project_id": [self.project.id],
            },
            rollup=3600,
            comparison_delta=timedelta(days=1),
        )
        assert len(result.data["data"]) == 3
        # Comparison counts should all be 0, since there is no data in the
        # comparison period at all.
        assert [(0, 0), (3, 0), (0, 0)] == [
            (val.get("count", 0), val.get("comparisonCount", 0)) for val in result.data["data"]
        ]

        self.store_event(
            data={
                "timestamp": iso_format(self.day_ago + timedelta(days=-1, hours=1)),
                "user": {"id": 1},
            },
            project_id=self.project.id,
        )
        self.store_event(
            data={
                "timestamp": iso_format(self.day_ago + timedelta(days=-1, hours=1, minutes=2)),
                "user": {"id": 2},
            },
            project_id=self.project.id,
        )
        self.store_event(
            data={
                "timestamp": iso_format(self.day_ago + timedelta(days=-1, hours=2, minutes=1)),
            },
            project_id=self.project.id,
        )

        result = discover.timeseries_query(
            selected_columns=["count()"],
            query="",
            referrer="test_discover_query",
            params={
                "start": self.day_ago,
                "end": self.day_ago + timedelta(hours=2, minutes=1),
                "project_id": [self.project.id],
            },
            rollup=3600,
            comparison_delta=timedelta(days=1),
        )
        assert len(result.data["data"]) == 3
        # The second bucket has 3 events in the current period and 2 in the
        # comparison period, a 50% increase.
        assert [(0, 0), (3, 2), (0, 0)] == [
            (val.get("count", 0), val.get("comparisonCount", 0)) for val in result.data["data"]
        ]

        result = discover.timeseries_query(
            selected_columns=["count_unique(user)"],
            query="",
            params={
                "start": self.day_ago,
                "end": self.day_ago + timedelta(hours=2, minutes=2),
                "project_id": [self.project.id],
            },
            rollup=3600,
            referrer="test_discover_query",
            comparison_delta=timedelta(days=1),
        )
        assert len(result.data["data"]) == 3
        # The second bucket has 1 unique user in the current period and 2 in the
        # comparison period, a 50% decrease.
        assert [(0, 0), (1, 2), (0, 0)] == [
            (val.get("count_unique_user", 0), val.get("comparisonCount", 0))
            for val in result.data["data"]
        ]

    def test_count_miserable(self):
        event_data = load_data("transaction")
        # Half of the duration so we don't get weird rounding differences when
        # comparing the results.
        event_data["breakdowns"]["span_ops"]["ops.http"]["value"] = 300
        event_data["start_timestamp"] = iso_format(self.day_ago + timedelta(minutes=30))
        event_data["timestamp"] = iso_format(self.day_ago + timedelta(minutes=30, seconds=3))
        self.store_event(data=event_data, project_id=self.project.id)

        ProjectTransactionThreshold.objects.create(
            project=self.project,
            organization=self.project.organization,
            threshold=100,
            metric=TransactionMetric.DURATION.value,
        )
        project2 = self.create_project()
        ProjectTransactionThreshold.objects.create(
            project=project2,
            organization=project2.organization,
            threshold=100,
            metric=TransactionMetric.DURATION.value,
        )

        result = discover.timeseries_query(
            selected_columns=["count_miserable(user)"],
            referrer="test_discover_query",
            query="",
            params={
                "start": self.day_ago,
                "end": self.day_ago + timedelta(hours=2),
                "project_id": [self.project.id, project2.id],
                "organization_id": self.organization.id,
            },
            rollup=3600,
        )
        assert len(result.data["data"]) == 3
        assert [1] == [
            val["count_miserable_user"]
            for val in result.data["data"]
            if "count_miserable_user" in val
        ]
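
    # Note on count_miserable(user): it counts unique users whose transaction
    # duration exceeds the project's response-time threshold by roughly a factor
    # of four (100ms here), so the 3s transaction above yields one miserable user.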

    def test_count_miserable_with_arithmetic(self):
        event_data = load_data("transaction")
        # Half of the duration so we don't get weird rounding differences when
        # comparing the results.
        event_data["breakdowns"]["span_ops"]["ops.http"]["value"] = 300
        event_data["start_timestamp"] = iso_format(self.day_ago + timedelta(minutes=30))
        event_data["timestamp"] = iso_format(self.day_ago + timedelta(minutes=30, seconds=3))
        self.store_event(data=event_data, project_id=self.project.id)

        ProjectTransactionThreshold.objects.create(
            project=self.project,
            organization=self.project.organization,
            threshold=100,
            metric=TransactionMetric.DURATION.value,
        )
        project2 = self.create_project()
        ProjectTransactionThreshold.objects.create(
            project=project2,
            organization=project2.organization,
            threshold=100,
            metric=TransactionMetric.DURATION.value,
        )

        result = discover.timeseries_query(
            selected_columns=["equation|count_miserable(user) - 100"],
            referrer="test_discover_query",
            query="",
            params={
                "start": self.day_ago,
                "end": self.day_ago + timedelta(hours=2),
                "project_id": [self.project.id, project2.id],
                "organization_id": self.organization.id,
            },
            rollup=3600,
        )
        assert len(result.data["data"]) == 3
        assert [1 - 100] == [
            val["equation[0]"] for val in result.data["data"] if "equation[0]" in val
        ]
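
    # Equation columns ("equation|<expr>") come back aliased positionally as
    # "equation[0]", "equation[1]", and so on in each result row.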

    def test_equation_function(self):
        result = discover.timeseries_query(
            selected_columns=["equation|count() / 100"],
            query="",
            referrer="test_discover_query",
            params={
                "start": self.day_ago,
                "end": self.day_ago + timedelta(hours=2),
                "project_id": [self.project.id],
            },
            rollup=3600,
        )
        assert len(result.data["data"]) == 3
        assert [0.02] == [val["equation[0]"] for val in result.data["data"] if "equation[0]" in val]

        result = discover.timeseries_query(
            selected_columns=["equation|count_unique(user) / 100"],
            query="",
            params={
                "start": self.day_ago,
                "end": self.day_ago + timedelta(hours=2),
                "project_id": [self.project.id],
            },
            rollup=3600,
            referrer="test_discover_query",
        )
        assert len(result.data["data"]) == 3
        keys = set()
        for row in result.data["data"]:
            keys.update(list(row.keys()))
        assert "equation[0]" in keys
        assert "time" in keys

    def test_zerofilling(self):
        result = discover.timeseries_query(
            selected_columns=["count()"],
            query="",
            referrer="test_discover_query",
            params={
                "start": self.day_ago,
                "end": self.day_ago + timedelta(hours=3),
                "project_id": [self.project.id],
            },
            rollup=3600,
        )
        assert len(result.data["data"]) == 4, "Should have empty results"
        assert [2, 1] == [
            val["count"] for val in result.data["data"] if "count" in val
        ], result.data["data"]

    def test_conditional_filter(self):
        project2 = self.create_project(organization=self.organization)
        project3 = self.create_project(organization=self.organization)
        self.store_event(
            data={"message": "hello", "timestamp": iso_format(self.one_min_ago)},
            project_id=project2.id,
        )
        self.store_event(
            data={"message": "hello", "timestamp": iso_format(self.one_min_ago)},
            project_id=project3.id,
        )

        result = discover.timeseries_query(
            selected_columns=["count()"],
            query=f"project:{self.project.slug} OR project:{project2.slug}",
            params={
                "start": before_now(minutes=5),
                "end": before_now(seconds=1),
                "project_id": [self.project.id, project2.id, project3.id],
            },
            rollup=3600,
            referrer="test_discover_query",
        )
        data = result.data["data"]
        assert len([d for d in data if "count" in d]) == 1
        for d in data:
            if "count" in d:
                assert d["count"] == 1

    def test_nested_conditional_filter(self):
        project2 = self.create_project(organization=self.organization)
        self.store_event(
            data={"release": "a" * 32, "timestamp": iso_format(self.one_min_ago)},
            project_id=self.project.id,
        )
        self.event = self.store_event(
            data={"release": "b" * 32, "timestamp": iso_format(self.one_min_ago)},
            project_id=self.project.id,
        )
        self.event = self.store_event(
            data={"release": "c" * 32, "timestamp": iso_format(self.one_min_ago)},
            project_id=self.project.id,
        )
        self.event = self.store_event(
            data={"release": "a" * 32, "timestamp": iso_format(self.one_min_ago)},
            project_id=project2.id,
        )

        result = discover.timeseries_query(
            selected_columns=["release", "count()"],
            query="(release:{} OR release:{}) AND project:{}".format(
                "a" * 32, "b" * 32, self.project.slug
            ),
            params={
                "start": before_now(minutes=5),
                "end": before_now(seconds=1),
                "project_id": [self.project.id, project2.id],
            },
            rollup=3600,
            referrer="test_discover_query",
        )
        data = result.data["data"]
        assert len([d for d in data if "count" in d]) == 1
        for d in data:
            if "count" in d:
                assert d["count"] == 2
@pytest.mark.skip("These tests are specific to json which we no longer use")
class TopEventsTimeseriesQueryTest(TimeseriesBase):
    @patch("sentry.snuba.discover.raw_query")
    def test_project_filter_adjusts_filter(self, mock_query):
        """While the function is called with 2 project_ids, we should limit it down to the 1 in top_events"""
        project2 = self.create_project(organization=self.organization)
        top_events = {
            "data": [
                {
                    "project": self.project.slug,
                    "project.id": self.project.id,
                }
            ]
        }
        start = before_now(minutes=5)
        end = before_now(seconds=1)
        discover.top_events_timeseries(
            selected_columns=["project", "count()"],
            params={
                "start": start,
                "end": end,
                "project_id": [self.project.id, project2.id],
            },
            rollup=3600,
            top_events=top_events,
            timeseries_columns=["count()"],
            user_query="",
            orderby=["count()"],
            limit=10000,
            organization=self.organization,
        )
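        # The "project" column is expected to be rebuilt in Snuba via transform(),
        # mapping each stringified project_id onto its slug ('' as the fallback).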
        mock_query.assert_called_with(
            aggregations=[["count", None, "count"]],
            conditions=[],
            # Should be limited to the project in top_events
            filter_keys={"project_id": [self.project.id]},
            selected_columns=[
                "project_id",
                [
                    "transform",
                    [
                        ["toString", ["project_id"]],
                        ["array", [f"'{project.id}'" for project in [self.project, project2]]],
                        ["array", [f"'{project.slug}'" for project in [self.project, project2]]],
                        "''",
                    ],
                    "project",
                ],
            ],
            start=start,
            end=end,
            rollup=3600,
            orderby=["time", "project_id"],
            groupby=["time", "project_id"],
            dataset=Dataset.Discover,
            limit=10000,
            referrer=None,
        )

    @patch("sentry.snuba.discover.raw_query")
    def test_timestamp_fields(self, mock_query):
        timestamp1 = before_now(days=2, minutes=5)
        timestamp2 = before_now(minutes=2)
        top_events = {
            "data": [
                {
                    "timestamp": iso_format(timestamp1),
                    "timestamp.to_hour": iso_format(timestamp1.replace(minute=0, second=0)),
                    "timestamp.to_day": iso_format(timestamp1.replace(hour=0, minute=0, second=0)),
                },
                {
                    "timestamp": iso_format(timestamp2),
                    "timestamp.to_hour": iso_format(timestamp2.replace(minute=0, second=0)),
                    "timestamp.to_day": iso_format(timestamp2.replace(hour=0, minute=0, second=0)),
                },
            ]
        }
        start = before_now(days=3, minutes=10)
        end = before_now(minutes=1)
        discover.top_events_timeseries(
            selected_columns=["timestamp", "timestamp.to_day", "timestamp.to_hour", "count()"],
            params={
                "start": start,
                "end": end,
                "project_id": [self.project.id],
            },
            rollup=3600,
            top_events=top_events,
            timeseries_columns=["count()"],
            user_query="",
            orderby=["count()"],
            limit=10000,
            organization=self.organization,
        )
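        # timestamp.to_hour and timestamp.to_day are expected to compile down to
        # the ClickHouse functions toStartOfHour / toStartOfDay over the raw
        # timestamp column, as asserted below.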
        to_hour = ["toStartOfHour", ["timestamp"], "timestamp.to_hour"]
        to_day = ["toStartOfDay", ["timestamp"], "timestamp.to_day"]
        mock_query.assert_called_with(
            aggregations=[["count", None, "count"]],
            conditions=[
                # Each timestamp field should generate a nested condition.
                # Within each, the conditions are ORed together.
                [
                    ["timestamp", "=", iso_format(timestamp1)],
                    ["timestamp", "=", iso_format(timestamp2)],
                ],
                [
                    [
                        to_day,
                        "=",
                        iso_format(timestamp1.replace(hour=0, minute=0, second=0)),
                    ],
                    [
                        to_day,
                        "=",
                        iso_format(timestamp2.replace(hour=0, minute=0, second=0)),
                    ],
                ],
                [
                    [to_hour, "=", iso_format(timestamp1.replace(minute=0, second=0))],
                    [to_hour, "=", iso_format(timestamp2.replace(minute=0, second=0))],
                ],
            ],
            filter_keys={"project_id": [self.project.id]},
            selected_columns=[
                "timestamp",
                to_day,
                to_hour,
            ],
            start=start,
            end=end,
            rollup=3600,
            orderby=["time", "timestamp", "timestamp.to_day", "timestamp.to_hour"],
            groupby=["time", "timestamp", "timestamp.to_day", "timestamp.to_hour"],
            dataset=Dataset.Discover,
            limit=10000,
            referrer=None,
        )

    @patch("sentry.snuba.discover.query")
    def test_equation_fields_are_auto_added(self, mock_query):
        start = before_now(minutes=5)
        end = before_now(seconds=1)
        discover.top_events_timeseries(
            selected_columns=["count()"],
            equations=["equation|count_unique(user) * 2"],
            params={"start": start, "end": end, "project_id": [self.project.id]},
            rollup=3600,
            timeseries_columns=[],
            user_query="",
            orderby=["equation[0]"],
            limit=10000,
            organization=self.organization,
        )
        mock_query.assert_called_with(
            ["count()"],
            query="",
            params={"start": start, "end": end, "project_id": [self.project.id]},
            equations=["equation|count_unique(user) * 2"],
            orderby=["equation[0]"],
            referrer=None,
            limit=10000,
            auto_aggregations=True,
            use_aggregate_conditions=True,
            include_equation_fields=True,
        )