# test_organization_events_histogram.py

import random
from collections import namedtuple
from copy import deepcopy
from datetime import timedelta

import pytest
from django.urls import reverse
from rest_framework.exceptions import ErrorDetail

from sentry.testutils import APITestCase, SnubaTestCase
from sentry.testutils.helpers.datetime import before_now, iso_format
from sentry.utils.samples import load_data
from sentry.utils.snuba import get_array_column_alias

HistogramSpec = namedtuple("HistogramSpec", ["start", "end", "fields"])

ARRAY_COLUMNS = ["measurements", "span_op_breakdowns"]
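

# A HistogramSpec describes one bucket of synthetic data: `fields` holds
# (field name, event count) pairs whose values land in [start, end).
# ARRAY_COLUMNS lists the array-backed column families under test; the
# histogram tests below run once per column family.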
class OrganizationEventsHistogramEndpointTest(APITestCase, SnubaTestCase):
    def setUp(self):
        super().setUp()
        self.min_ago = iso_format(before_now(minutes=1))
        self.data = load_data("transaction")
        self.features = {}
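
    # Store `count` transaction events per field in each spec, with values
    # drawn uniformly at random from [start, end). Each value is written to
    # both a measurement and a span-op breakdown, so either column family can
    # be queried against the same data.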
    def populate_events(self, specs):
        start = before_now(minutes=5)
        for spec in specs:
            spec = HistogramSpec(*spec)
            for suffix_key, count in spec.fields:
                for i in range(count):
                    data = deepcopy(self.data)
                    measurement_name = suffix_key
                    breakdown_name = f"ops.{suffix_key}"
                    data["timestamp"] = iso_format(start)
                    data["start_timestamp"] = iso_format(start - timedelta(seconds=i))
                    value = random.random() * (spec.end - spec.start) + spec.start
                    data["transaction"] = f"/measurement/{measurement_name}/value/{value}"
                    data["measurements"] = {measurement_name: {"value": value}}
                    data["breakdowns"] = {
                        "span_ops": {
                            breakdown_name: {"value": value},
                        }
                    }
                    self.store_event(data, self.project.id)
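
    # Build the expected response payload from the same specs. Only spec.start
    # is used as the bin value; for example, [(0, 1, [("foo", 2)])] becomes
    # {"foo": [{"bin": 0, "count": 2}]}.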
    def as_response_data(self, specs):
        data = {}
        for spec in specs:
            spec = HistogramSpec(*spec)
            for measurement, count in sorted(spec.fields):
                if measurement not in data:
                    data[measurement] = []
                data[measurement].append({"bin": spec.start, "count": count})
        return data
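
    # Issue a GET against the histogram endpoint. The performance-view feature
    # is enabled by default; subclasses can layer extra flags on through
    # self.features.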
    def do_request(self, query, features=None):
        if features is None:
            features = {"organizations:performance-view": True}
        features.update(self.features)
        self.login_as(user=self.user)
        url = reverse(
            "sentry-api-0-organization-events-histogram",
            kwargs={"organization_slug": self.organization.slug},
        )
        with self.feature(features):
            return self.client.get(url, query, format="json")
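
    # Endpoint tests: the no-project case and parameter validation come first;
    # histogram bucketing behavior is covered further down.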
    def test_no_projects(self):
        response = self.do_request({})
        assert response.status_code == 200, response.content
        assert response.data == {}

    def test_good_params(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "query": "event.type:transaction",
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"

    def test_good_params_with_optionals(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "query": "event.type:transaction",
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
                "precision": 0,
                "min": 0,
                "max": 10,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"

    def test_bad_params_reverse_min_max(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "query": "event.type:transaction",
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
                "precision": 0,
                "min": 10,
                "max": 5,
            }
            response = self.do_request(query)
            assert response.data == {"non_field_errors": ["min cannot be greater than max."]}

    def test_bad_params_missing_fields(self):
        query = {
            "project": [self.project.id],
            "numBuckets": 10,
        }
        response = self.do_request(query)
        assert response.status_code == 400
        assert response.data == {
            "field": [ErrorDetail(string="This field is required.", code="required")],
        }

    def test_bad_params_too_many_fields(self):
        query = {
            "project": [self.project.id],
            "field": ["foo", "bar", "baz", "qux", "quux"],
            "numBuckets": 10,
            "min": 0,
            "max": 100,
            "precision": 0,
        }
        response = self.do_request(query)
        assert response.status_code == 400
        assert response.data == {
            "field": ["Ensure this field has no more than 4 elements."],
        }

    def test_bad_params_mixed_fields(self):
        for array_column in ARRAY_COLUMNS:
            for other_array_column in ARRAY_COLUMNS:
                query = {
                    "project": [self.project.id],
                    "field": [
                        "foo",
                        f"{get_array_column_alias(array_column)}.foo",
                        f"{get_array_column_alias(other_array_column)}.bar",
                    ],
                    "numBuckets": 10,
                    "min": 0,
                    "max": 100,
                    "precision": 0,
                }
                response = self.do_request(query)
                assert response.status_code == 400, f"failing for {array_column}"
                assert response.data == {
                    "field": [
                        "You can only generate histogram for one column at a time unless they are all measurements or all span op breakdowns."
                    ],
                }, f"failing for {array_column}"

    def test_bad_params_missing_num_buckets(self):
        query = {
            "project": [self.project.id],
            "field": ["foo"],
        }
        response = self.do_request(query)
        assert response.status_code == 400
        assert response.data == {
            "numBuckets": ["This field is required."],
        }

    def test_bad_params_invalid_num_buckets(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": "baz",
            }
            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "numBuckets": ["A valid integer is required."],
            }, f"failing for {array_column}"

    def test_bad_params_invalid_negative_num_buckets(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": -1,
            }
            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "numBuckets": ["Ensure this value is greater than or equal to 1."],
            }, f"failing for {array_column}"

    def test_bad_params_num_buckets_too_large(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 150,
            }
            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "numBuckets": ["Ensure this value is less than or equal to 100."],
            }, f"failing for {array_column}"

    def test_bad_params_invalid_precision_too_small(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
                "precision": -1,
            }
            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "precision": ["Ensure this value is greater than or equal to 0."],
            }, f"failing for {array_column}"

    def test_bad_params_invalid_precision_too_big(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
                "precision": 100,
            }
            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "precision": ["Ensure this value is less than or equal to 4."],
            }, f"failing for {array_column}"

    def test_bad_params_invalid_min(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
                "min": "qux",
            }
            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "min": ["A valid number is required."],
            }, f"failing for {array_column}"

    def test_bad_params_invalid_max(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
                "max": "qux",
            }
            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "max": ["A valid number is required."],
            }, f"failing for {array_column}"

    def test_histogram_empty(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 5,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [(i, i + 1, [(f"{alias}.foo", 0), (f"{alias}.bar", 0)]) for i in range(5)]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"
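
    # The histogram tests that follow share a pattern: seed events with
    # populate_events, query the endpoint once per array column alias, and
    # compare the response to as_response_data built from expected bucket specs.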
    def test_histogram_simple(self):
        # range is [0, 5), so it is divided into 5 buckets of width 1
        specs = [
            (0, 1, [("foo", 1)]),
            (1, 2, [("foo", 1)]),
            (2, 3, [("foo", 1)]),
            (4, 5, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (0, 1, [(f"{alias}.foo", 1)]),
                (1, 2, [(f"{alias}.foo", 1)]),
                (2, 3, [(f"{alias}.foo", 1)]),
                (3, 4, [(f"{alias}.foo", 0)]),
                (4, 5, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_simple_using_min_max(self):
        # range is [0, 5), so it is divided into 5 buckets of width 1
        specs = [
            (0, 1, [("foo", 1)]),
            (1, 2, [("foo", 1)]),
            (2, 3, [("foo", 1)]),
            (4, 5, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "min": 0,
                "max": 5,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (0, 1, [(f"{alias}.foo", 1)]),
                (1, 2, [(f"{alias}.foo", 1)]),
                (2, 3, [(f"{alias}.foo", 1)]),
                (3, 4, [(f"{alias}.foo", 0)]),
                (4, 5, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_simple_using_given_min_above_queried_max(self):
        # All these events are out of range of the query parameters,
        # and should not appear in the results.
        specs = [
            (0, 1, [("foo", 1)]),
            (1, 2, [("foo", 1)]),
            (2, 3, [("foo", 1)]),
            (4, 5, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "min": 6,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (6, 7, [(f"{alias}.foo", 0)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_simple_using_given_max_below_queried_min(self):
        # All these events are out of range of the query parameters,
        # and should not appear in the results.
        specs = [
            (6, 7, [("foo", 1)]),
            (8, 9, [("foo", 1)]),
            (10, 11, [("foo", 1)]),
            (12, 13, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "max": 6,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (5, 6, [(f"{alias}.foo", 0)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_large_buckets(self):
        # make sure that it works for large width buckets
        # range is [0, 99], so it is divided into 5 buckets of width 20
        specs = [
            (0, 0, [("foo", 2)]),
            (99, 99, [("foo", 2)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (0, 20, [(f"{alias}.foo", 2)]),
                (20, 40, [(f"{alias}.foo", 0)]),
                (40, 60, [(f"{alias}.foo", 0)]),
                (60, 80, [(f"{alias}.foo", 0)]),
                (80, 100, [(f"{alias}.foo", 2)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_non_zero_offset(self):
        # range is [10, 15), so it is divided into 5 buckets of width 1
        specs = [
            (10, 11, [("foo", 1)]),
            (12, 13, [("foo", 1)]),
            (13, 14, [("foo", 1)]),
            (14, 15, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (10, 11, [(f"{alias}.foo", 1)]),
                (11, 12, [(f"{alias}.foo", 0)]),
                (12, 13, [(f"{alias}.foo", 1)]),
                (13, 14, [(f"{alias}.foo", 1)]),
                (14, 15, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_extra_data(self):
        # range is [11, 16), so it is divided into 5 buckets of width 1
        # make sure every bin has some value
        specs = [
            (10, 11, [("foo", 1)]),
            (11, 12, [("foo", 1)]),
            (12, 13, [("foo", 1)]),
            (13, 14, [("foo", 1)]),
            (14, 15, [("foo", 1)]),
            (15, 16, [("foo", 1)]),
            (16, 17, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "min": 11,
                "max": 16,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (11, 12, [(f"{alias}.foo", 1)]),
                (12, 13, [(f"{alias}.foo", 1)]),
                (13, 14, [(f"{alias}.foo", 1)]),
                (14, 15, [(f"{alias}.foo", 1)]),
                (15, 16, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_non_zero_min_large_buckets(self):
        # range is [10, 59], so it is divided into 5 buckets of width 10
        specs = [
            (10, 10, [("foo", 1)]),
            (40, 50, [("foo", 1)]),
            (59, 59, [("foo", 2)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (10, 20, [(f"{alias}.foo", 1)]),
                (20, 30, [(f"{alias}.foo", 0)]),
                (30, 40, [(f"{alias}.foo", 0)]),
                (40, 50, [(f"{alias}.foo", 1)]),
                (50, 60, [(f"{alias}.foo", 2)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    @pytest.mark.xfail(reason="snuba does not allow - in alias names")
    def test_histogram_negative_values(self):
        # range is [-9, -4), so it is divided into 5 buckets of width 1
        specs = [
            (-9, -8, [("foo", 3)]),
            (-5, -4, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (-9, -8, [(f"{alias}.foo", 3)]),
                (-8, -7, [(f"{alias}.foo", 0)]),
                (-7, -6, [(f"{alias}.foo", 0)]),
                (-6, -5, [(f"{alias}.foo", 0)]),
                (-5, -4, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    @pytest.mark.xfail(reason="snuba does not allow - in alias names")
    def test_histogram_positive_and_negative_values(self):
        # range is [-50, 49], so it is divided into 5 buckets of width 20
        specs = [
            (-50, -50, [("foo", 1)]),
            (-10, 10, [("foo", 2)]),
            (49, 49, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (-50, -30, [(f"{alias}.foo", 1)]),
                (-30, -10, [(f"{alias}.foo", 0)]),
                (-10, 10, [(f"{alias}.foo", 2)]),
                (10, 30, [(f"{alias}.foo", 0)]),
                (30, 50, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_increased_precision(self):
        # range is [1.00, 2.24], so it is divided into 5 buckets of width 0.25
        specs = [
            (1.00, 1.00, [("foo", 3)]),
            (2.24, 2.24, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "precision": 2,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (1.00, 1.25, [(f"{alias}.foo", 3)]),
                (1.25, 1.50, [(f"{alias}.foo", 0)]),
                (1.50, 1.75, [(f"{alias}.foo", 0)]),
                (1.75, 2.00, [(f"{alias}.foo", 0)]),
                (2.00, 2.25, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_increased_precision_with_min_max(self):
        # range is [1.25, 2.00), so it is divided into 3 buckets of width 0.25
        specs = [
            (1.00, 1.25, [("foo", 3)]),
            (2.00, 2.25, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 3,
                "precision": 2,
                "min": 1.25,
                "max": 2.00,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (1.25, 1.50, [(f"{alias}.foo", 0)]),
                (1.50, 1.75, [(f"{alias}.foo", 0)]),
                (1.75, 2.00, [(f"{alias}.foo", 0)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_increased_precision_large_buckets(self):
        # range is [10.0000, 59.9999] so it is divided into 5 buckets of width 10
        specs = [
            (10.0000, 10.0000, [("foo", 1)]),
            (30.0000, 40.0000, [("foo", 1)]),
            (59.9999, 59.9999, [("foo", 2)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "precision": 4,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (10.0000, 20.0000, [(f"{alias}.foo", 1)]),
                (20.0000, 30.0000, [(f"{alias}.foo", 0)]),
                (30.0000, 40.0000, [(f"{alias}.foo", 1)]),
                (40.0000, 50.0000, [(f"{alias}.foo", 0)]),
                (50.0000, 60.0000, [(f"{alias}.foo", 2)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_multiple_measures(self):
        # range is [10, 59] so it is divided into 5 buckets of width 10
        specs = [
            (10, 10, [("bar", 0), ("baz", 0), ("foo", 1)]),
            (30, 40, [("bar", 2), ("baz", 0), ("foo", 0)]),
            (59, 59, [("bar", 0), ("baz", 1), ("foo", 0)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.bar", f"{alias}.baz", f"{alias}.foo"],
                "numBuckets": 5,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (
                    10,
                    20,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 1),
                    ],
                ),
                (
                    20,
                    30,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 0),
                    ],
                ),
                (
                    30,
                    40,
                    [
                        (f"{alias}.bar", 2),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 0),
                    ],
                ),
                (
                    40,
                    50,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 0),
                    ],
                ),
                (
                    50,
                    60,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 1),
                        (f"{alias}.foo", 0),
                    ],
                ),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_max_value_on_edge(self):
        # range is [11, 21] so it is divided into 5 buckets of width 5
        # because using buckets of width 2 will exclude 21, and the next
        # nice number is 5
        specs = [
            (11, 11, [("bar", 0), ("baz", 0), ("foo", 1)]),
            (21, 21, [("bar", 1), ("baz", 1), ("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.bar", f"{alias}.baz", f"{alias}.foo"],
                "numBuckets": 5,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (
                    10,
                    15,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 1),
                    ],
                ),
                (
                    15,
                    20,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 0),
                    ],
                ),
                (
                    20,
                    25,
                    [
                        (f"{alias}.bar", 1),
                        (f"{alias}.baz", 1),
                        (f"{alias}.foo", 1),
                    ],
                ),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_bins_exceed_max(self):
        specs = [
            (10, 15, [("bar", 0), ("baz", 0), ("foo", 1)]),
            (30, 30, [("bar", 1), ("baz", 1), ("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.bar", f"{alias}.baz", f"{alias}.foo"],
                "numBuckets": 5,
                "min": 10,
                "max": 21,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (
                    10,
                    15,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 1),
                    ],
                ),
                (
                    15,
                    20,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 0),
                    ],
                ),
                (
                    20,
                    25,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 0),
                    ],
                ),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"
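
    # dataFilter tests: "all" buckets every recorded value, while
    # "exclude_outliers" trims extreme values from the histogram range.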
    def test_bad_params_invalid_data_filter(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
                "dataFilter": "invalid",
            }
            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "dataFilter": ['"invalid" is not a valid choice.'],
            }, f"failing for {array_column}"

    def test_histogram_all_data_filter(self):
        specs = [
            (0, 1, [("foo", 4)]),
            (4000, 5000, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "dataFilter": "all",
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (0, 1000, [(f"{alias}.foo", 4)]),
                (1000, 2000, [(f"{alias}.foo", 0)]),
                (2000, 3000, [(f"{alias}.foo", 0)]),
                (3000, 4000, [(f"{alias}.foo", 0)]),
                (4000, 5000, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_exclude_outliers_data_filter(self):
        specs = [
            (0, 0, [("foo", 4)]),
            (4000, 4001, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "dataFilter": "exclude_outliers",
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (0, 1, [(f"{alias}.foo", 4)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_missing_measurement_data(self):
        # make sure there is at least one transaction
        specs = [
            (0, 1, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                # make sure to query a measurement that does not exist
                "field": [f"{alias}.bar"],
                "numBuckets": 5,
                "dataFilter": "exclude_outliers",
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (0, 1, [(f"{alias}.bar", 0)]),
                (1, 1, [(f"{alias}.bar", 0)]),
                (2, 2, [(f"{alias}.bar", 0)]),
                (3, 3, [(f"{alias}.bar", 0)]),
                (4, 4, [(f"{alias}.bar", 0)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_missing_measurement_data_with_explicit_bounds(self):
        # make sure there is at least one transaction
        specs = [
            (0, 1, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                # make sure to query a measurement that does not exist
                "field": [f"{alias}.bar"],
                "numBuckets": 5,
                "dataFilter": "exclude_outliers",
                "min": 10,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (10, 11, [(f"{alias}.bar", 0)]),
                (11, 11, [(f"{alias}.bar", 0)]),
                (12, 12, [(f"{alias}.bar", 0)]),
                (13, 13, [(f"{alias}.bar", 0)]),
                (14, 14, [(f"{alias}.bar", 0)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_ignores_aggregate_conditions(self):
        # range is [0, 5), so it is divided into 5 buckets of width 1
        specs = [
            (0, 1, [("foo", 1)]),
            (1, 2, [("foo", 1)]),
            (2, 3, [("foo", 1)]),
            (3, 4, [("foo", 0)]),
            (4, 5, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "query": "tpm():>0.001",
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (0, 1, [(f"{alias}.foo", 1)]),
                (1, 2, [(f"{alias}.foo", 1)]),
                (2, 3, [(f"{alias}.foo", 1)]),
                (3, 4, [(f"{alias}.foo", 0)]),
                (4, 5, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_outlier_filtering_with_no_rows(self):
        query = {
            "project": [self.project.id],
            "field": ["transaction.duration"],
            "numBuckets": 5,
            "dataFilter": "exclude_outliers",
        }
        response = self.do_request(query)
        assert response.status_code == 200
        expected = [
            (0, 1, [("transaction.duration", 0)]),
        ]
        assert response.data == self.as_response_data(expected)
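

# Re-run the entire suite with the SnQL-based query path enabled via the
# organizations:performance-use-snql feature flag.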
class OrganizationEventsHistogramEndpointTestWithSnql(OrganizationEventsHistogramEndpointTest):
    def setUp(self):
        super().setUp()
        self.min_ago = iso_format(before_now(minutes=1))
        self.data = load_data("transaction")
        self.features["organizations:performance-use-snql"] = True