test_organization_events_histogram.py

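"""Tests for the `sentry-api-0-organization-events-histogram` endpoint.

Each test stores transaction events whose measurement and span-op-breakdown
values fall into known ranges, then asserts on the histogram buckets the
endpoint returns for both array-column families.
"""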
import random
from collections import namedtuple
from copy import deepcopy
from datetime import timedelta

import pytest
from django.urls import reverse
from rest_framework.exceptions import ErrorDetail

from sentry.testutils import APITestCase, SnubaTestCase
from sentry.testutils.helpers.datetime import before_now, iso_format
from sentry.utils.samples import load_data
from sentry.utils.snuba import get_array_column_alias

HistogramSpec = namedtuple("HistogramSpec", ["start", "end", "fields"])

ARRAY_COLUMNS = ["measurements", "span_op_breakdowns"]
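
# Each HistogramSpec is (start, end, fields): a value range plus, for each
# field suffix, the number of events stored (or expected) in that range.
# ARRAY_COLUMNS lists the two array-column families the endpoint supports.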


class OrganizationEventsHistogramEndpointTest(APITestCase, SnubaTestCase):
    def setUp(self):
        super().setUp()
        self.min_ago = iso_format(before_now(minutes=1))
        self.data = load_data("transaction")

    def populate_events(self, specs):
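        """Store, for every (suffix, count) pair in each spec, `count`
        transaction events whose measurement and span-op-breakdown values
        fall inside the spec's [start, end) range."""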
        start = before_now(minutes=5)
        for spec in specs:
            spec = HistogramSpec(*spec)
            for suffix_key, count in spec.fields:
                for i in range(count):
                    data = deepcopy(self.data)

                    measurement_name = suffix_key
                    breakdown_name = f"ops.{suffix_key}"

                    data["timestamp"] = iso_format(start)
                    data["start_timestamp"] = iso_format(start - timedelta(seconds=i))
                    value = random.random() * (spec.end - spec.start) + spec.start

                    data["transaction"] = f"/measurement/{measurement_name}/value/{value}"
                    data["measurements"] = {measurement_name: {"value": value}}
                    data["breakdowns"] = {
                        "span_ops": {
                            breakdown_name: {"value": value},
                        }
                    }
                    self.store_event(data, self.project.id)

    def as_response_data(self, specs):
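        """Build the expected response payload from specs, e.g.
        [(0, 1, [("measurements.foo", 2)])] becomes
        {"measurements.foo": [{"bin": 0, "count": 2}]}."""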
        data = {}
        for spec in specs:
            spec = HistogramSpec(*spec)
            for measurement, count in sorted(spec.fields):
                if measurement not in data:
                    data[measurement] = []
                data[measurement].append({"bin": spec.start, "count": count})
        return data

    def do_request(self, query, features=None):
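        """GET the histogram endpoint as the test user; the performance-view
        feature flag is enabled by default."""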
        if features is None:
            features = {"organizations:performance-view": True}
        self.login_as(user=self.user)
        url = reverse(
            "sentry-api-0-organization-events-histogram",
            kwargs={"organization_slug": self.organization.slug},
        )
        with self.feature(features):
            return self.client.get(url, query, format="json")

    def test_no_projects(self):
        response = self.do_request({})

        assert response.status_code == 200, response.content
        assert response.data == {}

    def test_good_params(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "query": "event.type:transaction",
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"

    def test_good_params_with_optionals(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "query": "event.type:transaction",
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
                "precision": 0,
                "min": 0,
                "max": 10,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"

    def test_bad_params_reverse_min_max(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "query": "event.type:transaction",
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
                "precision": 0,
                "min": 10,
                "max": 5,
            }
            response = self.do_request(query)
            assert response.data == {"non_field_errors": ["min cannot be greater than max."]}

    def test_bad_params_missing_fields(self):
        query = {
            "project": [self.project.id],
            "numBuckets": 10,
        }
        response = self.do_request(query)
        assert response.status_code == 400
        assert response.data == {
            "field": [ErrorDetail(string="This field is required.", code="required")],
        }

    def test_bad_params_too_many_fields(self):
        query = {
            "project": [self.project.id],
            "field": ["foo", "bar", "baz", "qux", "quux"],
            "numBuckets": 10,
            "min": 0,
            "max": 100,
            "precision": 0,
        }
        response = self.do_request(query)
        assert response.status_code == 400
        assert response.data == {
            "field": ["Ensure this field has no more than 4 elements."],
        }

    def test_bad_params_mixed_fields(self):
        for array_column in ARRAY_COLUMNS:
            for other_array_column in ARRAY_COLUMNS:
                query = {
                    "project": [self.project.id],
                    "field": [
                        "foo",
                        f"{get_array_column_alias(array_column)}.foo",
                        f"{get_array_column_alias(other_array_column)}.bar",
                    ],
                    "numBuckets": 10,
                    "min": 0,
                    "max": 100,
                    "precision": 0,
                }
                response = self.do_request(query)
                assert response.status_code == 400, f"failing for {array_column}"
                assert response.data == {
                    "field": [
                        "You can only generate histogram for one column at a time unless they are all measurements or all span op breakdowns."
                    ],
                }, f"failing for {array_column}"

    def test_bad_params_missing_num_buckets(self):
        query = {
            "project": [self.project.id],
            "field": ["foo"],
        }
        response = self.do_request(query)
        assert response.status_code == 400
        assert response.data == {
            "numBuckets": ["This field is required."],
        }

    def test_bad_params_invalid_num_buckets(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": "baz",
            }
            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "numBuckets": ["A valid integer is required."],
            }, f"failing for {array_column}"

    def test_bad_params_invalid_negative_num_buckets(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": -1,
            }
            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "numBuckets": ["Ensure this value is greater than or equal to 1."],
            }, f"failing for {array_column}"

    def test_bad_params_num_buckets_too_large(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 150,
            }
            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "numBuckets": ["Ensure this value is less than or equal to 100."],
            }, f"failing for {array_column}"

    def test_bad_params_invalid_precision_too_small(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
                "precision": -1,
            }
            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "precision": ["Ensure this value is greater than or equal to 0."],
            }, f"failing for {array_column}"

    def test_bad_params_invalid_precision_too_big(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
                "precision": 100,
            }
            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "precision": ["Ensure this value is less than or equal to 4."],
            }, f"failing for {array_column}"

    def test_bad_params_invalid_min(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
                "min": "qux",
            }
            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "min": ["A valid number is required."],
            }, f"failing for {array_column}"

    def test_bad_params_invalid_max(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
                "max": "qux",
            }
            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "max": ["A valid number is required."],
            }, f"failing for {array_column}"

    def test_histogram_empty(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 5,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [(i, i + 1, [(f"{alias}.foo", 0), (f"{alias}.bar", 0)]) for i in range(5)]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_simple(self):
        # range is [0, 5), so it is divided into 5 buckets of width 1
        specs = [
            (0, 1, [("foo", 1)]),
            (1, 2, [("foo", 1)]),
            (2, 3, [("foo", 1)]),
            (4, 5, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (0, 1, [(f"{alias}.foo", 1)]),
                (1, 2, [(f"{alias}.foo", 1)]),
                (2, 3, [(f"{alias}.foo", 1)]),
                (3, 4, [(f"{alias}.foo", 0)]),
                (4, 5, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_simple_using_min_max(self):
        # range is [0, 5), so it is divided into 5 buckets of width 1
        specs = [
            (0, 1, [("foo", 1)]),
            (1, 2, [("foo", 1)]),
            (2, 3, [("foo", 1)]),
            (4, 5, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "min": 0,
                "max": 5,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (0, 1, [(f"{alias}.foo", 1)]),
                (1, 2, [(f"{alias}.foo", 1)]),
                (2, 3, [(f"{alias}.foo", 1)]),
                (3, 4, [(f"{alias}.foo", 0)]),
                (4, 5, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_simple_using_given_min_above_queried_max(self):
        # All these events are out of range of the query parameters,
        # and should not appear in the results.
        specs = [
            (0, 1, [("foo", 1)]),
            (1, 2, [("foo", 1)]),
            (2, 3, [("foo", 1)]),
            (4, 5, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "min": 6,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (6, 7, [(f"{alias}.foo", 0)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_simple_using_given_max_below_queried_min(self):
        # All these events are out of range of the query parameters,
        # and should not appear in the results.
        specs = [
            (6, 7, [("foo", 1)]),
            (8, 9, [("foo", 1)]),
            (10, 11, [("foo", 1)]),
            (12, 13, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "max": 6,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (5, 6, [(f"{alias}.foo", 0)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_large_buckets(self):
        # make sure that it works for large width buckets
        # range is [0, 99], so it is divided into 5 buckets of width 20
        specs = [
            (0, 0, [("foo", 2)]),
            (99, 99, [("foo", 2)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (0, 20, [(f"{alias}.foo", 2)]),
                (20, 40, [(f"{alias}.foo", 0)]),
                (40, 60, [(f"{alias}.foo", 0)]),
                (60, 80, [(f"{alias}.foo", 0)]),
                (80, 100, [(f"{alias}.foo", 2)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_non_zero_offset(self):
        # range is [10, 15), so it is divided into 5 buckets of width 1
        specs = [
            (10, 11, [("foo", 1)]),
            (12, 13, [("foo", 1)]),
            (13, 14, [("foo", 1)]),
            (14, 15, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (10, 11, [(f"{alias}.foo", 1)]),
                (11, 12, [(f"{alias}.foo", 0)]),
                (12, 13, [(f"{alias}.foo", 1)]),
                (13, 14, [(f"{alias}.foo", 1)]),
                (14, 15, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_extra_data(self):
        # range is [11, 16), so it is divided into 5 buckets of width 1
        # make sure every bin has some value
        specs = [
            (10, 11, [("foo", 1)]),
            (11, 12, [("foo", 1)]),
            (12, 13, [("foo", 1)]),
            (13, 14, [("foo", 1)]),
            (14, 15, [("foo", 1)]),
            (15, 16, [("foo", 1)]),
            (16, 17, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "min": 11,
                "max": 16,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (11, 12, [(f"{alias}.foo", 1)]),
                (12, 13, [(f"{alias}.foo", 1)]),
                (13, 14, [(f"{alias}.foo", 1)]),
                (14, 15, [(f"{alias}.foo", 1)]),
                (15, 16, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_non_zero_min_large_buckets(self):
        # range is [10, 59], so it is divided into 5 buckets of width 10
        specs = [
            (10, 10, [("foo", 1)]),
            (40, 50, [("foo", 1)]),
            (59, 59, [("foo", 2)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (10, 20, [(f"{alias}.foo", 1)]),
                (20, 30, [(f"{alias}.foo", 0)]),
                (30, 40, [(f"{alias}.foo", 0)]),
                (40, 50, [(f"{alias}.foo", 1)]),
                (50, 60, [(f"{alias}.foo", 2)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    @pytest.mark.xfail(reason="snuba does not allow - in alias names")
    def test_histogram_negative_values(self):
        # range is [-9, -4), so it is divided into 5 buckets of width 1
        specs = [
            (-9, -8, [("foo", 3)]),
            (-5, -4, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (-9, -8, [(f"{alias}.foo", 3)]),
                (-8, -7, [(f"{alias}.foo", 0)]),
                (-7, -6, [(f"{alias}.foo", 0)]),
                (-6, -5, [(f"{alias}.foo", 0)]),
                (-5, -4, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    @pytest.mark.xfail(reason="snuba does not allow - in alias names")
    def test_histogram_positive_and_negative_values(self):
        # range is [-50, 49], so it is divided into 5 buckets of width 20
        specs = [
            (-50, -50, [("foo", 1)]),
            (-10, 10, [("foo", 2)]),
            (49, 49, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (-50, -30, [(f"{alias}.foo", 1)]),
                (-30, -10, [(f"{alias}.foo", 0)]),
                (-10, 10, [(f"{alias}.foo", 2)]),
                (10, 30, [(f"{alias}.foo", 0)]),
                (30, 50, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_increased_precision(self):
        # range is [1.00, 2.24], so it is divided into 5 buckets of width 0.25
        specs = [
            (1.00, 1.00, [("foo", 3)]),
            (2.24, 2.24, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "precision": 2,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (1.00, 1.25, [(f"{alias}.foo", 3)]),
                (1.25, 1.50, [(f"{alias}.foo", 0)]),
                (1.50, 1.75, [(f"{alias}.foo", 0)]),
                (1.75, 2.00, [(f"{alias}.foo", 0)]),
                (2.00, 2.25, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_increased_precision_with_min_max(self):
        # range is [1.25, 2.00], so it is divided into 3 buckets of width 0.25
        specs = [
            (1.00, 1.25, [("foo", 3)]),
            (2.00, 2.25, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 3,
                "precision": 2,
                "min": 1.25,
                "max": 2.00,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (1.25, 1.50, [(f"{alias}.foo", 0)]),
                (1.50, 1.75, [(f"{alias}.foo", 0)]),
                (1.75, 2.00, [(f"{alias}.foo", 0)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_increased_precision_large_buckets(self):
        # range is [10.0000, 59.9999] so it is divided into 5 buckets of width 10
        specs = [
            (10.0000, 10.0000, [("foo", 1)]),
            (30.0000, 40.0000, [("foo", 1)]),
            (59.9999, 59.9999, [("foo", 2)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "precision": 4,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (10.0000, 20.0000, [(f"{alias}.foo", 1)]),
                (20.0000, 30.0000, [(f"{alias}.foo", 0)]),
                (30.0000, 40.0000, [(f"{alias}.foo", 1)]),
                (40.0000, 50.0000, [(f"{alias}.foo", 0)]),
                (50.0000, 60.0000, [(f"{alias}.foo", 2)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_multiple_measures(self):
        # range is [10, 59] so it is divided into 5 buckets of width 10
        specs = [
            (10, 10, [("bar", 0), ("baz", 0), ("foo", 1)]),
            (30, 40, [("bar", 2), ("baz", 0), ("foo", 0)]),
            (59, 59, [("bar", 0), ("baz", 1), ("foo", 0)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.bar", f"{alias}.baz", f"{alias}.foo"],
                "numBuckets": 5,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (
                    10,
                    20,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 1),
                    ],
                ),
                (
                    20,
                    30,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 0),
                    ],
                ),
                (
                    30,
                    40,
                    [
                        (f"{alias}.bar", 2),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 0),
                    ],
                ),
                (
                    40,
                    50,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 0),
                    ],
                ),
                (
                    50,
                    60,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 1),
                        (f"{alias}.foo", 0),
                    ],
                ),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_max_value_on_edge(self):
        # range is [11, 21]; 5 buckets of width 2 would exclude 21, so the
        # width is rounded up to the next nice number (5), leaving 3 buckets
        specs = [
            (11, 11, [("bar", 0), ("baz", 0), ("foo", 1)]),
            (21, 21, [("bar", 1), ("baz", 1), ("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.bar", f"{alias}.baz", f"{alias}.foo"],
                "numBuckets": 5,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (
                    10,
                    15,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 1),
                    ],
                ),
                (
                    15,
                    20,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 0),
                    ],
                ),
                (
                    20,
                    25,
                    [
                        (f"{alias}.bar", 1),
                        (f"{alias}.baz", 1),
                        (f"{alias}.foo", 1),
                    ],
                ),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_bins_exceed_max(self):
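        # explicit min/max of [10, 21] with 5 buckets would give width 2.2;
        # the width is rounded up to the nice number 5, so the bins run past
        # the requested max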
        specs = [
            (10, 15, [("bar", 0), ("baz", 0), ("foo", 1)]),
            (30, 30, [("bar", 1), ("baz", 1), ("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.bar", f"{alias}.baz", f"{alias}.foo"],
                "numBuckets": 5,
                "min": 10,
                "max": 21,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (
                    10,
                    15,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 1),
                    ],
                ),
                (
                    15,
                    20,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 0),
                    ],
                ),
                (
                    20,
                    25,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 0),
                    ],
                ),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_bad_params_invalid_data_filter(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
                "dataFilter": "invalid",
            }
            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "dataFilter": ['"invalid" is not a valid choice.'],
            }, f"failing for {array_column}"

    def test_histogram_all_data_filter(self):
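        # with dataFilter=all, the outlier events around 4000-5000 stay in range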
        specs = [
            (0, 1, [("foo", 4)]),
            (4000, 5000, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "dataFilter": "all",
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (0, 1000, [(f"{alias}.foo", 4)]),
                (1000, 2000, [(f"{alias}.foo", 0)]),
                (2000, 3000, [(f"{alias}.foo", 0)]),
                (3000, 4000, [(f"{alias}.foo", 0)]),
                (4000, 5000, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_exclude_outliers_data_filter(self):
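        # with dataFilter=exclude_outliers, the events around 4000 are dropped
        # and only the [0, 1) bucket remains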
        specs = [
            (0, 0, [("foo", 4)]),
            (4000, 4001, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "dataFilter": "exclude_outliers",
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (0, 1, [(f"{alias}.foo", 4)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_missing_measurement_data(self):
        # make sure there is at least one transaction
        specs = [
            (0, 1, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                # make sure to query a measurement that does not exist
                "field": [f"{alias}.bar"],
                "numBuckets": 5,
                "dataFilter": "exclude_outliers",
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (0, 1, [(f"{alias}.bar", 0)]),
                (1, 1, [(f"{alias}.bar", 0)]),
                (2, 2, [(f"{alias}.bar", 0)]),
                (3, 3, [(f"{alias}.bar", 0)]),
                (4, 4, [(f"{alias}.bar", 0)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_missing_measurement_data_with_explicit_bounds(self):
        # make sure there is at least one transaction
        specs = [
            (0, 1, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                # make sure to query a measurement that does not exist
                "field": [f"{alias}.bar"],
                "numBuckets": 5,
                "dataFilter": "exclude_outliers",
                "min": 10,
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (10, 11, [(f"{alias}.bar", 0)]),
                (11, 11, [(f"{alias}.bar", 0)]),
                (12, 12, [(f"{alias}.bar", 0)]),
                (13, 13, [(f"{alias}.bar", 0)]),
                (14, 14, [(f"{alias}.bar", 0)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_ignores_aggregate_conditions(self):
        # range is [0, 5), so it is divided into 5 buckets of width 1
        specs = [
            (0, 1, [("foo", 1)]),
            (1, 2, [("foo", 1)]),
            (2, 3, [("foo", 1)]),
            (3, 4, [("foo", 0)]),
            (4, 5, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "query": "tpm():>0.001",
            }
            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (0, 1, [(f"{alias}.foo", 1)]),
                (1, 2, [(f"{alias}.foo", 1)]),
                (2, 3, [(f"{alias}.foo", 1)]),
                (3, 4, [(f"{alias}.foo", 0)]),
                (4, 5, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_outlier_filtering_with_no_rows(self):
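        # even with no rows at all, outlier filtering still returns one empty bucket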
        query = {
            "project": [self.project.id],
            "field": ["transaction.duration"],
            "numBuckets": 5,
            "dataFilter": "exclude_outliers",
        }
        response = self.do_request(query)
        assert response.status_code == 200

        expected = [
            (0, 1, [("transaction.duration", 0)]),
        ]
        assert response.data == self.as_response_data(expected)