test_organization_events_histogram.py

from __future__ import annotations

import random
from collections import namedtuple
from copy import deepcopy
from datetime import timedelta

import pytest
from django.urls import reverse
from rest_framework.exceptions import ErrorDetail

from sentry.sentry_metrics.aggregation_option_registry import AggregationOption
from sentry.testutils.cases import APITestCase, MetricsEnhancedPerformanceTestCase, SnubaTestCase
from sentry.testutils.helpers.datetime import before_now, iso_format
from sentry.utils.samples import load_data
from sentry.utils.snuba import get_array_column_alias

pytestmark = pytest.mark.sentry_metrics
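
# A HistogramSpec describes a half-open value range [start, end), the
# (field, count) pairs of events to generate within it, and optional tags.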
HistogramSpec = namedtuple(
    "HistogramSpec", ["start", "end", "fields", "tags"], defaults=[None, None, [], {}]
)

ARRAY_COLUMNS = ["measurements", "span_op_breakdowns"]


class OrganizationEventsHistogramEndpointTest(APITestCase, SnubaTestCase):
    def setUp(self):
        super().setUp()
        self.min_ago = iso_format(before_now(minutes=1))
        self.data = load_data("transaction")
        self.features = {}
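
    # For each (field, count) pair in a spec, store `count` transaction events
    # with random values in [start, end). Each event records the value as both
    # a measurement and a span op breakdown, so either array column can be queried.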
    def populate_events(self, specs):
        start = before_now(minutes=5)
        for spec in specs:
            spec = HistogramSpec(*spec)
            for suffix_key, count in spec.fields:
                for i in range(count):
                    data = deepcopy(self.data)

                    measurement_name = suffix_key
                    breakdown_name = f"ops.{suffix_key}"

                    data["timestamp"] = iso_format(start)
                    data["start_timestamp"] = iso_format(start - timedelta(seconds=i))
                    value = random.random() * (spec.end - spec.start) + spec.start
                    data["transaction"] = f"/measurement/{measurement_name}/value/{value}"

                    data["measurements"] = {measurement_name: {"value": value}}
                    data["breakdowns"] = {
                        "span_ops": {
                            breakdown_name: {"value": value},
                        }
                    }
                    self.store_event(data, self.project.id)
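
    # Build the expected response payload from specs: one list of
    # {"bin": start, "count": n} entries per field, keyed by field name.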
    def as_response_data(self, specs):
        data: dict[str, list[dict[str, int]]] = {}
        for spec in specs:
            spec = HistogramSpec(*spec)
            for measurement, count in sorted(spec.fields):
                if measurement not in data:
                    data[measurement] = []
                data[measurement].append({"bin": spec.start, "count": count})
        return data
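
    # GET the histogram endpoint with the given query params; the
    # performance-view feature flag is enabled by default.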
    def do_request(self, query, features=None):
        if features is None:
            features = {"organizations:performance-view": True}
        features.update(self.features)

        self.login_as(user=self.user)
        url = reverse(
            "sentry-api-0-organization-events-histogram",
            kwargs={"organization_id_or_slug": self.organization.slug},
        )
        with self.feature(features):
            return self.client.get(url, query, format="json")

    def test_no_projects(self):
        response = self.do_request({})

        assert response.status_code == 200, response.content
        assert response.data == {}

    @pytest.mark.querybuilder
    def test_good_params(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "query": "event.type:transaction",
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"

    def test_good_params_with_optionals(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "query": "event.type:transaction",
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
                "precision": 0,
                "min": 0,
                "max": 10,
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"

    def test_bad_params_reverse_min_max(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "query": "event.type:transaction",
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
                "precision": 0,
                "min": 10,
                "max": 5,
            }

            response = self.do_request(query)
            assert response.data == {"non_field_errors": ["min cannot be greater than max."]}

    def test_bad_params_missing_fields(self):
        query = {
            "project": [self.project.id],
            "numBuckets": 10,
        }

        response = self.do_request(query)
        assert response.status_code == 400
        assert response.data == {
            "field": [ErrorDetail(string="This field is required.", code="required")],
        }

    def test_bad_params_too_many_fields(self):
        query = {
            "project": [self.project.id],
            "field": ["foo", "bar", "baz", "qux", "quux"],
            "numBuckets": 10,
            "min": 0,
            "max": 100,
            "precision": 0,
        }

        response = self.do_request(query)
        assert response.status_code == 400
        assert response.data == {
            "field": ["Ensure this field has no more than 4 elements."],
        }

    def test_bad_params_mixed_fields(self):
        for array_column in ARRAY_COLUMNS:
            for other_array_column in ARRAY_COLUMNS:
                query = {
                    "project": [self.project.id],
                    "field": [
                        "foo",
                        f"{get_array_column_alias(array_column)}.foo",
                        f"{get_array_column_alias(other_array_column)}.bar",
                    ],
                    "numBuckets": 10,
                    "min": 0,
                    "max": 100,
                    "precision": 0,
                }

                response = self.do_request(query)
                assert response.status_code == 400, f"failing for {array_column}"
                assert response.data == {
                    "field": [
                        "You can only generate histogram for one column at a time unless they are all measurements or all span op breakdowns."
                    ],
                }, f"failing for {array_column}"

    def test_bad_params_missing_num_buckets(self):
        query = {
            "project": [self.project.id],
            "field": ["foo"],
        }

        response = self.do_request(query)
        assert response.status_code == 400
        assert response.data == {
            "numBuckets": ["This field is required."],
        }

    def test_bad_params_invalid_num_buckets(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": "baz",
            }

            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "numBuckets": ["A valid integer is required."],
            }, f"failing for {array_column}"

    def test_bad_params_invalid_negative_num_buckets(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": -1,
            }

            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "numBuckets": ["Ensure this value is greater than or equal to 1."],
            }, f"failing for {array_column}"

    def test_bad_params_num_buckets_too_large(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 150,
            }

            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "numBuckets": ["Ensure this value is less than or equal to 100."],
            }, f"failing for {array_column}"

    def test_bad_params_invalid_precision_too_small(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
                "precision": -1,
            }

            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "precision": ["Ensure this value is greater than or equal to 0."],
            }, f"failing for {array_column}"

    def test_bad_params_invalid_precision_too_big(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
                "precision": 100,
            }

            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "precision": ["Ensure this value is less than or equal to 4."],
            }, f"failing for {array_column}"

    def test_bad_params_invalid_min(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
                "min": "qux",
            }

            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "min": ["A valid number is required."],
            }, f"failing for {array_column}"

    def test_bad_params_invalid_max(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
                "max": "qux",
            }

            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "max": ["A valid number is required."],
            }, f"failing for {array_column}"

    def test_histogram_empty(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 5,
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [(i, i + 1, [(f"{alias}.foo", 0), (f"{alias}.bar", 0)]) for i in range(5)]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_simple(self):
        # range is [0, 5), so it is divided into 5 buckets of width 1
        specs = [
            (0, 1, [("foo", 1)]),
            (1, 2, [("foo", 1)]),
            (2, 3, [("foo", 1)]),
            (4, 5, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (0, 1, [(f"{alias}.foo", 1)]),
                (1, 2, [(f"{alias}.foo", 1)]),
                (2, 3, [(f"{alias}.foo", 1)]),
                (3, 4, [(f"{alias}.foo", 0)]),
                (4, 5, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_simple_using_min_max(self):
        # range is [0, 5), so it is divided into 5 buckets of width 1
        specs = [
            (0, 1, [("foo", 1)]),
            (1, 2, [("foo", 1)]),
            (2, 3, [("foo", 1)]),
            (4, 5, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "min": 0,
                "max": 5,
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (0, 1, [(f"{alias}.foo", 1)]),
                (1, 2, [(f"{alias}.foo", 1)]),
                (2, 3, [(f"{alias}.foo", 1)]),
                (3, 4, [(f"{alias}.foo", 0)]),
                (4, 5, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_simple_using_given_min_above_queried_max(self):
        # All these events are out of range of the query parameters,
        # and should not appear in the results.
        specs = [
            (0, 1, [("foo", 1)]),
            (1, 2, [("foo", 1)]),
            (2, 3, [("foo", 1)]),
            (4, 5, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "min": 6,
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (6, 7, [(f"{alias}.foo", 0)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_simple_using_given_max_below_queried_min(self):
        # All these events are out of range of the query parameters,
        # and should not appear in the results.
        specs = [
            (6, 7, [("foo", 1)]),
            (8, 9, [("foo", 1)]),
            (10, 11, [("foo", 1)]),
            (12, 13, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "max": 6,
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (5, 6, [(f"{alias}.foo", 0)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_large_buckets(self):
        # make sure that it works for large width buckets
        # range is [0, 99], so it is divided into 5 buckets of width 20
        specs = [
            (0, 0, [("foo", 2)]),
            (99, 99, [("foo", 2)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (0, 20, [(f"{alias}.foo", 2)]),
                (20, 40, [(f"{alias}.foo", 0)]),
                (40, 60, [(f"{alias}.foo", 0)]),
                (60, 80, [(f"{alias}.foo", 0)]),
                (80, 100, [(f"{alias}.foo", 2)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_non_zero_offset(self):
        # range is [10, 15), so it is divided into 5 buckets of width 1
        specs = [
            (10, 11, [("foo", 1)]),
            (12, 13, [("foo", 1)]),
            (13, 14, [("foo", 1)]),
            (14, 15, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (10, 11, [(f"{alias}.foo", 1)]),
                (11, 12, [(f"{alias}.foo", 0)]),
                (12, 13, [(f"{alias}.foo", 1)]),
                (13, 14, [(f"{alias}.foo", 1)]),
                (14, 15, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_extra_data(self):
        # range is [11, 16), so it is divided into 5 buckets of width 1
        # make sure every bin has some value
        specs = [
            (10, 11, [("foo", 1)]),
            (11, 12, [("foo", 1)]),
            (12, 13, [("foo", 1)]),
            (13, 14, [("foo", 1)]),
            (14, 15, [("foo", 1)]),
            (15, 16, [("foo", 1)]),
            (16, 17, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "min": 11,
                "max": 16,
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (11, 12, [(f"{alias}.foo", 1)]),
                (12, 13, [(f"{alias}.foo", 1)]),
                (13, 14, [(f"{alias}.foo", 1)]),
                (14, 15, [(f"{alias}.foo", 1)]),
                (15, 16, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_non_zero_min_large_buckets(self):
        # range is [10, 59], so it is divided into 5 buckets of width 10
        specs = [
            (10, 10, [("foo", 1)]),
            (40, 50, [("foo", 1)]),
            (59, 59, [("foo", 2)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (10, 20, [(f"{alias}.foo", 1)]),
                (20, 30, [(f"{alias}.foo", 0)]),
                (30, 40, [(f"{alias}.foo", 0)]),
                (40, 50, [(f"{alias}.foo", 1)]),
                (50, 60, [(f"{alias}.foo", 2)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    @pytest.mark.xfail(reason="snuba does not allow - in alias names")
    def test_histogram_negative_values(self):
        # range is [-9, -4), so it is divided into 5 buckets of width 1
        specs = [
            (-9, -8, [("foo", 3)]),
            (-5, -4, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (-9, -8, [(f"{alias}.foo", 3)]),
                (-8, -7, [(f"{alias}.foo", 0)]),
                (-7, -6, [(f"{alias}.foo", 0)]),
                (-6, -5, [(f"{alias}.foo", 0)]),
                (-5, -4, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    @pytest.mark.xfail(reason="snuba does not allow - in alias names")
    def test_histogram_positive_and_negative_values(self):
        # range is [-50, 49], so it is divided into 5 buckets of width 20
        specs = [
            (-50, -50, [("foo", 1)]),
            (-10, 10, [("foo", 2)]),
            (49, 49, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (-50, -30, [(f"{alias}.foo", 1)]),
                (-30, -10, [(f"{alias}.foo", 0)]),
                (-10, 10, [(f"{alias}.foo", 2)]),
                (10, 30, [(f"{alias}.foo", 0)]),
                (30, 50, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_increased_precision(self):
        # range is [1.00, 2.24], so it is divided into 5 buckets of width 0.25
        specs = [
            (1.00, 1.00, [("foo", 3)]),
            (2.24, 2.24, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "precision": 2,
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (1.00, 1.25, [(f"{alias}.foo", 3)]),
                (1.25, 1.50, [(f"{alias}.foo", 0)]),
                (1.50, 1.75, [(f"{alias}.foo", 0)]),
                (1.75, 2.00, [(f"{alias}.foo", 0)]),
                (2.00, 2.25, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_increased_precision_with_min_max(self):
        # range is [1.25, 2.00], so it is divided into 3 buckets of width 0.25
        specs = [
            (1.00, 1.25, [("foo", 3)]),
            (2.00, 2.25, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 3,
                "precision": 2,
                "min": 1.25,
                "max": 2.00,
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (1.25, 1.50, [(f"{alias}.foo", 0)]),
                (1.50, 1.75, [(f"{alias}.foo", 0)]),
                (1.75, 2.00, [(f"{alias}.foo", 0)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_increased_precision_large_buckets(self):
        # range is [10.0000, 59.9999], so it is divided into 5 buckets of width 10
        specs = [
            (10.0000, 10.0000, [("foo", 1)]),
            (30.0000, 40.0000, [("foo", 1)]),
            (59.9999, 59.9999, [("foo", 2)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "precision": 4,
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (10.0000, 20.0000, [(f"{alias}.foo", 1)]),
                (20.0000, 30.0000, [(f"{alias}.foo", 0)]),
                (30.0000, 40.0000, [(f"{alias}.foo", 1)]),
                (40.0000, 50.0000, [(f"{alias}.foo", 0)]),
                (50.0000, 60.0000, [(f"{alias}.foo", 2)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_multiple_measures(self):
        # range is [10, 59], so it is divided into 5 buckets of width 10
        specs = [
            (10, 10, [("bar", 0), ("baz", 0), ("foo", 1)]),
            (30, 40, [("bar", 2), ("baz", 0), ("foo", 0)]),
            (59, 59, [("bar", 0), ("baz", 1), ("foo", 0)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.bar", f"{alias}.baz", f"{alias}.foo"],
                "numBuckets": 5,
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (
                    10,
                    20,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 1),
                    ],
                ),
                (
                    20,
                    30,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 0),
                    ],
                ),
                (
                    30,
                    40,
                    [
                        (f"{alias}.bar", 2),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 0),
                    ],
                ),
                (
                    40,
                    50,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 0),
                    ],
                ),
                (
                    50,
                    60,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 1),
                        (f"{alias}.foo", 0),
                    ],
                ),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_max_value_on_edge(self):
        # range is [11, 21] so it is divided into 5 buckets of width 5,
        # because buckets of width 2 would exclude 21, and the next
        # nice number is 5
        specs = [
            (11, 11, [("bar", 0), ("baz", 0), ("foo", 1)]),
            (21, 21, [("bar", 1), ("baz", 1), ("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.bar", f"{alias}.baz", f"{alias}.foo"],
                "numBuckets": 5,
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (
                    10,
                    15,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 1),
                    ],
                ),
                (
                    15,
                    20,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 0),
                    ],
                ),
                (
                    20,
                    25,
                    [
                        (f"{alias}.bar", 1),
                        (f"{alias}.baz", 1),
                        (f"{alias}.foo", 1),
                    ],
                ),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_bins_exceed_max(self):
        specs = [
            (10, 15, [("bar", 0), ("baz", 0), ("foo", 1)]),
            (30, 30, [("bar", 1), ("baz", 1), ("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.bar", f"{alias}.baz", f"{alias}.foo"],
                "numBuckets": 5,
                "min": 10,
                "max": 21,
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (
                    10,
                    15,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 1),
                    ],
                ),
                (
                    15,
                    20,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 0),
                    ],
                ),
                (
                    20,
                    25,
                    [
                        (f"{alias}.bar", 0),
                        (f"{alias}.baz", 0),
                        (f"{alias}.foo", 0),
                    ],
                ),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_bad_params_invalid_data_filter(self):
        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo", f"{alias}.bar"],
                "numBuckets": 10,
                "dataFilter": "invalid",
            }

            response = self.do_request(query)
            assert response.status_code == 400, f"failing for {array_column}"
            assert response.data == {
                "dataFilter": ['"invalid" is not a valid choice.'],
            }, f"failing for {array_column}"

    def test_histogram_all_data_filter(self):
        specs = [
            (0, 1, [("foo", 4)]),
            (4000, 5000, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "dataFilter": "all",
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (0, 1000, [(f"{alias}.foo", 4)]),
                (1000, 2000, [(f"{alias}.foo", 0)]),
                (2000, 3000, [(f"{alias}.foo", 0)]),
                (3000, 4000, [(f"{alias}.foo", 0)]),
                (4000, 5000, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_exclude_outliers_data_filter(self):
        specs = [
            (0, 0, [("foo", 4)]),
            (4000, 4001, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "dataFilter": "exclude_outliers",
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (0, 1, [(f"{alias}.foo", 4)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_missing_measurement_data(self):
        # make sure there is at least one transaction
        specs = [
            (0, 1, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                # make sure to query a measurement that does not exist
                "field": [f"{alias}.bar"],
                "numBuckets": 5,
                "dataFilter": "exclude_outliers",
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (0, 1, [(f"{alias}.bar", 0)]),
                (1, 1, [(f"{alias}.bar", 0)]),
                (2, 2, [(f"{alias}.bar", 0)]),
                (3, 3, [(f"{alias}.bar", 0)]),
                (4, 4, [(f"{alias}.bar", 0)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_missing_measurement_data_with_explicit_bounds(self):
        # make sure there is at least one transaction
        specs = [
            (0, 1, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                # make sure to query a measurement that does not exist
                "field": [f"{alias}.bar"],
                "numBuckets": 5,
                "dataFilter": "exclude_outliers",
                "min": 10,
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (10, 11, [(f"{alias}.bar", 0)]),
                (11, 11, [(f"{alias}.bar", 0)]),
                (12, 12, [(f"{alias}.bar", 0)]),
                (13, 13, [(f"{alias}.bar", 0)]),
                (14, 14, [(f"{alias}.bar", 0)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_ignores_aggregate_conditions(self):
        # range is [0, 5), so it is divided into 5 buckets of width 1
        specs = [
            (0, 1, [("foo", 1)]),
            (1, 2, [("foo", 1)]),
            (2, 3, [("foo", 1)]),
            (3, 4, [("foo", 0)]),
            (4, 5, [("foo", 1)]),
        ]
        self.populate_events(specs)

        for array_column in ARRAY_COLUMNS:
            alias = get_array_column_alias(array_column)
            query = {
                "project": [self.project.id],
                "field": [f"{alias}.foo"],
                "numBuckets": 5,
                "query": "tpm():>0.001",
            }

            response = self.do_request(query)
            assert response.status_code == 200, f"failing for {array_column}"
            expected = [
                (0, 1, [(f"{alias}.foo", 1)]),
                (1, 2, [(f"{alias}.foo", 1)]),
                (2, 3, [(f"{alias}.foo", 1)]),
                (3, 4, [(f"{alias}.foo", 0)]),
                (4, 5, [(f"{alias}.foo", 1)]),
            ]
            assert response.data == self.as_response_data(expected), f"failing for {array_column}"

    def test_histogram_outlier_filtering_with_no_rows(self):
        query = {
            "project": [self.project.id],
            "field": ["transaction.duration"],
            "numBuckets": 5,
            "dataFilter": "exclude_outliers",
        }

        response = self.do_request(query)
        assert response.status_code == 200
        expected = [
            (0, 1, [("transaction.duration", 0)]),
        ]
        assert response.data == self.as_response_data(expected)
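

# The metrics-enhanced tests below exercise the same endpoint backed by the
# metrics dataset; bucket counts there are approximate rather than exact.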
class OrganizationEventsMetricsEnhancedPerformanceHistogramEndpointTest(
    MetricsEnhancedPerformanceTestCase
):
    def setUp(self):
        super().setUp()
        self.min_ago = iso_format(before_now(minutes=1))
        self.features = {}
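
    # Store one transaction metric per event at the midpoint of [start, end],
    # using AggregationOption.HIST so the metric supports histogram queries.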
    def populate_events(self, specs):
        start = before_now(minutes=5)
        for spec in specs:
            spec = HistogramSpec(*spec)
            for suffix_key, count in spec.fields:
                for i in range(count):
                    self.store_transaction_metric(
                        (spec.end + spec.start) / 2,
                        metric=suffix_key,
                        tags={"transaction": suffix_key, **spec.tags},
                        timestamp=start,
                        aggregation_option=AggregationOption.HIST,
                    )

    def as_response_data(self, specs):
        data: dict[str, list[dict[str, int]]] = {}
        for spec in specs:
            spec = HistogramSpec(*spec)
            for measurement, count in sorted(spec.fields):
                if measurement not in data:
                    data[measurement] = []
                data[measurement].append({"bin": spec.start, "count": count})
        return data

    def do_request(self, query, features=None):
        if features is None:
            features = {
                "organizations:performance-view": True,
                "organizations:performance-use-metrics": True,
            }
        features.update(self.features)

        self.login_as(user=self.user)
        url = reverse(
            "sentry-api-0-organization-events-histogram",
            kwargs={"organization_id_or_slug": self.organization.slug},
        )
        with self.feature(features):
            return self.client.get(url, query, format="json")

    def test_no_projects(self):
        response = self.do_request({})

        assert response.status_code == 200, response.content
        assert response.data == {}

    def test_histogram_simple(self):
        specs = [
            (0, 1, [("transaction.duration", 5)]),
            (1, 2, [("transaction.duration", 10)]),
            (2, 3, [("transaction.duration", 1)]),
            (4, 5, [("transaction.duration", 15)]),
        ]
        self.populate_events(specs)
        query = {
            "project": [self.project.id],
            "field": ["transaction.duration"],
            "numBuckets": 5,
            "dataset": "metrics",
        }

        response = self.do_request(query)
        assert response.status_code == 200, response.content
        expected = [
            (0, 1, [("transaction.duration", 6)]),
            (1, 2, [("transaction.duration", 9)]),
            (2, 3, [("transaction.duration", 3)]),
            (3, 4, [("transaction.duration", 8)]),
            (4, 5, [("transaction.duration", 7)]),
        ]
        # Note: metrics data is approximate; these values are based on running
        # the test and asserting the results.
        expected_response = self.as_response_data(expected)
        expected_response["meta"] = {"isMetricsData": True}
        assert response.data == expected_response

    def test_multi_histogram(self):
        specs = [
            (0, 1, [("measurements.fcp", 5), ("measurements.lcp", 5)]),
            (1, 2, [("measurements.fcp", 5), ("measurements.lcp", 5)]),
        ]
        self.populate_events(specs)
        query = {
            "project": [self.project.id],
            "field": ["measurements.fcp", "measurements.lcp"],
            "numBuckets": 2,
            "dataset": "metrics",
        }

        response = self.do_request(query)
        assert response.status_code == 200, response.content
        expected = [
            (0, 1, [("measurements.fcp", 5), ("measurements.lcp", 5)]),
            (1, 2, [("measurements.fcp", 5), ("measurements.lcp", 5)]),
        ]
        # Note: metrics data is approximate; these values are based on running
        # the test and asserting the results.
        expected_response = self.as_response_data(expected)
        expected_response["meta"] = {"isMetricsData": True}
        assert response.data == expected_response

    def test_histogram_exclude_outliers_data_filter(self):
        specs = [
            (0, 0, [("transaction.duration", 4)], {"histogram_outlier": "inlier"}),
            (1, 1, [("transaction.duration", 4)], {"histogram_outlier": "inlier"}),
            (4000, 4001, [("transaction.duration", 1)], {"histogram_outlier": "outlier"}),
        ]
        self.populate_events(specs)
        query = {
            "project": [self.project.id],
            "field": ["transaction.duration"],
            "numBuckets": 5,
            "dataFilter": "exclude_outliers",
            "dataset": "metrics",
        }

        response = self.do_request(query)
        assert response.status_code == 200, response.content
        # Metrics approximation means both inlier buckets got merged.
        expected = [
            (0, 0, [("transaction.duration", 8)]),
            (1, 2, [("transaction.duration", 0)]),
        ]
        expected_response = self.as_response_data(expected)
        expected_response["meta"] = {"isMetricsData": True}
        assert response.data == expected_response


class OrganizationEventsMetricsEnhancedPerformanceHistogramEndpointTestWithMetricLayer(
    OrganizationEventsMetricsEnhancedPerformanceHistogramEndpointTest
):
    def setUp(self):
        super().setUp()
        self.features["organizations:use-metrics-layer"] = True