# test_tsdb_backend.py
import functools
from datetime import datetime, timedelta
from typing import Optional, Sequence
from unittest import mock
from unittest.mock import patch

import pytz

from sentry.event_manager import _pull_out_data
from sentry.models import Environment, Group, GroupRelease, Release
from sentry.testutils import SnubaTestCase, TestCase
from sentry.testutils.helpers.datetime import iso_format
from sentry.testutils.silo import region_silo_test
from sentry.tsdb.base import TSDBModel
from sentry.tsdb.snuba import SnubaTSDB
from sentry.utils.dates import to_datetime, to_timestamp
from sentry.utils.snuba import aliased_query
  16. def timestamp(d):
  17. t = int(to_timestamp(d))
  18. return t - (t % 3600)
  19. def has_shape(data, shape, allow_empty=False):
  20. """
  21. Determine if a data object has the provided shape
  22. At any level, the object in `data` and in `shape` must have the same type.
  23. A dict is the same shape if all its keys and values have the same shape as the
  24. key/value in `shape`. The number of keys/values is not relevant.
  25. A list is the same shape if all its items have the same shape as the value
  26. in `shape`
  27. A tuple is the same shape if it has the same length as `shape` and all the
  28. values have the same shape as the corresponding value in `shape`
  29. Any other object simply has to have the same type.
  30. If `allow_empty` is set, lists and dicts in `data` will pass even if they are empty.
  31. """
  32. if not isinstance(data, type(shape)):
  33. return False
  34. if isinstance(data, dict):
  35. return (
  36. (allow_empty or len(data) > 0)
  37. and all(has_shape(k, list(shape.keys())[0]) for k in data.keys())
  38. and all(has_shape(v, list(shape.values())[0]) for v in data.values())
  39. )
  40. elif isinstance(data, list):
  41. return (allow_empty or len(data) > 0) and all(has_shape(v, shape[0]) for v in data)
  42. elif isinstance(data, tuple):
  43. return len(data) == len(shape) and all(
  44. has_shape(data[i], shape[i]) for i in range(len(data))
  45. )
  46. else:
  47. return True
  48. class SnubaTSDBTest(TestCase, SnubaTestCase):
  49. def setUp(self):
  50. super().setUp()
  51. self.db = SnubaTSDB()
  52. self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
  53. hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
  54. )
  55. self.proj1 = self.create_project()
  56. env1 = "test"
  57. env2 = "dev"
  58. defaultenv = ""
  59. release1 = "1" * 10
  60. release2 = "2" * 10
  61. self.release1 = Release.objects.create(
  62. organization_id=self.organization.id, version=release1, date_added=self.now
  63. )
  64. self.release1.add_project(self.proj1)
  65. self.release2 = Release.objects.create(
  66. organization_id=self.organization.id, version=release2, date_added=self.now
  67. )
  68. self.release2.add_project(self.proj1)
  69. for r in range(0, 14400, 600): # Every 10 min for 4 hours
  70. self.store_event(
  71. data={
  72. "event_id": (str(r) * 32)[:32],
  73. "message": "message 1",
  74. "platform": "python",
  75. "fingerprint": [["group-1"], ["group-2"]][
  76. (r // 600) % 2
  77. ], # Switch every 10 mins
  78. "timestamp": iso_format(self.now + timedelta(seconds=r)),
  79. "tags": {
  80. "foo": "bar",
  81. "baz": "quux",
  82. # Switch every 2 hours
  83. "environment": [env1, None][(r // 7200) % 3],
  84. "sentry:user": f"id:user{r // 3300}",
  85. },
  86. "user": {
  87. # change every 55 min so some hours have 1 user, some have 2
  88. "id": f"user{r // 3300}",
  89. "email": f"user{r}@sentry.io",
  90. },
  91. "release": str(r // 3600) * 10, # 1 per hour,
  92. },
  93. project_id=self.proj1.id,
  94. )
  95. groups = Group.objects.filter(project=self.proj1).order_by("id")
  96. self.proj1group1 = groups[0]
  97. self.proj1group2 = groups[1]
  98. self.env1 = Environment.objects.get(name=env1)
  99. self.env2 = self.create_environment(name=env2) # No events
  100. self.defaultenv = Environment.objects.get(name=defaultenv)
  101. self.group1release1env1 = GroupRelease.objects.get(
  102. project_id=self.proj1.id,
  103. group_id=self.proj1group1.id,
  104. release_id=self.release1.id,
  105. environment=env1,
  106. )
  107. self.group1release2env1 = GroupRelease.objects.create(
  108. project_id=self.proj1.id,
  109. group_id=self.proj1group1.id,
  110. release_id=self.release2.id,
  111. environment=env1,
  112. )
  113. self.group2release1env1 = GroupRelease.objects.get(
  114. project_id=self.proj1.id,
  115. group_id=self.proj1group2.id,
  116. release_id=self.release1.id,
  117. environment=env1,
  118. )
  119. def test_range_single(self):
  120. env1 = "test"
  121. project = self.create_project()
  122. for r in range(0, 600 * 6 * 4, 300): # Every 10 min for 4 hours
  123. self.store_event(
  124. data={
  125. "event_id": (str(r) * 32)[:32],
  126. "message": "message 1",
  127. "platform": "python",
  128. "fingerprint": ["group-1"],
  129. "timestamp": iso_format(self.now + timedelta(seconds=r)),
  130. "tags": {
  131. "foo": "bar",
  132. "baz": "quux",
  133. # Switch every 2 hours
  134. "environment": [env1, None][(r // 7200) % 3],
  135. "sentry:user": f"id:user{r // 3300}",
  136. },
  137. "user": {
  138. # change every 55 min so some hours have 1 user, some have 2
  139. "id": f"user{r // 3300}",
  140. "email": f"user{r}@sentry.io",
  141. },
  142. "release": str(r // 3600) * 10, # 1 per hour,
  143. },
  144. project_id=project.id,
  145. )
  146. groups = Group.objects.filter(project=project).order_by("id")
  147. group = groups[0]
  148. dts = [self.now + timedelta(hours=i) for i in range(4)]
  149. assert self.db.get_range(TSDBModel.group, [group.id], dts[0], dts[-1], rollup=3600) == {
  150. group.id: [
  151. (timestamp(dts[0]), 6 * 2),
  152. (timestamp(dts[1]), 6 * 2),
  153. (timestamp(dts[2]), 6 * 2),
  154. (timestamp(dts[3]), 6 * 2),
  155. ]
  156. }
  157. def test_range_groups(self):
  158. dts = [self.now + timedelta(hours=i) for i in range(4)]
  159. assert self.db.get_range(
  160. TSDBModel.group, [self.proj1group1.id], dts[0], dts[-1], rollup=3600
  161. ) == {
  162. self.proj1group1.id: [
  163. (timestamp(dts[0]), 3),
  164. (timestamp(dts[1]), 3),
  165. (timestamp(dts[2]), 3),
  166. (timestamp(dts[3]), 3),
  167. ]
  168. }
  169. # Multiple groups
  170. assert self.db.get_range(
  171. TSDBModel.group,
  172. [self.proj1group1.id, self.proj1group2.id],
  173. dts[0],
  174. dts[-1],
  175. rollup=3600,
  176. ) == {
  177. self.proj1group1.id: [
  178. (timestamp(dts[0]), 3),
  179. (timestamp(dts[1]), 3),
  180. (timestamp(dts[2]), 3),
  181. (timestamp(dts[3]), 3),
  182. ],
  183. self.proj1group2.id: [
  184. (timestamp(dts[0]), 3),
  185. (timestamp(dts[1]), 3),
  186. (timestamp(dts[2]), 3),
  187. (timestamp(dts[3]), 3),
  188. ],
  189. }
  190. assert self.db.get_range(TSDBModel.group, [], dts[0], dts[-1], rollup=3600) == {}
  191. def test_range_releases(self):
  192. dts = [self.now + timedelta(hours=i) for i in range(4)]
  193. assert self.db.get_range(
  194. TSDBModel.release, [self.release1.id], dts[0], dts[-1], rollup=3600
  195. ) == {
  196. self.release1.id: [
  197. (timestamp(dts[0]), 0),
  198. (timestamp(dts[1]), 6),
  199. (timestamp(dts[2]), 0),
  200. (timestamp(dts[3]), 0),
  201. ]
  202. }
  203. def test_range_project(self):
  204. dts = [self.now + timedelta(hours=i) for i in range(4)]
  205. assert self.db.get_range(
  206. TSDBModel.project, [self.proj1.id], dts[0], dts[-1], rollup=3600
  207. ) == {
  208. self.proj1.id: [
  209. (timestamp(dts[0]), 6),
  210. (timestamp(dts[1]), 6),
  211. (timestamp(dts[2]), 6),
  212. (timestamp(dts[3]), 6),
  213. ]
  214. }
  215. def test_range_environment_filter(self):
  216. dts = [self.now + timedelta(hours=i) for i in range(4)]
  217. assert self.db.get_range(
  218. TSDBModel.project,
  219. [self.proj1.id],
  220. dts[0],
  221. dts[-1],
  222. rollup=3600,
  223. environment_ids=[self.env1.id],
  224. ) == {
  225. self.proj1.id: [
  226. (timestamp(dts[0]), 6),
  227. (timestamp(dts[1]), 6),
  228. (timestamp(dts[2]), 0),
  229. (timestamp(dts[3]), 0),
  230. ]
  231. }
  232. # No events submitted for env2
  233. assert self.db.get_range(
  234. TSDBModel.project,
  235. [self.proj1.id],
  236. dts[0],
  237. dts[-1],
  238. rollup=3600,
  239. environment_ids=[self.env2.id],
  240. ) == {
  241. self.proj1.id: [
  242. (timestamp(dts[0]), 0),
  243. (timestamp(dts[1]), 0),
  244. (timestamp(dts[2]), 0),
  245. (timestamp(dts[3]), 0),
  246. ]
  247. }
  248. # Events submitted with no environment should match default environment
  249. assert self.db.get_range(
  250. TSDBModel.project,
  251. [self.proj1.id],
  252. dts[0],
  253. dts[-1],
  254. rollup=3600,
  255. environment_ids=[self.defaultenv.id],
  256. ) == {
  257. self.proj1.id: [
  258. (timestamp(dts[0]), 0),
  259. (timestamp(dts[1]), 0),
  260. (timestamp(dts[2]), 6),
  261. (timestamp(dts[3]), 6),
  262. ]
  263. }
  264. def test_range_rollups(self):
  265. # Daily
  266. daystart = self.now.replace(hour=0) # day buckets start on day boundaries
  267. dts = [daystart + timedelta(days=i) for i in range(2)]
  268. assert self.db.get_range(
  269. TSDBModel.project, [self.proj1.id], dts[0], dts[-1], rollup=86400
  270. ) == {self.proj1.id: [(timestamp(dts[0]), 24), (timestamp(dts[1]), 0)]}
  271. # Minutely
  272. dts = [self.now + timedelta(minutes=i) for i in range(120)]
  273. # Expect every 10th minute to have a 1, else 0
  274. expected = [(to_timestamp(d), 1 if i % 10 == 0 else 0) for i, d in enumerate(dts)]
  275. assert self.db.get_range(
  276. TSDBModel.project, [self.proj1.id], dts[0], dts[-1], rollup=60
  277. ) == {self.proj1.id: expected}
  278. def test_distinct_counts_series_users(self):
  279. dts = [self.now + timedelta(hours=i) for i in range(4)]
  280. assert self.db.get_distinct_counts_series(
  281. TSDBModel.users_affected_by_group, [self.proj1group1.id], dts[0], dts[-1], rollup=3600
  282. ) == {
  283. self.proj1group1.id: [
  284. (timestamp(dts[0]), 1),
  285. (timestamp(dts[1]), 1),
  286. (timestamp(dts[2]), 1),
  287. (timestamp(dts[3]), 2),
  288. ]
  289. }
  290. dts = [self.now + timedelta(hours=i) for i in range(4)]
  291. assert self.db.get_distinct_counts_series(
  292. TSDBModel.users_affected_by_project, [self.proj1.id], dts[0], dts[-1], rollup=3600
  293. ) == {
  294. self.proj1.id: [
  295. (timestamp(dts[0]), 1),
  296. (timestamp(dts[1]), 2),
  297. (timestamp(dts[2]), 2),
  298. (timestamp(dts[3]), 2),
  299. ]
  300. }
  301. assert (
  302. self.db.get_distinct_counts_series(
  303. TSDBModel.users_affected_by_group, [], dts[0], dts[-1], rollup=3600
  304. )
  305. == {}
  306. )
  307. def get_distinct_counts_totals_users(self):
  308. assert self.db.get_distinct_counts_totals(
  309. TSDBModel.users_affected_by_group,
  310. [self.proj1group1.id],
  311. self.now,
  312. self.now + timedelta(hours=4),
  313. rollup=3600,
  314. ) == {
  315. self.proj1group1.id: 2 # 2 unique users overall
  316. }
  317. assert self.db.get_distinct_counts_totals(
  318. TSDBModel.users_affected_by_group,
  319. [self.proj1group1.id],
  320. self.now,
  321. self.now,
  322. rollup=3600,
  323. ) == {
  324. self.proj1group1.id: 1 # Only 1 unique user in the first hour
  325. }
  326. assert self.db.get_distinct_counts_totals(
  327. TSDBModel.users_affected_by_project,
  328. [self.proj1.id],
  329. self.now,
  330. self.now + timedelta(hours=4),
  331. rollup=3600,
  332. ) == {self.proj1.id: 2}
  333. assert (
  334. self.db.get_distinct_counts_totals(
  335. TSDBModel.users_affected_by_group,
  336. [],
  337. self.now,
  338. self.now + timedelta(hours=4),
  339. rollup=3600,
  340. )
  341. == {}
  342. )
  343. def test_most_frequent(self):
  344. assert self.db.get_most_frequent(
  345. TSDBModel.frequent_issues_by_project,
  346. [self.proj1.id],
  347. self.now,
  348. self.now + timedelta(hours=4),
  349. rollup=3600,
  350. ) in [
  351. {self.proj1.id: [(self.proj1group1.id, 2.0), (self.proj1group2.id, 1.0)]},
  352. {self.proj1.id: [(self.proj1group2.id, 2.0), (self.proj1group1.id, 1.0)]},
  353. ] # Both issues equally frequent
  354. assert (
  355. self.db.get_most_frequent(
  356. TSDBModel.frequent_issues_by_project,
  357. [],
  358. self.now,
  359. self.now + timedelta(hours=4),
  360. rollup=3600,
  361. )
  362. == {}
  363. )
  364. def test_frequency_series(self):
  365. dts = [self.now + timedelta(hours=i) for i in range(4)]
  366. assert self.db.get_frequency_series(
  367. TSDBModel.frequent_releases_by_group,
  368. {
  369. self.proj1group1.id: (self.group1release1env1.id, self.group1release2env1.id),
  370. self.proj1group2.id: (self.group2release1env1.id,),
  371. },
  372. dts[0],
  373. dts[-1],
  374. rollup=3600,
  375. ) == {
  376. self.proj1group1.id: [
  377. (timestamp(dts[0]), {self.group1release1env1.id: 0, self.group1release2env1.id: 0}),
  378. (timestamp(dts[1]), {self.group1release1env1.id: 3, self.group1release2env1.id: 0}),
  379. (timestamp(dts[2]), {self.group1release1env1.id: 0, self.group1release2env1.id: 3}),
  380. (timestamp(dts[3]), {self.group1release1env1.id: 0, self.group1release2env1.id: 0}),
  381. ],
  382. self.proj1group2.id: [
  383. (timestamp(dts[0]), {self.group2release1env1.id: 0}),
  384. (timestamp(dts[1]), {self.group2release1env1.id: 3}),
  385. (timestamp(dts[2]), {self.group2release1env1.id: 0}),
  386. (timestamp(dts[3]), {self.group2release1env1.id: 0}),
  387. ],
  388. }
  389. assert (
  390. self.db.get_frequency_series(
  391. TSDBModel.frequent_releases_by_group, {}, dts[0], dts[-1], rollup=3600
  392. )
  393. == {}
  394. )
  395. def test_result_shape(self):
  396. """
  397. Tests that the results from the different TSDB methods have the
  398. expected format.
  399. """
  400. project_id = self.proj1.id
  401. dts = [self.now + timedelta(hours=i) for i in range(4)]
  402. results = self.db.get_most_frequent(
  403. TSDBModel.frequent_issues_by_project, [project_id], dts[0], dts[0]
  404. )
  405. assert has_shape(results, {1: [(1, 1.0)]})
  406. results = self.db.get_most_frequent_series(
  407. TSDBModel.frequent_issues_by_project, [project_id], dts[0], dts[0]
  408. )
  409. assert has_shape(results, {1: [(1, {1: 1.0})]})
  410. items = {
  411. # {project_id: (issue_id, issue_id, ...)}
  412. project_id: (self.proj1group1.id, self.proj1group2.id)
  413. }
  414. results = self.db.get_frequency_series(
  415. TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1]
  416. )
  417. assert has_shape(results, {1: [(1, {1: 1})]})
  418. results = self.db.get_frequency_totals(
  419. TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1]
  420. )
  421. assert has_shape(results, {1: {1: 1}})
  422. results = self.db.get_range(TSDBModel.project, [project_id], dts[0], dts[-1])
  423. assert has_shape(results, {1: [(1, 1)]})
  424. results = self.db.get_distinct_counts_series(
  425. TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1]
  426. )
  427. assert has_shape(results, {1: [(1, 1)]})
  428. results = self.db.get_distinct_counts_totals(
  429. TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1]
  430. )
  431. assert has_shape(results, {1: 1})
  432. results = self.db.get_distinct_counts_union(
  433. TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1]
  434. )
  435. assert has_shape(results, 1)
  436. def test_calculated_limit(self):
  437. with patch("sentry.tsdb.snuba.snuba") as snuba:
  438. # 24h test
  439. rollup = 3600
  440. end = self.now
  441. start = end + timedelta(days=-1, seconds=rollup)
  442. self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
  443. assert snuba.query.call_args[1]["limit"] == 120
  444. # 14 day test
  445. rollup = 86400
  446. start = end + timedelta(days=-14, seconds=rollup)
  447. self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
  448. assert snuba.query.call_args[1]["limit"] == 70
  449. # 1h test
  450. rollup = 3600
  451. end = self.now
  452. start = end + timedelta(hours=-1, seconds=rollup)
  453. self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
  454. assert snuba.query.call_args[1]["limit"] == 5
  455. @region_silo_test
  456. class SnubaTSDBGroupPerformanceTest(TestCase, SnubaTestCase):
  457. def setUp(self):
  458. super().setUp()
  459. self.db = SnubaTSDB()
  460. self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
  461. hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
  462. )
  463. self.proj1 = self.create_project()
  464. env1 = "test"
  465. env2 = "dev"
  466. defaultenv = ""
  467. self.proj1group1 = self.create_group(project=self.proj1)
  468. self.proj1group2 = self.create_group(project=self.proj1)
  469. for r in range(0, 14400, 600): # Every 10 min for 4 hours
  470. self.__insert_transaction(
  471. environment=[env1, None][(r // 7200) % 3],
  472. project_id=self.proj1.id,
  473. # change every 55 min so some hours have 1 user, some have 2
  474. user_id=f"user{r // 3300}",
  475. email=f"user{r}@sentry.io",
  476. # release_version=str(r // 3600) * 10, # 1 per hour,
  477. insert_timestamp=self.now + timedelta(seconds=r),
  478. groups=[[self.proj1group1], [self.proj1group2]][(r // 600) % 2],
  479. transaction_name=str(r),
  480. )
  481. self.env1 = Environment.objects.get(name=env1)
  482. self.env2 = self.create_environment(name=env2) # No events
  483. self.defaultenv = Environment.objects.get(name=defaultenv)
  484. def __insert_transaction(
  485. self,
  486. environment: Optional[str],
  487. project_id: int,
  488. user_id: str,
  489. email: str,
  490. insert_timestamp: datetime,
  491. groups: Sequence[int],
  492. transaction_name: str,
  493. ):
  494. def inject_group_ids(jobs, projects, _groups=None):
  495. _pull_out_data(jobs, projects)
  496. if _groups:
  497. for job in jobs:
  498. job["event"].groups = _groups
  499. return jobs, projects
  500. event_data = {
  501. "type": "transaction",
  502. "level": "info",
  503. "message": "transaction message",
  504. "tags": {
  505. "environment": environment,
  506. "sentry:user": f"id:{user_id}",
  507. },
  508. "user": {
  509. "id": user_id,
  510. "email": email,
  511. },
  512. "contexts": {"trace": {"trace_id": "b" * 32, "span_id": "c" * 16, "op": ""}},
  513. "timestamp": insert_timestamp.timestamp(),
  514. "start_timestamp": insert_timestamp.timestamp(),
  515. "transaction": transaction_name,
  516. }
  517. with mock.patch(
  518. "sentry.event_manager._pull_out_data",
  519. functools.partial(
  520. inject_group_ids,
  521. _groups=groups,
  522. ),
  523. ):
  524. event = self.store_event(
  525. data=event_data,
  526. project_id=project_id,
  527. )
  528. assert event
  529. from sentry.utils import snuba
  530. result = snuba.raw_query(
  531. dataset=snuba.Dataset.Transactions,
  532. start=insert_timestamp - timedelta(days=1),
  533. end=insert_timestamp + timedelta(days=1),
  534. selected_columns=[
  535. "event_id",
  536. "project_id",
  537. "environment",
  538. "group_ids",
  539. "tags[sentry:user]",
  540. "timestamp",
  541. ],
  542. groupby=None,
  543. filter_keys={"project_id": [project_id], "event_id": [event.event_id]},
  544. )
  545. assert len(result["data"]) == 1
  546. assert result["data"][0]["event_id"] == event.event_id
  547. assert result["data"][0]["project_id"] == event.project_id
  548. assert result["data"][0]["group_ids"] == [g.id for g in groups]
  549. assert result["data"][0]["tags[sentry:user]"] == f"id:{user_id}"
  550. assert result["data"][0]["environment"] == (environment if environment else None)
  551. assert result["data"][0]["timestamp"] == insert_timestamp.isoformat()
  552. return event
  553. def test_range_groups_single(self):
  554. from sentry.snuba.dataset import Dataset
  555. now = (datetime.utcnow() - timedelta(hours=4)).replace(
  556. hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
  557. )
  558. dts = [now + timedelta(hours=i) for i in range(4)]
  559. project = self.create_project()
  560. group = self.create_group(project=project, first_seen=now)
  561. # not sure what's going on here, but `times=1,2,3,4` work fine
  562. # fails with anything above 4
  563. times = 4
  564. event_ids = []
  565. events = []
  566. for i in range(0, times):
  567. res = self.__insert_transaction(
  568. environment=None,
  569. project_id=project.id,
  570. user_id="my_user",
  571. email="test@email.com",
  572. insert_timestamp=now + timedelta(minutes=i * 10),
  573. groups=[group],
  574. transaction_name=str(i),
  575. )
  576. grouped_by_project = aliased_query(
  577. dataset=Dataset.Transactions,
  578. start=None,
  579. end=None,
  580. groupby=None,
  581. conditions=None,
  582. filter_keys={"project_id": [project.id], "event_id": [res.event_id]},
  583. selected_columns=["event_id", "project_id", "group_ids"],
  584. aggregations=None,
  585. )
  586. assert grouped_by_project["data"][0]["event_id"] == res.event_id
  587. from sentry.eventstore.models import Event
  588. event_from_nodestore = Event(project_id=project.id, event_id=res.event_id)
  589. assert event_from_nodestore.event_id == res.event_id
  590. event_ids.append(res.event_id)
  591. events.append(res)
  592. transactions_for_project = aliased_query(
  593. dataset=Dataset.Transactions,
  594. start=None,
  595. end=None,
  596. groupby=None,
  597. conditions=None,
  598. filter_keys={"project_id": [project.id]},
  599. selected_columns=["project_id", "event_id"],
  600. aggregations=None,
  601. )
  602. assert len(transactions_for_project["data"]) == times
  603. transactions_by_group = aliased_query(
  604. dataset=Dataset.Transactions,
  605. start=None,
  606. end=None,
  607. # start=group.first_seen,
  608. # end=now + timedelta(hours=4),
  609. groupby=["group_id"],
  610. conditions=None,
  611. filter_keys={"project_id": [project.id], "group_id": [group.id]},
  612. aggregations=[
  613. ["arrayJoin", ["group_ids"], "group_id"],
  614. ["count()", "", "times_seen"],
  615. ],
  616. )
  617. assert transactions_by_group["data"][0]["times_seen"] == times # 1 + (times % 5)
  618. assert self.db.get_range(
  619. TSDBModel.group_performance,
  620. [group.id],
  621. dts[0],
  622. dts[-1],
  623. rollup=3600,
  624. ) == {
  625. group.id: [
  626. # (timestamp(dts[0]), 1 + (times % 5)),
  627. (timestamp(dts[0]), times),
  628. (timestamp(dts[1]), 0),
  629. (timestamp(dts[2]), 0),
  630. (timestamp(dts[3]), 0),
  631. ]
  632. }
  633. def test_range_groups_mult(self):
  634. now = (datetime.utcnow() - timedelta(hours=4)).replace(
  635. hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
  636. )
  637. dts = [now + timedelta(hours=i) for i in range(4)]
  638. project = self.create_project()
  639. group = self.create_group(project=project)
  640. ids = ["a", "b", "c", "d", "e", "f", "1", "2", "3", "4", "5"]
  641. for i, _id in enumerate(ids):
  642. self.__insert_transaction(
  643. environment=None,
  644. project_id=project.id,
  645. user_id="my_user",
  646. email="test@email.com",
  647. insert_timestamp=now + timedelta(minutes=i * 10),
  648. groups=[group],
  649. transaction_name=_id,
  650. )
  651. assert self.db.get_range(
  652. TSDBModel.group_performance,
  653. [group.id],
  654. dts[0],
  655. dts[-1],
  656. rollup=3600,
  657. ) == {
  658. group.id: [
  659. (timestamp(dts[0]), 6),
  660. (timestamp(dts[1]), 5),
  661. (timestamp(dts[2]), 0),
  662. (timestamp(dts[3]), 0),
  663. ]
  664. }
  665. def test_range_groups_simple(self):
  666. project = self.create_project()
  667. group = self.create_group(project=project)
  668. now = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC)
  669. # for r in range(0, 14400, 600): # Every 10 min for 4 hours
  670. # for r in [1, 2, 3, 4, 5, 6, 7, 8]:
  671. ids = ["a", "b", "c", "d", "e"] # , "f"]
  672. for r in ids:
  673. # for r in range(0, 9, 1):
  674. self.__insert_transaction(
  675. environment=None,
  676. project_id=project.id,
  677. # change every 55 min so some hours have 1 user, some have 2
  678. user_id=f"user{r}",
  679. email=f"user{r}@sentry.io",
  680. # release_version=str(r // 3600) * 10, # 1 per hour,
  681. insert_timestamp=now,
  682. groups=[group],
  683. transaction_name=r,
  684. )
  685. dts = [now + timedelta(hours=i) for i in range(4)]
  686. assert self.db.get_range(
  687. TSDBModel.group_performance,
  688. [group.id],
  689. dts[0],
  690. dts[-1],
  691. rollup=3600,
  692. ) == {
  693. group.id: [
  694. (timestamp(dts[0]), len(ids)),
  695. (timestamp(dts[1]), 0),
  696. (timestamp(dts[2]), 0),
  697. (timestamp(dts[3]), 0),
  698. ]
  699. }
  700. def test_range_groups(self):
  701. dts = [self.now + timedelta(hours=i) for i in range(4)]
  702. # Multiple groups
  703. assert self.db.get_range(
  704. TSDBModel.group_performance,
  705. [self.proj1group1.id, self.proj1group2.id],
  706. dts[0],
  707. dts[-1],
  708. rollup=3600,
  709. ) == {
  710. self.proj1group1.id: [
  711. (timestamp(dts[0]), 3),
  712. (timestamp(dts[1]), 3),
  713. (timestamp(dts[2]), 3),
  714. (timestamp(dts[3]), 3),
  715. ],
  716. self.proj1group2.id: [
  717. (timestamp(dts[0]), 3),
  718. (timestamp(dts[1]), 3),
  719. (timestamp(dts[2]), 3),
  720. (timestamp(dts[3]), 3),
  721. ],
  722. }
  723. assert (
  724. self.db.get_range(TSDBModel.group_performance, [], dts[0], dts[-1], rollup=3600) == {}
  725. )
  726. class AddJitterToSeriesTest(TestCase):
  727. def setUp(self):
  728. self.db = SnubaTSDB()
  729. def run_test(self, end, interval, jitter, expected_start, expected_end):
  730. end = end.replace(tzinfo=pytz.UTC)
  731. start = end - interval
  732. rollup, rollup_series = self.db.get_optimal_rollup_series(start, end)
  733. series = self.db._add_jitter_to_series(rollup_series, start, rollup, jitter)
  734. assert to_datetime(series[0]) == expected_start.replace(tzinfo=pytz.UTC)
  735. assert to_datetime(series[-1]) == expected_end.replace(tzinfo=pytz.UTC)
  736. def test(self):
  737. self.run_test(
  738. end=datetime(2022, 5, 18, 10, 23, 4),
  739. interval=timedelta(hours=1),
  740. jitter=5,
  741. expected_start=datetime(2022, 5, 18, 9, 22, 55),
  742. expected_end=datetime(2022, 5, 18, 10, 22, 55),
  743. )
  744. self.run_test(
  745. end=datetime(2022, 5, 18, 10, 23, 8),
  746. interval=timedelta(hours=1),
  747. jitter=5,
  748. expected_start=datetime(2022, 5, 18, 9, 23, 5),
  749. expected_end=datetime(2022, 5, 18, 10, 23, 5),
  750. )
  751. # Jitter should be the same
  752. self.run_test(
  753. end=datetime(2022, 5, 18, 10, 23, 8),
  754. interval=timedelta(hours=1),
  755. jitter=55,
  756. expected_start=datetime(2022, 5, 18, 9, 23, 5),
  757. expected_end=datetime(2022, 5, 18, 10, 23, 5),
  758. )
  759. self.run_test(
  760. end=datetime(2022, 5, 18, 22, 33, 2),
  761. interval=timedelta(minutes=1),
  762. jitter=3,
  763. expected_start=datetime(2022, 5, 18, 22, 31, 53),
  764. expected_end=datetime(2022, 5, 18, 22, 32, 53),
  765. )
  766. def test_empty_series(self):
  767. assert self.db._add_jitter_to_series([], datetime(2022, 5, 18, 10, 23, 4), 60, 127) == []
  768. assert self.db._add_jitter_to_series([], datetime(2022, 5, 18, 10, 23, 4), 60, None) == []