# test_tsdb_backend.py
  1. import functools
  2. from datetime import datetime, timedelta
  3. from typing import Optional, Sequence
  4. from unittest import mock
  5. from unittest.mock import patch
  6. import pytz
  7. from sentry.event_manager import _pull_out_data
  8. from sentry.models import Environment, Group, GroupRelease, Release
  9. from sentry.testutils import SnubaTestCase, TestCase
  10. from sentry.testutils.helpers.datetime import iso_format
  11. from sentry.tsdb.base import TSDBModel
  12. from sentry.tsdb.snuba import SnubaTSDB
  13. from sentry.utils.dates import to_datetime, to_timestamp
  14. from sentry.utils.snuba import aliased_query
  15. def timestamp(d):
  16. t = int(to_timestamp(d))
  17. return t - (t % 3600)
  18. def has_shape(data, shape, allow_empty=False):
  19. """
  20. Determine if a data object has the provided shape
  21. At any level, the object in `data` and in `shape` must have the same type.
  22. A dict is the same shape if all its keys and values have the same shape as the
  23. key/value in `shape`. The number of keys/values is not relevant.
  24. A list is the same shape if all its items have the same shape as the value
  25. in `shape`
  26. A tuple is the same shape if it has the same length as `shape` and all the
  27. values have the same shape as the corresponding value in `shape`
  28. Any other object simply has to have the same type.
  29. If `allow_empty` is set, lists and dicts in `data` will pass even if they are empty.
  30. """
  31. if not isinstance(data, type(shape)):
  32. return False
  33. if isinstance(data, dict):
  34. return (
  35. (allow_empty or len(data) > 0)
  36. and all(has_shape(k, list(shape.keys())[0]) for k in data.keys())
  37. and all(has_shape(v, list(shape.values())[0]) for v in data.values())
  38. )
  39. elif isinstance(data, list):
  40. return (allow_empty or len(data) > 0) and all(has_shape(v, shape[0]) for v in data)
  41. elif isinstance(data, tuple):
  42. return len(data) == len(shape) and all(
  43. has_shape(data[i], shape[i]) for i in range(len(data))
  44. )
  45. else:
  46. return True
  47. class SnubaTSDBTest(TestCase, SnubaTestCase):
  48. def setUp(self):
  49. super().setUp()
  50. self.db = SnubaTSDB()
  51. self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
  52. hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
  53. )
  54. self.proj1 = self.create_project()
  55. env1 = "test"
  56. env2 = "dev"
  57. defaultenv = ""
  58. release1 = "1" * 10
  59. release2 = "2" * 10
  60. self.release1 = Release.objects.create(
  61. organization_id=self.organization.id, version=release1, date_added=self.now
  62. )
  63. self.release1.add_project(self.proj1)
  64. self.release2 = Release.objects.create(
  65. organization_id=self.organization.id, version=release2, date_added=self.now
  66. )
  67. self.release2.add_project(self.proj1)
  68. for r in range(0, 14400, 600): # Every 10 min for 4 hours
  69. self.store_event(
  70. data={
  71. "event_id": (str(r) * 32)[:32],
  72. "message": "message 1",
  73. "platform": "python",
  74. "fingerprint": [["group-1"], ["group-2"]][
  75. (r // 600) % 2
  76. ], # Switch every 10 mins
  77. "timestamp": iso_format(self.now + timedelta(seconds=r)),
  78. "tags": {
  79. "foo": "bar",
  80. "baz": "quux",
  81. # Switch every 2 hours
  82. "environment": [env1, None][(r // 7200) % 3],
  83. "sentry:user": f"id:user{r // 3300}",
  84. },
  85. "user": {
  86. # change every 55 min so some hours have 1 user, some have 2
  87. "id": f"user{r // 3300}",
  88. "email": f"user{r}@sentry.io",
  89. },
  90. "release": str(r // 3600) * 10, # 1 per hour,
  91. },
  92. project_id=self.proj1.id,
  93. )
  94. groups = Group.objects.filter(project=self.proj1).order_by("id")
  95. self.proj1group1 = groups[0]
  96. self.proj1group2 = groups[1]
  97. self.env1 = Environment.objects.get(name=env1)
  98. self.env2 = self.create_environment(name=env2) # No events
  99. self.defaultenv = Environment.objects.get(name=defaultenv)
  100. self.group1release1env1 = GroupRelease.objects.get(
  101. project_id=self.proj1.id,
  102. group_id=self.proj1group1.id,
  103. release_id=self.release1.id,
  104. environment=env1,
  105. )
  106. self.group1release2env1 = GroupRelease.objects.create(
  107. project_id=self.proj1.id,
  108. group_id=self.proj1group1.id,
  109. release_id=self.release2.id,
  110. environment=env1,
  111. )
  112. self.group2release1env1 = GroupRelease.objects.get(
  113. project_id=self.proj1.id,
  114. group_id=self.proj1group2.id,
  115. release_id=self.release1.id,
  116. environment=env1,
  117. )
  118. def test_range_single(self):
  119. env1 = "test"
  120. project = self.create_project()
  121. for r in range(0, 600 * 6 * 4, 300): # Every 10 min for 4 hours
  122. self.store_event(
  123. data={
  124. "event_id": (str(r) * 32)[:32],
  125. "message": "message 1",
  126. "platform": "python",
  127. "fingerprint": ["group-1"],
  128. "timestamp": iso_format(self.now + timedelta(seconds=r)),
  129. "tags": {
  130. "foo": "bar",
  131. "baz": "quux",
  132. # Switch every 2 hours
  133. "environment": [env1, None][(r // 7200) % 3],
  134. "sentry:user": f"id:user{r // 3300}",
  135. },
  136. "user": {
  137. # change every 55 min so some hours have 1 user, some have 2
  138. "id": f"user{r // 3300}",
  139. "email": f"user{r}@sentry.io",
  140. },
  141. "release": str(r // 3600) * 10, # 1 per hour,
  142. },
  143. project_id=project.id,
  144. )
  145. groups = Group.objects.filter(project=project).order_by("id")
  146. group = groups[0]
  147. dts = [self.now + timedelta(hours=i) for i in range(4)]
  148. assert self.db.get_range(TSDBModel.group, [group.id], dts[0], dts[-1], rollup=3600) == {
  149. group.id: [
  150. (timestamp(dts[0]), 6 * 2),
  151. (timestamp(dts[1]), 6 * 2),
  152. (timestamp(dts[2]), 6 * 2),
  153. (timestamp(dts[3]), 6 * 2),
  154. ]
  155. }
  156. def test_range_groups(self):
  157. dts = [self.now + timedelta(hours=i) for i in range(4)]
  158. assert self.db.get_range(
  159. TSDBModel.group, [self.proj1group1.id], dts[0], dts[-1], rollup=3600
  160. ) == {
  161. self.proj1group1.id: [
  162. (timestamp(dts[0]), 3),
  163. (timestamp(dts[1]), 3),
  164. (timestamp(dts[2]), 3),
  165. (timestamp(dts[3]), 3),
  166. ]
  167. }
  168. # Multiple groups
  169. assert self.db.get_range(
  170. TSDBModel.group,
  171. [self.proj1group1.id, self.proj1group2.id],
  172. dts[0],
  173. dts[-1],
  174. rollup=3600,
  175. ) == {
  176. self.proj1group1.id: [
  177. (timestamp(dts[0]), 3),
  178. (timestamp(dts[1]), 3),
  179. (timestamp(dts[2]), 3),
  180. (timestamp(dts[3]), 3),
  181. ],
  182. self.proj1group2.id: [
  183. (timestamp(dts[0]), 3),
  184. (timestamp(dts[1]), 3),
  185. (timestamp(dts[2]), 3),
  186. (timestamp(dts[3]), 3),
  187. ],
  188. }
  189. assert self.db.get_range(TSDBModel.group, [], dts[0], dts[-1], rollup=3600) == {}
  190. def test_range_releases(self):
  191. dts = [self.now + timedelta(hours=i) for i in range(4)]
  192. assert self.db.get_range(
  193. TSDBModel.release, [self.release1.id], dts[0], dts[-1], rollup=3600
  194. ) == {
  195. self.release1.id: [
  196. (timestamp(dts[0]), 0),
  197. (timestamp(dts[1]), 6),
  198. (timestamp(dts[2]), 0),
  199. (timestamp(dts[3]), 0),
  200. ]
  201. }
  202. def test_range_project(self):
  203. dts = [self.now + timedelta(hours=i) for i in range(4)]
  204. assert self.db.get_range(
  205. TSDBModel.project, [self.proj1.id], dts[0], dts[-1], rollup=3600
  206. ) == {
  207. self.proj1.id: [
  208. (timestamp(dts[0]), 6),
  209. (timestamp(dts[1]), 6),
  210. (timestamp(dts[2]), 6),
  211. (timestamp(dts[3]), 6),
  212. ]
  213. }
  214. def test_range_environment_filter(self):
  215. dts = [self.now + timedelta(hours=i) for i in range(4)]
  216. assert self.db.get_range(
  217. TSDBModel.project,
  218. [self.proj1.id],
  219. dts[0],
  220. dts[-1],
  221. rollup=3600,
  222. environment_ids=[self.env1.id],
  223. ) == {
  224. self.proj1.id: [
  225. (timestamp(dts[0]), 6),
  226. (timestamp(dts[1]), 6),
  227. (timestamp(dts[2]), 0),
  228. (timestamp(dts[3]), 0),
  229. ]
  230. }
  231. # No events submitted for env2
  232. assert self.db.get_range(
  233. TSDBModel.project,
  234. [self.proj1.id],
  235. dts[0],
  236. dts[-1],
  237. rollup=3600,
  238. environment_ids=[self.env2.id],
  239. ) == {
  240. self.proj1.id: [
  241. (timestamp(dts[0]), 0),
  242. (timestamp(dts[1]), 0),
  243. (timestamp(dts[2]), 0),
  244. (timestamp(dts[3]), 0),
  245. ]
  246. }
  247. # Events submitted with no environment should match default environment
  248. assert self.db.get_range(
  249. TSDBModel.project,
  250. [self.proj1.id],
  251. dts[0],
  252. dts[-1],
  253. rollup=3600,
  254. environment_ids=[self.defaultenv.id],
  255. ) == {
  256. self.proj1.id: [
  257. (timestamp(dts[0]), 0),
  258. (timestamp(dts[1]), 0),
  259. (timestamp(dts[2]), 6),
  260. (timestamp(dts[3]), 6),
  261. ]
  262. }
  263. def test_range_rollups(self):
  264. # Daily
  265. daystart = self.now.replace(hour=0) # day buckets start on day boundaries
  266. dts = [daystart + timedelta(days=i) for i in range(2)]
  267. assert self.db.get_range(
  268. TSDBModel.project, [self.proj1.id], dts[0], dts[-1], rollup=86400
  269. ) == {self.proj1.id: [(timestamp(dts[0]), 24), (timestamp(dts[1]), 0)]}
  270. # Minutely
  271. dts = [self.now + timedelta(minutes=i) for i in range(120)]
  272. # Expect every 10th minute to have a 1, else 0
  273. expected = [(to_timestamp(d), 1 if i % 10 == 0 else 0) for i, d in enumerate(dts)]
  274. assert self.db.get_range(
  275. TSDBModel.project, [self.proj1.id], dts[0], dts[-1], rollup=60
  276. ) == {self.proj1.id: expected}
  277. def test_distinct_counts_series_users(self):
  278. dts = [self.now + timedelta(hours=i) for i in range(4)]
  279. assert self.db.get_distinct_counts_series(
  280. TSDBModel.users_affected_by_group, [self.proj1group1.id], dts[0], dts[-1], rollup=3600
  281. ) == {
  282. self.proj1group1.id: [
  283. (timestamp(dts[0]), 1),
  284. (timestamp(dts[1]), 1),
  285. (timestamp(dts[2]), 1),
  286. (timestamp(dts[3]), 2),
  287. ]
  288. }
  289. dts = [self.now + timedelta(hours=i) for i in range(4)]
  290. assert self.db.get_distinct_counts_series(
  291. TSDBModel.users_affected_by_project, [self.proj1.id], dts[0], dts[-1], rollup=3600
  292. ) == {
  293. self.proj1.id: [
  294. (timestamp(dts[0]), 1),
  295. (timestamp(dts[1]), 2),
  296. (timestamp(dts[2]), 2),
  297. (timestamp(dts[3]), 2),
  298. ]
  299. }
  300. assert (
  301. self.db.get_distinct_counts_series(
  302. TSDBModel.users_affected_by_group, [], dts[0], dts[-1], rollup=3600
  303. )
  304. == {}
  305. )
  306. def get_distinct_counts_totals_users(self):
  307. assert self.db.get_distinct_counts_totals(
  308. TSDBModel.users_affected_by_group,
  309. [self.proj1group1.id],
  310. self.now,
  311. self.now + timedelta(hours=4),
  312. rollup=3600,
  313. ) == {
  314. self.proj1group1.id: 2 # 2 unique users overall
  315. }
  316. assert self.db.get_distinct_counts_totals(
  317. TSDBModel.users_affected_by_group,
  318. [self.proj1group1.id],
  319. self.now,
  320. self.now,
  321. rollup=3600,
  322. ) == {
  323. self.proj1group1.id: 1 # Only 1 unique user in the first hour
  324. }
  325. assert self.db.get_distinct_counts_totals(
  326. TSDBModel.users_affected_by_project,
  327. [self.proj1.id],
  328. self.now,
  329. self.now + timedelta(hours=4),
  330. rollup=3600,
  331. ) == {self.proj1.id: 2}
  332. assert (
  333. self.db.get_distinct_counts_totals(
  334. TSDBModel.users_affected_by_group,
  335. [],
  336. self.now,
  337. self.now + timedelta(hours=4),
  338. rollup=3600,
  339. )
  340. == {}
  341. )
  342. def test_most_frequent(self):
  343. assert self.db.get_most_frequent(
  344. TSDBModel.frequent_issues_by_project,
  345. [self.proj1.id],
  346. self.now,
  347. self.now + timedelta(hours=4),
  348. rollup=3600,
  349. ) in [
  350. {self.proj1.id: [(self.proj1group1.id, 2.0), (self.proj1group2.id, 1.0)]},
  351. {self.proj1.id: [(self.proj1group2.id, 2.0), (self.proj1group1.id, 1.0)]},
  352. ] # Both issues equally frequent
  353. assert (
  354. self.db.get_most_frequent(
  355. TSDBModel.frequent_issues_by_project,
  356. [],
  357. self.now,
  358. self.now + timedelta(hours=4),
  359. rollup=3600,
  360. )
  361. == {}
  362. )
  363. def test_frequency_series(self):
  364. dts = [self.now + timedelta(hours=i) for i in range(4)]
  365. assert self.db.get_frequency_series(
  366. TSDBModel.frequent_releases_by_group,
  367. {
  368. self.proj1group1.id: (self.group1release1env1.id, self.group1release2env1.id),
  369. self.proj1group2.id: (self.group2release1env1.id,),
  370. },
  371. dts[0],
  372. dts[-1],
  373. rollup=3600,
  374. ) == {
  375. self.proj1group1.id: [
  376. (timestamp(dts[0]), {self.group1release1env1.id: 0, self.group1release2env1.id: 0}),
  377. (timestamp(dts[1]), {self.group1release1env1.id: 3, self.group1release2env1.id: 0}),
  378. (timestamp(dts[2]), {self.group1release1env1.id: 0, self.group1release2env1.id: 3}),
  379. (timestamp(dts[3]), {self.group1release1env1.id: 0, self.group1release2env1.id: 0}),
  380. ],
  381. self.proj1group2.id: [
  382. (timestamp(dts[0]), {self.group2release1env1.id: 0}),
  383. (timestamp(dts[1]), {self.group2release1env1.id: 3}),
  384. (timestamp(dts[2]), {self.group2release1env1.id: 0}),
  385. (timestamp(dts[3]), {self.group2release1env1.id: 0}),
  386. ],
  387. }
  388. assert (
  389. self.db.get_frequency_series(
  390. TSDBModel.frequent_releases_by_group, {}, dts[0], dts[-1], rollup=3600
  391. )
  392. == {}
  393. )
  394. def test_result_shape(self):
  395. """
  396. Tests that the results from the different TSDB methods have the
  397. expected format.
  398. """
  399. project_id = self.proj1.id
  400. dts = [self.now + timedelta(hours=i) for i in range(4)]
  401. results = self.db.get_most_frequent(
  402. TSDBModel.frequent_issues_by_project, [project_id], dts[0], dts[0]
  403. )
  404. assert has_shape(results, {1: [(1, 1.0)]})
  405. results = self.db.get_most_frequent_series(
  406. TSDBModel.frequent_issues_by_project, [project_id], dts[0], dts[0]
  407. )
  408. assert has_shape(results, {1: [(1, {1: 1.0})]})
  409. items = {
  410. # {project_id: (issue_id, issue_id, ...)}
  411. project_id: (self.proj1group1.id, self.proj1group2.id)
  412. }
  413. results = self.db.get_frequency_series(
  414. TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1]
  415. )
  416. assert has_shape(results, {1: [(1, {1: 1})]})
  417. results = self.db.get_frequency_totals(
  418. TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1]
  419. )
  420. assert has_shape(results, {1: {1: 1}})
  421. results = self.db.get_range(TSDBModel.project, [project_id], dts[0], dts[-1])
  422. assert has_shape(results, {1: [(1, 1)]})
  423. results = self.db.get_distinct_counts_series(
  424. TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1]
  425. )
  426. assert has_shape(results, {1: [(1, 1)]})
  427. results = self.db.get_distinct_counts_totals(
  428. TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1]
  429. )
  430. assert has_shape(results, {1: 1})
  431. results = self.db.get_distinct_counts_union(
  432. TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1]
  433. )
  434. assert has_shape(results, 1)
  435. def test_calculated_limit(self):
  436. with patch("sentry.tsdb.snuba.snuba") as snuba:
  437. # 24h test
  438. rollup = 3600
  439. end = self.now
  440. start = end + timedelta(days=-1, seconds=rollup)
  441. self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
  442. assert snuba.query.call_args[1]["limit"] == 120
  443. # 14 day test
  444. rollup = 86400
  445. start = end + timedelta(days=-14, seconds=rollup)
  446. self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
  447. assert snuba.query.call_args[1]["limit"] == 70
  448. # 1h test
  449. rollup = 3600
  450. end = self.now
  451. start = end + timedelta(hours=-1, seconds=rollup)
  452. self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
  453. assert snuba.query.call_args[1]["limit"] == 5
class SnubaTSDBGroupPerformanceTest(TestCase, SnubaTestCase):
    """
    Tests for SnubaTSDB over the transactions dataset, where a transaction
    event can be attributed to multiple performance-issue groups via the
    `group_ids` column.
    """

    def setUp(self):
        super().setUp()
        self.db = SnubaTSDB()
        # Midnight (UTC) of the day containing "4 hours ago", so every seeded
        # transaction lands inside a stable, fully elapsed window.
        self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        self.proj1 = self.create_project()
        env1 = "test"
        env2 = "dev"
        defaultenv = ""

        self.proj1group1 = self.create_group(project=self.proj1)
        self.proj1group2 = self.create_group(project=self.proj1)

        for r in range(0, 14400, 600):  # Every 10 min for 4 hours
            self.__insert_transaction(
                # NOTE: r // 7200 stays <= 1 in this range, so % 3 is safe.
                environment=[env1, None][(r // 7200) % 3],
                project_id=self.proj1.id,
                # change every 55 min so some hours have 1 user, some have 2
                user_id=f"user{r // 3300}",
                email=f"user{r}@sentry.io",
                # release_version=str(r // 3600) * 10,  # 1 per hour,
                insert_timestamp=self.now + timedelta(seconds=r),
                # Alternate the owning group every 10 minutes.
                groups=[[self.proj1group1], [self.proj1group2]][(r // 600) % 2],
                transaction_name=str(r),
            )

        self.env1 = Environment.objects.get(name=env1)
        self.env2 = self.create_environment(name=env2)  # No events
        self.defaultenv = Environment.objects.get(name=defaultenv)

    def __insert_transaction(
        self,
        environment: Optional[str],
        project_id: int,
        user_id: str,
        email: str,
        insert_timestamp: datetime,
        groups: Sequence[Group],
        transaction_name: str,
    ):
        """
        Store one transaction event attributed to `groups`, then read it back
        from Snuba and sanity-check the stored row before returning the event.

        Group attribution is injected by monkey-patching
        `sentry.event_manager._pull_out_data` so the event carries the given
        groups through ingestion.
        """

        def inject_group_ids(jobs, projects, _groups=None):
            # Run the real _pull_out_data, then attach our groups to each
            # produced event so they end up in Snuba's group_ids column.
            _pull_out_data(jobs, projects)
            if _groups:
                for job in jobs:
                    job["event"].groups = _groups
            return jobs, projects

        event_data = {
            "type": "transaction",
            "level": "info",
            "message": "transaction message",
            "tags": {
                "environment": environment,
                "sentry:user": f"id:{user_id}",
            },
            "user": {
                "id": user_id,
                "email": email,
            },
            "contexts": {"trace": {"trace_id": "b" * 32, "span_id": "c" * 16, "op": ""}},
            "timestamp": insert_timestamp.timestamp(),
            "start_timestamp": insert_timestamp.timestamp(),
            "transaction": transaction_name,
        }

        with mock.patch(
            "sentry.event_manager._pull_out_data",
            functools.partial(
                inject_group_ids,
                _groups=groups,
            ),
        ):
            event = self.store_event(
                data=event_data,
                project_id=project_id,
            )

        assert event
        from sentry.utils import snuba

        # Verify the row actually landed in Snuba with the injected groups.
        result = snuba.raw_query(
            dataset=snuba.Dataset.Transactions,
            start=insert_timestamp - timedelta(days=1),
            end=insert_timestamp + timedelta(days=1),
            selected_columns=[
                "event_id",
                "project_id",
                "environment",
                "group_ids",
                "tags[sentry:user]",
                "timestamp",
            ],
            groupby=None,
            filter_keys={"project_id": [project_id], "event_id": [event.event_id]},
        )
        assert len(result["data"]) == 1
        assert result["data"][0]["event_id"] == event.event_id
        assert result["data"][0]["project_id"] == event.project_id
        assert result["data"][0]["group_ids"] == [g.id for g in groups]
        assert result["data"][0]["tags[sentry:user]"] == f"id:{user_id}"
        assert result["data"][0]["environment"] == (environment if environment else None)
        assert result["data"][0]["timestamp"] == insert_timestamp.isoformat()

        return event

    def test_range_groups_single(self):
        """All transactions of a single group land in the first hour bucket."""
        from sentry.snuba.dataset import Dataset

        now = (datetime.utcnow() - timedelta(hours=4)).replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        dts = [now + timedelta(hours=i) for i in range(4)]
        project = self.create_project()
        group = self.create_group(project=project, first_seen=now)

        # not sure what's going on here, but `times=1,2,3,4` work fine
        # fails with anything above 4
        times = 4
        event_ids = []
        events = []
        for i in range(0, times):
            res = self.__insert_transaction(
                environment=None,
                project_id=project.id,
                user_id="my_user",
                email="test@email.com",
                insert_timestamp=now + timedelta(minutes=i * 10),
                groups=[group],
                transaction_name=str(i),
            )

            # Cross-check each inserted event is queryable by event_id.
            grouped_by_project = aliased_query(
                dataset=Dataset.Transactions,
                start=None,
                end=None,
                groupby=None,
                conditions=None,
                filter_keys={"project_id": [project.id], "event_id": [res.event_id]},
                selected_columns=["event_id", "project_id", "group_ids"],
                aggregations=None,
            )
            assert grouped_by_project["data"][0]["event_id"] == res.event_id

            from sentry.eventstore.models import Event

            event_from_nodestore = Event(project_id=project.id, event_id=res.event_id)
            assert event_from_nodestore.event_id == res.event_id
            event_ids.append(res.event_id)
            events.append(res)

        transactions_for_project = aliased_query(
            dataset=Dataset.Transactions,
            start=None,
            end=None,
            groupby=None,
            conditions=None,
            filter_keys={"project_id": [project.id]},
            selected_columns=["project_id", "event_id"],
            aggregations=None,
        )
        assert len(transactions_for_project["data"]) == times

        # Count transactions per group by array-joining group_ids.
        transactions_by_group = aliased_query(
            dataset=Dataset.Transactions,
            start=None,
            end=None,
            # start=group.first_seen,
            # end=now + timedelta(hours=4),
            groupby=["group_id"],
            conditions=None,
            filter_keys={"project_id": [project.id], "group_id": [group.id]},
            aggregations=[
                ["arrayJoin", ["group_ids"], "group_id"],
                ["count()", "", "times_seen"],
            ],
        )
        assert transactions_by_group["data"][0]["times_seen"] == times  # 1 + (times % 5)

        assert self.db.get_range(
            TSDBModel.group_performance,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            group.id: [
                # (timestamp(dts[0]), 1 + (times % 5)),
                (timestamp(dts[0]), times),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_groups_mult(self):
        """Transactions spread over >1 hour split across hourly buckets."""
        now = (datetime.utcnow() - timedelta(hours=4)).replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        dts = [now + timedelta(hours=i) for i in range(4)]
        project = self.create_project()
        group = self.create_group(project=project)
        # 11 transactions, one every 10 minutes -> 6 in hour 0, 5 in hour 1.
        ids = ["a", "b", "c", "d", "e", "f", "1", "2", "3", "4", "5"]
        for i, _id in enumerate(ids):
            self.__insert_transaction(
                environment=None,
                project_id=project.id,
                user_id="my_user",
                email="test@email.com",
                insert_timestamp=now + timedelta(minutes=i * 10),
                groups=[group],
                transaction_name=_id,
            )

        assert self.db.get_range(
            TSDBModel.group_performance,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            group.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 5),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_groups_simple(self):
        """N transactions at the same instant all count in the first bucket."""
        project = self.create_project()
        group = self.create_group(project=project)
        now = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC)
        # for r in range(0, 14400, 600):  # Every 10 min for 4 hours
        # for r in [1, 2, 3, 4, 5, 6, 7, 8]:
        ids = ["a", "b", "c", "d", "e"]  # , "f"]
        for r in ids:
            # for r in range(0, 9, 1):
            self.__insert_transaction(
                environment=None,
                project_id=project.id,
                # change every 55 min so some hours have 1 user, some have 2
                user_id=f"user{r}",
                email=f"user{r}@sentry.io",
                # release_version=str(r // 3600) * 10,  # 1 per hour,
                insert_timestamp=now,
                groups=[group],
                transaction_name=r,
            )

        dts = [now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.group_performance,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            group.id: [
                (timestamp(dts[0]), len(ids)),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_groups(self):
        """Per-group performance counts for the groups seeded in setUp."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]

        # Multiple groups
        assert self.db.get_range(
            TSDBModel.group_performance,
            [self.proj1group1.id, self.proj1group2.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }

        assert (
            self.db.get_range(TSDBModel.group_performance, [], dts[0], dts[-1], rollup=3600) == {}
        )
  724. class AddJitterToSeriesTest(TestCase):
  725. def setUp(self):
  726. self.db = SnubaTSDB()
  727. def run_test(self, end, interval, jitter, expected_start, expected_end):
  728. end = end.replace(tzinfo=pytz.UTC)
  729. start = end - interval
  730. rollup, rollup_series = self.db.get_optimal_rollup_series(start, end)
  731. series = self.db._add_jitter_to_series(rollup_series, start, rollup, jitter)
  732. assert to_datetime(series[0]) == expected_start.replace(tzinfo=pytz.UTC)
  733. assert to_datetime(series[-1]) == expected_end.replace(tzinfo=pytz.UTC)
  734. def test(self):
  735. self.run_test(
  736. end=datetime(2022, 5, 18, 10, 23, 4),
  737. interval=timedelta(hours=1),
  738. jitter=5,
  739. expected_start=datetime(2022, 5, 18, 9, 22, 55),
  740. expected_end=datetime(2022, 5, 18, 10, 22, 55),
  741. )
  742. self.run_test(
  743. end=datetime(2022, 5, 18, 10, 23, 8),
  744. interval=timedelta(hours=1),
  745. jitter=5,
  746. expected_start=datetime(2022, 5, 18, 9, 23, 5),
  747. expected_end=datetime(2022, 5, 18, 10, 23, 5),
  748. )
  749. # Jitter should be the same
  750. self.run_test(
  751. end=datetime(2022, 5, 18, 10, 23, 8),
  752. interval=timedelta(hours=1),
  753. jitter=55,
  754. expected_start=datetime(2022, 5, 18, 9, 23, 5),
  755. expected_end=datetime(2022, 5, 18, 10, 23, 5),
  756. )
  757. self.run_test(
  758. end=datetime(2022, 5, 18, 22, 33, 2),
  759. interval=timedelta(minutes=1),
  760. jitter=3,
  761. expected_start=datetime(2022, 5, 18, 22, 31, 53),
  762. expected_end=datetime(2022, 5, 18, 22, 32, 53),
  763. )
  764. def test_empty_series(self):
  765. assert self.db._add_jitter_to_series([], datetime(2022, 5, 18, 10, 23, 4), 60, 127) == []
  766. assert self.db._add_jitter_to_series([], datetime(2022, 5, 18, 10, 23, 4), 60, None) == []