test_tsdb_backend.py 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959
  1. from datetime import datetime, timedelta
  2. from unittest.mock import patch
  3. import pytz
  4. from sentry.models import Environment, Group, GroupRelease, Release
  5. from sentry.testutils import SnubaTestCase, TestCase
  6. from sentry.testutils.helpers.datetime import iso_format
  7. from sentry.testutils.performance_issues.store_transaction import PerfIssueTransactionTestMixin
  8. from sentry.testutils.silo import region_silo_test
  9. from sentry.tsdb.base import TSDBModel
  10. from sentry.tsdb.snuba import SnubaTSDB
  11. from sentry.types.issues import GroupType
  12. from sentry.utils.dates import to_datetime, to_timestamp
  13. from sentry.utils.snuba import aliased_query
  14. from tests.sentry.issues.test_utils import SearchIssueTestMixin
  15. def timestamp(d):
  16. t = int(to_timestamp(d))
  17. return t - (t % 3600)
  18. def has_shape(data, shape, allow_empty=False):
  19. """
  20. Determine if a data object has the provided shape
  21. At any level, the object in `data` and in `shape` must have the same type.
  22. A dict is the same shape if all its keys and values have the same shape as the
  23. key/value in `shape`. The number of keys/values is not relevant.
  24. A list is the same shape if all its items have the same shape as the value
  25. in `shape`
  26. A tuple is the same shape if it has the same length as `shape` and all the
  27. values have the same shape as the corresponding value in `shape`
  28. Any other object simply has to have the same type.
  29. If `allow_empty` is set, lists and dicts in `data` will pass even if they are empty.
  30. """
  31. if not isinstance(data, type(shape)):
  32. return False
  33. if isinstance(data, dict):
  34. return (
  35. (allow_empty or len(data) > 0)
  36. and all(has_shape(k, list(shape.keys())[0]) for k in data.keys())
  37. and all(has_shape(v, list(shape.values())[0]) for v in data.values())
  38. )
  39. elif isinstance(data, list):
  40. return (allow_empty or len(data) > 0) and all(has_shape(v, shape[0]) for v in data)
  41. elif isinstance(data, tuple):
  42. return len(data) == len(shape) and all(
  43. has_shape(data[i], shape[i]) for i in range(len(data))
  44. )
  45. else:
  46. return True
  47. class SnubaTSDBTest(TestCase, SnubaTestCase):
  48. def setUp(self):
  49. super().setUp()
  50. self.db = SnubaTSDB()
  51. self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
  52. hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
  53. )
  54. self.proj1 = self.create_project()
  55. env1 = "test"
  56. env2 = "dev"
  57. defaultenv = ""
  58. release1 = "1" * 10
  59. release2 = "2" * 10
  60. self.release1 = Release.objects.create(
  61. organization_id=self.organization.id, version=release1, date_added=self.now
  62. )
  63. self.release1.add_project(self.proj1)
  64. self.release2 = Release.objects.create(
  65. organization_id=self.organization.id, version=release2, date_added=self.now
  66. )
  67. self.release2.add_project(self.proj1)
  68. for r in range(0, 14400, 600): # Every 10 min for 4 hours
  69. self.store_event(
  70. data={
  71. "event_id": (str(r) * 32)[:32],
  72. "message": "message 1",
  73. "platform": "python",
  74. "fingerprint": [["group-1"], ["group-2"]][
  75. (r // 600) % 2
  76. ], # Switch every 10 mins
  77. "timestamp": iso_format(self.now + timedelta(seconds=r)),
  78. "tags": {
  79. "foo": "bar",
  80. "baz": "quux",
  81. # Switch every 2 hours
  82. "environment": [env1, None][(r // 7200) % 3],
  83. "sentry:user": f"id:user{r // 3300}",
  84. },
  85. "user": {
  86. # change every 55 min so some hours have 1 user, some have 2
  87. "id": f"user{r // 3300}",
  88. },
  89. "release": str(r // 3600) * 10, # 1 per hour,
  90. },
  91. project_id=self.proj1.id,
  92. )
  93. groups = Group.objects.filter(project=self.proj1).order_by("id")
  94. self.proj1group1 = groups[0]
  95. self.proj1group2 = groups[1]
  96. self.env1 = Environment.objects.get(name=env1)
  97. self.env2 = self.create_environment(name=env2) # No events
  98. self.defaultenv = Environment.objects.get(name=defaultenv)
  99. self.group1release1env1 = GroupRelease.objects.get(
  100. project_id=self.proj1.id,
  101. group_id=self.proj1group1.id,
  102. release_id=self.release1.id,
  103. environment=env1,
  104. )
  105. self.group1release2env1 = GroupRelease.objects.create(
  106. project_id=self.proj1.id,
  107. group_id=self.proj1group1.id,
  108. release_id=self.release2.id,
  109. environment=env1,
  110. )
  111. self.group2release1env1 = GroupRelease.objects.get(
  112. project_id=self.proj1.id,
  113. group_id=self.proj1group2.id,
  114. release_id=self.release1.id,
  115. environment=env1,
  116. )
  117. def test_range_single(self):
  118. env1 = "test"
  119. project = self.create_project()
  120. for r in range(0, 600 * 6 * 4, 300): # Every 10 min for 4 hours
  121. self.store_event(
  122. data={
  123. "event_id": (str(r) * 32)[:32],
  124. "message": "message 1",
  125. "platform": "python",
  126. "fingerprint": ["group-1"],
  127. "timestamp": iso_format(self.now + timedelta(seconds=r)),
  128. "tags": {
  129. "foo": "bar",
  130. "baz": "quux",
  131. # Switch every 2 hours
  132. "environment": [env1, None][(r // 7200) % 3],
  133. "sentry:user": f"id:user{r // 3300}",
  134. },
  135. "user": {
  136. # change every 55 min so some hours have 1 user, some have 2
  137. "id": f"user{r // 3300}",
  138. },
  139. "release": str(r // 3600) * 10, # 1 per hour,
  140. },
  141. project_id=project.id,
  142. )
  143. groups = Group.objects.filter(project=project).order_by("id")
  144. group = groups[0]
  145. dts = [self.now + timedelta(hours=i) for i in range(4)]
  146. assert self.db.get_range(TSDBModel.group, [group.id], dts[0], dts[-1], rollup=3600) == {
  147. group.id: [
  148. (timestamp(dts[0]), 6 * 2),
  149. (timestamp(dts[1]), 6 * 2),
  150. (timestamp(dts[2]), 6 * 2),
  151. (timestamp(dts[3]), 6 * 2),
  152. ]
  153. }
  154. def test_range_groups(self):
  155. dts = [self.now + timedelta(hours=i) for i in range(4)]
  156. assert self.db.get_range(
  157. TSDBModel.group, [self.proj1group1.id], dts[0], dts[-1], rollup=3600
  158. ) == {
  159. self.proj1group1.id: [
  160. (timestamp(dts[0]), 3),
  161. (timestamp(dts[1]), 3),
  162. (timestamp(dts[2]), 3),
  163. (timestamp(dts[3]), 3),
  164. ]
  165. }
  166. # Multiple groups
  167. assert self.db.get_range(
  168. TSDBModel.group,
  169. [self.proj1group1.id, self.proj1group2.id],
  170. dts[0],
  171. dts[-1],
  172. rollup=3600,
  173. ) == {
  174. self.proj1group1.id: [
  175. (timestamp(dts[0]), 3),
  176. (timestamp(dts[1]), 3),
  177. (timestamp(dts[2]), 3),
  178. (timestamp(dts[3]), 3),
  179. ],
  180. self.proj1group2.id: [
  181. (timestamp(dts[0]), 3),
  182. (timestamp(dts[1]), 3),
  183. (timestamp(dts[2]), 3),
  184. (timestamp(dts[3]), 3),
  185. ],
  186. }
  187. assert self.db.get_range(TSDBModel.group, [], dts[0], dts[-1], rollup=3600) == {}
  188. def test_range_releases(self):
  189. dts = [self.now + timedelta(hours=i) for i in range(4)]
  190. assert self.db.get_range(
  191. TSDBModel.release, [self.release1.id], dts[0], dts[-1], rollup=3600
  192. ) == {
  193. self.release1.id: [
  194. (timestamp(dts[0]), 0),
  195. (timestamp(dts[1]), 6),
  196. (timestamp(dts[2]), 0),
  197. (timestamp(dts[3]), 0),
  198. ]
  199. }
  200. def test_range_project(self):
  201. dts = [self.now + timedelta(hours=i) for i in range(4)]
  202. assert self.db.get_range(
  203. TSDBModel.project, [self.proj1.id], dts[0], dts[-1], rollup=3600
  204. ) == {
  205. self.proj1.id: [
  206. (timestamp(dts[0]), 6),
  207. (timestamp(dts[1]), 6),
  208. (timestamp(dts[2]), 6),
  209. (timestamp(dts[3]), 6),
  210. ]
  211. }
  212. def test_range_environment_filter(self):
  213. dts = [self.now + timedelta(hours=i) for i in range(4)]
  214. assert self.db.get_range(
  215. TSDBModel.project,
  216. [self.proj1.id],
  217. dts[0],
  218. dts[-1],
  219. rollup=3600,
  220. environment_ids=[self.env1.id],
  221. ) == {
  222. self.proj1.id: [
  223. (timestamp(dts[0]), 6),
  224. (timestamp(dts[1]), 6),
  225. (timestamp(dts[2]), 0),
  226. (timestamp(dts[3]), 0),
  227. ]
  228. }
  229. # No events submitted for env2
  230. assert self.db.get_range(
  231. TSDBModel.project,
  232. [self.proj1.id],
  233. dts[0],
  234. dts[-1],
  235. rollup=3600,
  236. environment_ids=[self.env2.id],
  237. ) == {
  238. self.proj1.id: [
  239. (timestamp(dts[0]), 0),
  240. (timestamp(dts[1]), 0),
  241. (timestamp(dts[2]), 0),
  242. (timestamp(dts[3]), 0),
  243. ]
  244. }
  245. # Events submitted with no environment should match default environment
  246. assert self.db.get_range(
  247. TSDBModel.project,
  248. [self.proj1.id],
  249. dts[0],
  250. dts[-1],
  251. rollup=3600,
  252. environment_ids=[self.defaultenv.id],
  253. ) == {
  254. self.proj1.id: [
  255. (timestamp(dts[0]), 0),
  256. (timestamp(dts[1]), 0),
  257. (timestamp(dts[2]), 6),
  258. (timestamp(dts[3]), 6),
  259. ]
  260. }
  261. def test_range_rollups(self):
  262. # Daily
  263. daystart = self.now.replace(hour=0) # day buckets start on day boundaries
  264. dts = [daystart + timedelta(days=i) for i in range(2)]
  265. assert self.db.get_range(
  266. TSDBModel.project, [self.proj1.id], dts[0], dts[-1], rollup=86400
  267. ) == {self.proj1.id: [(timestamp(dts[0]), 24), (timestamp(dts[1]), 0)]}
  268. # Minutely
  269. dts = [self.now + timedelta(minutes=i) for i in range(120)]
  270. # Expect every 10th minute to have a 1, else 0
  271. expected = [(to_timestamp(d), 1 if i % 10 == 0 else 0) for i, d in enumerate(dts)]
  272. assert self.db.get_range(
  273. TSDBModel.project, [self.proj1.id], dts[0], dts[-1], rollup=60
  274. ) == {self.proj1.id: expected}
  275. def test_distinct_counts_series_users(self):
  276. dts = [self.now + timedelta(hours=i) for i in range(4)]
  277. assert self.db.get_distinct_counts_series(
  278. TSDBModel.users_affected_by_group, [self.proj1group1.id], dts[0], dts[-1], rollup=3600
  279. ) == {
  280. self.proj1group1.id: [
  281. (timestamp(dts[0]), 1),
  282. (timestamp(dts[1]), 1),
  283. (timestamp(dts[2]), 1),
  284. (timestamp(dts[3]), 2),
  285. ]
  286. }
  287. dts = [self.now + timedelta(hours=i) for i in range(4)]
  288. assert self.db.get_distinct_counts_series(
  289. TSDBModel.users_affected_by_project, [self.proj1.id], dts[0], dts[-1], rollup=3600
  290. ) == {
  291. self.proj1.id: [
  292. (timestamp(dts[0]), 1),
  293. (timestamp(dts[1]), 2),
  294. (timestamp(dts[2]), 2),
  295. (timestamp(dts[3]), 2),
  296. ]
  297. }
  298. assert (
  299. self.db.get_distinct_counts_series(
  300. TSDBModel.users_affected_by_group, [], dts[0], dts[-1], rollup=3600
  301. )
  302. == {}
  303. )
  304. def get_distinct_counts_totals_users(self):
  305. assert self.db.get_distinct_counts_totals(
  306. TSDBModel.users_affected_by_group,
  307. [self.proj1group1.id],
  308. self.now,
  309. self.now + timedelta(hours=4),
  310. rollup=3600,
  311. ) == {
  312. self.proj1group1.id: 2 # 2 unique users overall
  313. }
  314. assert self.db.get_distinct_counts_totals(
  315. TSDBModel.users_affected_by_group,
  316. [self.proj1group1.id],
  317. self.now,
  318. self.now,
  319. rollup=3600,
  320. ) == {
  321. self.proj1group1.id: 1 # Only 1 unique user in the first hour
  322. }
  323. assert self.db.get_distinct_counts_totals(
  324. TSDBModel.users_affected_by_project,
  325. [self.proj1.id],
  326. self.now,
  327. self.now + timedelta(hours=4),
  328. rollup=3600,
  329. ) == {self.proj1.id: 2}
  330. assert (
  331. self.db.get_distinct_counts_totals(
  332. TSDBModel.users_affected_by_group,
  333. [],
  334. self.now,
  335. self.now + timedelta(hours=4),
  336. rollup=3600,
  337. )
  338. == {}
  339. )
  340. def test_most_frequent(self):
  341. assert self.db.get_most_frequent(
  342. TSDBModel.frequent_issues_by_project,
  343. [self.proj1.id],
  344. self.now,
  345. self.now + timedelta(hours=4),
  346. rollup=3600,
  347. ) in [
  348. {self.proj1.id: [(self.proj1group1.id, 2.0), (self.proj1group2.id, 1.0)]},
  349. {self.proj1.id: [(self.proj1group2.id, 2.0), (self.proj1group1.id, 1.0)]},
  350. ] # Both issues equally frequent
  351. assert (
  352. self.db.get_most_frequent(
  353. TSDBModel.frequent_issues_by_project,
  354. [],
  355. self.now,
  356. self.now + timedelta(hours=4),
  357. rollup=3600,
  358. )
  359. == {}
  360. )
  361. def test_frequency_series(self):
  362. dts = [self.now + timedelta(hours=i) for i in range(4)]
  363. assert self.db.get_frequency_series(
  364. TSDBModel.frequent_releases_by_group,
  365. {
  366. self.proj1group1.id: (self.group1release1env1.id, self.group1release2env1.id),
  367. self.proj1group2.id: (self.group2release1env1.id,),
  368. },
  369. dts[0],
  370. dts[-1],
  371. rollup=3600,
  372. ) == {
  373. self.proj1group1.id: [
  374. (timestamp(dts[0]), {self.group1release1env1.id: 0, self.group1release2env1.id: 0}),
  375. (timestamp(dts[1]), {self.group1release1env1.id: 3, self.group1release2env1.id: 0}),
  376. (timestamp(dts[2]), {self.group1release1env1.id: 0, self.group1release2env1.id: 3}),
  377. (timestamp(dts[3]), {self.group1release1env1.id: 0, self.group1release2env1.id: 0}),
  378. ],
  379. self.proj1group2.id: [
  380. (timestamp(dts[0]), {self.group2release1env1.id: 0}),
  381. (timestamp(dts[1]), {self.group2release1env1.id: 3}),
  382. (timestamp(dts[2]), {self.group2release1env1.id: 0}),
  383. (timestamp(dts[3]), {self.group2release1env1.id: 0}),
  384. ],
  385. }
  386. assert (
  387. self.db.get_frequency_series(
  388. TSDBModel.frequent_releases_by_group, {}, dts[0], dts[-1], rollup=3600
  389. )
  390. == {}
  391. )
  392. def test_result_shape(self):
  393. """
  394. Tests that the results from the different TSDB methods have the
  395. expected format.
  396. """
  397. project_id = self.proj1.id
  398. dts = [self.now + timedelta(hours=i) for i in range(4)]
  399. results = self.db.get_most_frequent(
  400. TSDBModel.frequent_issues_by_project, [project_id], dts[0], dts[0]
  401. )
  402. assert has_shape(results, {1: [(1, 1.0)]})
  403. results = self.db.get_most_frequent_series(
  404. TSDBModel.frequent_issues_by_project, [project_id], dts[0], dts[0]
  405. )
  406. assert has_shape(results, {1: [(1, {1: 1.0})]})
  407. items = {
  408. # {project_id: (issue_id, issue_id, ...)}
  409. project_id: (self.proj1group1.id, self.proj1group2.id)
  410. }
  411. results = self.db.get_frequency_series(
  412. TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1]
  413. )
  414. assert has_shape(results, {1: [(1, {1: 1})]})
  415. results = self.db.get_frequency_totals(
  416. TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1]
  417. )
  418. assert has_shape(results, {1: {1: 1}})
  419. results = self.db.get_range(TSDBModel.project, [project_id], dts[0], dts[-1])
  420. assert has_shape(results, {1: [(1, 1)]})
  421. results = self.db.get_distinct_counts_series(
  422. TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1]
  423. )
  424. assert has_shape(results, {1: [(1, 1)]})
  425. results = self.db.get_distinct_counts_totals(
  426. TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1]
  427. )
  428. assert has_shape(results, {1: 1})
  429. results = self.db.get_distinct_counts_union(
  430. TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1]
  431. )
  432. assert has_shape(results, 1)
  433. def test_calculated_limit(self):
  434. with patch("sentry.tsdb.snuba.snuba") as snuba:
  435. # 24h test
  436. rollup = 3600
  437. end = self.now
  438. start = end + timedelta(days=-1, seconds=rollup)
  439. self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
  440. assert snuba.query.call_args[1]["limit"] == 120
  441. # 14 day test
  442. rollup = 86400
  443. start = end + timedelta(days=-14, seconds=rollup)
  444. self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
  445. assert snuba.query.call_args[1]["limit"] == 70
  446. # 1h test
  447. rollup = 3600
  448. end = self.now
  449. start = end + timedelta(hours=-1, seconds=rollup)
  450. self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
  451. assert snuba.query.call_args[1]["limit"] == 5
  452. @region_silo_test
  453. class SnubaTSDBGroupPerformanceTest(TestCase, SnubaTestCase, PerfIssueTransactionTestMixin):
  454. def setUp(self):
  455. super().setUp()
  456. self.db = SnubaTSDB()
  457. self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
  458. hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
  459. )
  460. self.proj1 = self.create_project()
  461. self.env1 = Environment.objects.get_or_create(
  462. organization_id=self.proj1.organization_id, name="test"
  463. )[0]
  464. self.env2 = Environment.objects.get_or_create(
  465. organization_id=self.proj1.organization_id, name="dev"
  466. )[0]
  467. defaultenv = ""
  468. group1_fingerprint = f"{GroupType.PERFORMANCE_SLOW_SPAN.value}-group1"
  469. group2_fingerprint = f"{GroupType.PERFORMANCE_N_PLUS_ONE_DB_QUERIES.value}-group2"
  470. for r in range(0, 14400, 600): # Every 10 min for 4 hours
  471. event = self.store_transaction(
  472. environment=[self.env1.name, None][(r // 7200) % 3],
  473. project_id=self.proj1.id,
  474. # change every 55 min so some hours have 1 user, some have 2
  475. user_id=f"user{r // 3300}",
  476. # release_version=str(r // 3600) * 10, # 1 per hour,
  477. timestamp=self.now + timedelta(seconds=r),
  478. fingerprint=[group1_fingerprint, group2_fingerprint] if ((r // 600) % 2) else [],
  479. )
  480. self.proj1group1 = event.groups[0]
  481. self.proj1group2 = event.groups[1]
  482. self.defaultenv = Environment.objects.get(name=defaultenv)
  483. def test_range_groups_single(self):
  484. from sentry.snuba.dataset import Dataset
  485. now = (datetime.utcnow() - timedelta(days=1)).replace(
  486. hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
  487. )
  488. dts = [now + timedelta(hours=i) for i in range(4)]
  489. project = self.create_project()
  490. group_fingerprint = f"{GroupType.PERFORMANCE_N_PLUS_ONE_DB_QUERIES.value}-group3"
  491. # not sure what's going on here, but `times=1,2,3,4` work fine
  492. # fails with anything above 4
  493. times = 4
  494. event_ids = []
  495. events = []
  496. for i in range(0, times):
  497. res = self.store_transaction(
  498. environment=None,
  499. project_id=project.id,
  500. user_id="my_user",
  501. timestamp=now + timedelta(minutes=i * 10),
  502. fingerprint=[group_fingerprint],
  503. )
  504. grouped_by_project = aliased_query(
  505. dataset=Dataset.Transactions,
  506. start=None,
  507. end=None,
  508. groupby=None,
  509. conditions=None,
  510. filter_keys={"project_id": [project.id], "event_id": [res.event_id]},
  511. selected_columns=["event_id", "project_id", "group_ids"],
  512. aggregations=None,
  513. )
  514. assert grouped_by_project["data"][0]["event_id"] == res.event_id
  515. from sentry.eventstore.models import Event
  516. event_from_nodestore = Event(project_id=project.id, event_id=res.event_id)
  517. assert event_from_nodestore.event_id == res.event_id
  518. event_ids.append(res.event_id)
  519. events.append(res)
  520. group = events[0].groups[0]
  521. transactions_for_project = aliased_query(
  522. dataset=Dataset.Transactions,
  523. start=None,
  524. end=None,
  525. groupby=None,
  526. conditions=None,
  527. filter_keys={"project_id": [project.id]},
  528. selected_columns=["project_id", "event_id"],
  529. aggregations=None,
  530. )
  531. assert len(transactions_for_project["data"]) == times
  532. transactions_by_group = aliased_query(
  533. dataset=Dataset.Transactions,
  534. start=None,
  535. end=None,
  536. # start=group.first_seen,
  537. # end=now + timedelta(hours=4),
  538. groupby=["group_id"],
  539. conditions=None,
  540. filter_keys={"project_id": [project.id], "group_id": [group.id]},
  541. aggregations=[
  542. ["arrayJoin", ["group_ids"], "group_id"],
  543. ["count()", "", "times_seen"],
  544. ],
  545. )
  546. assert transactions_by_group["data"][0]["times_seen"] == times # 1 + (times % 5)
  547. assert self.db.get_range(
  548. TSDBModel.group_performance,
  549. [group.id],
  550. dts[0],
  551. dts[-1],
  552. rollup=3600,
  553. ) == {
  554. group.id: [
  555. # (timestamp(dts[0]), 1 + (times % 5)),
  556. (timestamp(dts[0]), times),
  557. (timestamp(dts[1]), 0),
  558. (timestamp(dts[2]), 0),
  559. (timestamp(dts[3]), 0),
  560. ]
  561. }
  562. def test_range_groups_mult(self):
  563. now = (datetime.utcnow() - timedelta(days=1)).replace(
  564. hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
  565. )
  566. dts = [now + timedelta(hours=i) for i in range(4)]
  567. project = self.create_project()
  568. group_fingerprint = f"{GroupType.PERFORMANCE_N_PLUS_ONE_DB_QUERIES.value}-group4"
  569. ids = ["a", "b", "c", "d", "e", "f", "1", "2", "3", "4", "5"]
  570. events = []
  571. for i, _ in enumerate(ids):
  572. event = self.store_transaction(
  573. environment=None,
  574. project_id=project.id,
  575. user_id="my_user",
  576. timestamp=now + timedelta(minutes=i * 10),
  577. fingerprint=[group_fingerprint],
  578. )
  579. events.append(event)
  580. group = events[0].groups[0]
  581. assert self.db.get_range(
  582. TSDBModel.group_performance,
  583. [group.id],
  584. dts[0],
  585. dts[-1],
  586. rollup=3600,
  587. ) == {
  588. group.id: [
  589. (timestamp(dts[0]), 6),
  590. (timestamp(dts[1]), 5),
  591. (timestamp(dts[2]), 0),
  592. (timestamp(dts[3]), 0),
  593. ]
  594. }
  595. def test_range_groups_simple(self):
  596. project = self.create_project()
  597. now = (datetime.utcnow() - timedelta(days=1)).replace(
  598. hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
  599. )
  600. group_fingerprint = f"{GroupType.PERFORMANCE_SLOW_SPAN.value}-group5"
  601. # for r in range(0, 14400, 600): # Every 10 min for 4 hours
  602. # for r in [1, 2, 3, 4, 5, 6, 7, 8]:
  603. ids = ["a", "b", "c", "d", "e"] # , "f"]
  604. events = []
  605. for r in ids:
  606. # for r in range(0, 9, 1):
  607. event = self.store_transaction(
  608. environment=None,
  609. project_id=project.id,
  610. # change every 55 min so some hours have 1 user, some have 2
  611. user_id=f"user{r}",
  612. # release_version=str(r // 3600) * 10, # 1 per hour,
  613. timestamp=now,
  614. fingerprint=[group_fingerprint],
  615. )
  616. events.append(event)
  617. group = events[0].groups[0]
  618. dts = [now + timedelta(hours=i) for i in range(4)]
  619. assert self.db.get_range(
  620. TSDBModel.group_performance,
  621. [group.id],
  622. dts[0],
  623. dts[-1],
  624. rollup=3600,
  625. ) == {
  626. group.id: [
  627. (timestamp(dts[0]), len(ids)),
  628. (timestamp(dts[1]), 0),
  629. (timestamp(dts[2]), 0),
  630. (timestamp(dts[3]), 0),
  631. ]
  632. }
  633. def test_range_groups(self):
  634. dts = [self.now + timedelta(hours=i) for i in range(4)]
  635. # Multiple groups
  636. assert self.db.get_range(
  637. TSDBModel.group_performance,
  638. [self.proj1group1.id, self.proj1group2.id],
  639. dts[0],
  640. dts[-1],
  641. rollup=3600,
  642. ) == {
  643. self.proj1group1.id: [
  644. (timestamp(dts[0]), 3),
  645. (timestamp(dts[1]), 3),
  646. (timestamp(dts[2]), 3),
  647. (timestamp(dts[3]), 3),
  648. ],
  649. self.proj1group2.id: [
  650. (timestamp(dts[0]), 3),
  651. (timestamp(dts[1]), 3),
  652. (timestamp(dts[2]), 3),
  653. (timestamp(dts[3]), 3),
  654. ],
  655. }
  656. assert (
  657. self.db.get_range(TSDBModel.group_performance, [], dts[0], dts[-1], rollup=3600) == {}
  658. )
  659. @region_silo_test
  660. class SnubaTSDBGroupProfilingTest(TestCase, SnubaTestCase, SearchIssueTestMixin):
  661. def setUp(self):
  662. super().setUp()
  663. self.db = SnubaTSDB()
  664. self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
  665. hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
  666. )
  667. self.proj1 = self.create_project()
  668. self.env1 = Environment.objects.get_or_create(
  669. organization_id=self.proj1.organization_id, name="test"
  670. )[0]
  671. self.env2 = Environment.objects.get_or_create(
  672. organization_id=self.proj1.organization_id, name="dev"
  673. )[0]
  674. defaultenv = ""
  675. group1_fingerprint = f"{GroupType.PROFILE_BLOCKED_THREAD.value}-group1"
  676. group2_fingerprint = f"{GroupType.PROFILE_BLOCKED_THREAD.value}-group2"
  677. groups = {}
  678. for r in range(0, 14400, 600): # Every 10 min for 4 hours
  679. event, occurrence, group_info = self.store_search_issue(
  680. project_id=self.proj1.id,
  681. # change every 55 min so some hours have 1 user, some have 2
  682. user_id=r // 3300,
  683. fingerprints=[group1_fingerprint] if ((r // 600) % 2) else [group2_fingerprint],
  684. # release_version=str(r // 3600) * 10, # 1 per hour,
  685. environment=[self.env1.name, None][(r // 7200) % 3],
  686. insert_time=self.now + timedelta(seconds=r),
  687. )
  688. if group_info:
  689. groups[group_info.group.id] = group_info.group
  690. all_groups = list(groups.values())
  691. self.proj1group1 = all_groups[0]
  692. self.proj1group2 = all_groups[1]
  693. self.defaultenv = Environment.objects.get(name=defaultenv)
  694. def test_range_groups_mult(self):
  695. now = (datetime.utcnow() - timedelta(days=1)).replace(
  696. hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
  697. )
  698. dts = [now + timedelta(hours=i) for i in range(4)]
  699. project = self.create_project()
  700. group_fingerprint = f"{GroupType.PROFILE_BLOCKED_THREAD.value}-group4"
  701. groups = []
  702. for i in range(0, 11):
  703. _, _, group_info = self.store_search_issue(
  704. project_id=project.id,
  705. user_id=0,
  706. fingerprints=[group_fingerprint],
  707. environment=None,
  708. insert_time=now + timedelta(minutes=i * 10),
  709. )
  710. if group_info:
  711. groups.append(group_info.group)
  712. group = groups[0]
  713. assert self.db.get_range(
  714. TSDBModel.group_generic,
  715. [group.id],
  716. dts[0],
  717. dts[-1],
  718. rollup=3600,
  719. ) == {
  720. group.id: [
  721. (timestamp(dts[0]), 6),
  722. (timestamp(dts[1]), 5),
  723. (timestamp(dts[2]), 0),
  724. (timestamp(dts[3]), 0),
  725. ]
  726. }
  727. def test_range_groups_simple(self):
  728. project = self.create_project()
  729. now = (datetime.utcnow() - timedelta(days=1)).replace(
  730. hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
  731. )
  732. group_fingerprint = f"{GroupType.PROFILE_BLOCKED_THREAD.value}-group5"
  733. ids = [1, 2, 3, 4, 5]
  734. groups = []
  735. for r in ids:
  736. # for r in range(0, 9, 1):
  737. event, occurrence, group_info = self.store_search_issue(
  738. project_id=project.id,
  739. # change every 55 min so some hours have 1 user, some have 2
  740. user_id=r,
  741. fingerprints=[group_fingerprint],
  742. environment=None,
  743. # release_version=str(r // 3600) * 10, # 1 per hour,
  744. insert_time=now,
  745. )
  746. if group_info:
  747. groups.append(group_info.group)
  748. group = groups[0]
  749. dts = [now + timedelta(hours=i) for i in range(4)]
  750. assert self.db.get_range(
  751. TSDBModel.group_generic,
  752. [group.id],
  753. dts[0],
  754. dts[-1],
  755. rollup=3600,
  756. ) == {
  757. group.id: [
  758. (timestamp(dts[0]), len(ids)),
  759. (timestamp(dts[1]), 0),
  760. (timestamp(dts[2]), 0),
  761. (timestamp(dts[3]), 0),
  762. ]
  763. }
  764. def test_range_groups(self):
  765. dts = [self.now + timedelta(hours=i) for i in range(4)]
  766. # Multiple groups
  767. assert self.db.get_range(
  768. TSDBModel.group_generic,
  769. [self.proj1group1.id, self.proj1group2.id],
  770. dts[0],
  771. dts[-1],
  772. rollup=3600,
  773. ) == {
  774. self.proj1group1.id: [
  775. (timestamp(dts[0]), 3),
  776. (timestamp(dts[1]), 3),
  777. (timestamp(dts[2]), 3),
  778. (timestamp(dts[3]), 3),
  779. ],
  780. self.proj1group2.id: [
  781. (timestamp(dts[0]), 3),
  782. (timestamp(dts[1]), 3),
  783. (timestamp(dts[2]), 3),
  784. (timestamp(dts[3]), 3),
  785. ],
  786. }
  787. assert self.db.get_range(TSDBModel.group_generic, [], dts[0], dts[-1], rollup=3600) == {}
  788. def test_get_distinct_counts_totals_users(self):
  789. assert self.db.get_distinct_counts_totals(
  790. TSDBModel.users_affected_by_generic_group,
  791. [self.proj1group1.id],
  792. self.now,
  793. self.now + timedelta(hours=4),
  794. rollup=3600,
  795. ) == {
  796. self.proj1group1.id: 5 # 5 unique users overall
  797. }
  798. assert self.db.get_distinct_counts_totals(
  799. TSDBModel.users_affected_by_generic_group,
  800. [self.proj1group1.id],
  801. self.now,
  802. self.now,
  803. rollup=3600,
  804. ) == {
  805. self.proj1group1.id: 1 # Only 1 unique user in the first hour
  806. }
  807. assert (
  808. self.db.get_distinct_counts_totals(
  809. TSDBModel.users_affected_by_generic_group,
  810. [],
  811. self.now,
  812. self.now + timedelta(hours=4),
  813. rollup=3600,
  814. )
  815. == {}
  816. )
  817. class AddJitterToSeriesTest(TestCase):
  818. def setUp(self):
  819. self.db = SnubaTSDB()
  820. def run_test(self, end, interval, jitter, expected_start, expected_end):
  821. end = end.replace(tzinfo=pytz.UTC)
  822. start = end - interval
  823. rollup, rollup_series = self.db.get_optimal_rollup_series(start, end)
  824. series = self.db._add_jitter_to_series(rollup_series, start, rollup, jitter)
  825. assert to_datetime(series[0]) == expected_start.replace(tzinfo=pytz.UTC)
  826. assert to_datetime(series[-1]) == expected_end.replace(tzinfo=pytz.UTC)
  827. def test(self):
  828. self.run_test(
  829. end=datetime(2022, 5, 18, 10, 23, 4),
  830. interval=timedelta(hours=1),
  831. jitter=5,
  832. expected_start=datetime(2022, 5, 18, 9, 22, 55),
  833. expected_end=datetime(2022, 5, 18, 10, 22, 55),
  834. )
  835. self.run_test(
  836. end=datetime(2022, 5, 18, 10, 23, 8),
  837. interval=timedelta(hours=1),
  838. jitter=5,
  839. expected_start=datetime(2022, 5, 18, 9, 23, 5),
  840. expected_end=datetime(2022, 5, 18, 10, 23, 5),
  841. )
  842. # Jitter should be the same
  843. self.run_test(
  844. end=datetime(2022, 5, 18, 10, 23, 8),
  845. interval=timedelta(hours=1),
  846. jitter=55,
  847. expected_start=datetime(2022, 5, 18, 9, 23, 5),
  848. expected_end=datetime(2022, 5, 18, 10, 23, 5),
  849. )
  850. self.run_test(
  851. end=datetime(2022, 5, 18, 22, 33, 2),
  852. interval=timedelta(minutes=1),
  853. jitter=3,
  854. expected_start=datetime(2022, 5, 18, 22, 31, 53),
  855. expected_end=datetime(2022, 5, 18, 22, 32, 53),
  856. )
  857. def test_empty_series(self):
  858. assert self.db._add_jitter_to_series([], datetime(2022, 5, 18, 10, 23, 4), 60, 127) == []
  859. assert self.db._add_jitter_to_series([], datetime(2022, 5, 18, 10, 23, 4), 60, None) == []