# test_tsdb_backend.py
  1. from datetime import datetime, timedelta, timezone
  2. from unittest.mock import patch
  3. import pytz
  4. from snuba_sdk import Limit
  5. from sentry.models import Environment, Group, GroupRelease, Release
  6. from sentry.testutils import SnubaTestCase, TestCase
  7. from sentry.testutils.helpers.datetime import iso_format
  8. from sentry.testutils.performance_issues.store_transaction import PerfIssueTransactionTestMixin
  9. from sentry.testutils.silo import region_silo_test
  10. from sentry.tsdb.base import TSDBModel
  11. from sentry.tsdb.snuba import SnubaTSDB
  12. from sentry.types.issues import GroupType
  13. from sentry.utils.dates import to_datetime, to_timestamp
  14. from sentry.utils.snuba import aliased_query
  15. from tests.sentry.issues.test_utils import SearchIssueTestMixin
  16. def timestamp(d):
  17. t = int(to_timestamp(d))
  18. return t - (t % 3600)
  19. def has_shape(data, shape, allow_empty=False):
  20. """
  21. Determine if a data object has the provided shape
  22. At any level, the object in `data` and in `shape` must have the same type.
  23. A dict is the same shape if all its keys and values have the same shape as the
  24. key/value in `shape`. The number of keys/values is not relevant.
  25. A list is the same shape if all its items have the same shape as the value
  26. in `shape`
  27. A tuple is the same shape if it has the same length as `shape` and all the
  28. values have the same shape as the corresponding value in `shape`
  29. Any other object simply has to have the same type.
  30. If `allow_empty` is set, lists and dicts in `data` will pass even if they are empty.
  31. """
  32. if not isinstance(data, type(shape)):
  33. return False
  34. if isinstance(data, dict):
  35. return (
  36. (allow_empty or len(data) > 0)
  37. and all(has_shape(k, list(shape.keys())[0]) for k in data.keys())
  38. and all(has_shape(v, list(shape.values())[0]) for v in data.values())
  39. )
  40. elif isinstance(data, list):
  41. return (allow_empty or len(data) > 0) and all(has_shape(v, shape[0]) for v in data)
  42. elif isinstance(data, tuple):
  43. return len(data) == len(shape) and all(
  44. has_shape(data[i], shape[i]) for i in range(len(data))
  45. )
  46. else:
  47. return True
class SnubaTSDBTest(TestCase, SnubaTestCase):
    """Exercises SnubaTSDB read methods against error events stored in Snuba."""

    def setUp(self):
        """Seed 4 hours of error events: one event every 10 minutes, alternating
        between two groups, across two environments, users and releases."""
        super().setUp()
        self.db = SnubaTSDB()
        # Start of the current UTC day, guaranteed at least 4 hours in the
        # past so every fixture event below has a historic timestamp.
        self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        self.proj1 = self.create_project()
        env1 = "test"
        env2 = "dev"
        defaultenv = ""
        release1 = "1" * 10
        release2 = "2" * 10
        self.release1 = Release.objects.create(
            organization_id=self.organization.id, version=release1, date_added=self.now
        )
        self.release1.add_project(self.proj1)
        self.release2 = Release.objects.create(
            organization_id=self.organization.id, version=release2, date_added=self.now
        )
        self.release2.add_project(self.proj1)
        for r in range(0, 14400, 600):  # Every 10 min for 4 hours
            self.store_event(
                data={
                    "event_id": (str(r) * 32)[:32],
                    "message": "message 1",
                    "platform": "python",
                    "fingerprint": [["group-1"], ["group-2"]][
                        (r // 600) % 2
                    ],  # Switch every 10 mins
                    "timestamp": iso_format(self.now + timedelta(seconds=r)),
                    "tags": {
                        "foo": "bar",
                        "baz": "quux",
                        # Switch every 2 hours
                        # NOTE(review): the index is (r // 7200) % 3 but the
                        # list has only 2 entries; safe only because r < 14400
                        # keeps r // 7200 <= 1 — confirm intent of the % 3.
                        "environment": [env1, None][(r // 7200) % 3],
                        "sentry:user": f"id:user{r // 3300}",
                    },
                    "user": {
                        # change every 55 min so some hours have 1 user, some have 2
                        "id": f"user{r // 3300}",
                    },
                    "release": str(r // 3600) * 10,  # 1 per hour,
                },
                project_id=self.proj1.id,
            )
        # The alternating fingerprints above produce exactly two groups.
        groups = Group.objects.filter(project=self.proj1).order_by("id")
        self.proj1group1 = groups[0]
        self.proj1group2 = groups[1]
        self.env1 = Environment.objects.get(name=env1)
        self.env2 = self.create_environment(name=env2)  # No events
        self.defaultenv = Environment.objects.get(name=defaultenv)
        # GroupRelease rows referenced by the frequency-series tests below;
        # group1/release1 and group2/release1 were created implicitly by the
        # events, group1/release2 is created explicitly here.
        self.group1release1env1 = GroupRelease.objects.get(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release1.id,
            environment=env1,
        )
        self.group1release2env1 = GroupRelease.objects.create(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release2.id,
            environment=env1,
        )
        self.group2release1env1 = GroupRelease.objects.get(
            project_id=self.proj1.id,
            group_id=self.proj1group2.id,
            release_id=self.release1.id,
            environment=env1,
        )
    def test_range_single(self):
        """get_range() for a single group returns hourly event counts."""
        env1 = "test"
        project = self.create_project()
        # Every 5 min for 4 hours -> 12 events per hourly bucket.
        # (600 * 6 * 4 == 14400 seconds, step 300.)
        for r in range(0, 600 * 6 * 4, 300):
            self.store_event(
                data={
                    "event_id": (str(r) * 32)[:32],
                    "message": "message 1",
                    "platform": "python",
                    "fingerprint": ["group-1"],
                    "timestamp": iso_format(self.now + timedelta(seconds=r)),
                    "tags": {
                        "foo": "bar",
                        "baz": "quux",
                        # Switch every 2 hours
                        "environment": [env1, None][(r // 7200) % 3],
                        "sentry:user": f"id:user{r // 3300}",
                    },
                    "user": {
                        # change every 55 min so some hours have 1 user, some have 2
                        "id": f"user{r // 3300}",
                    },
                    "release": str(r // 3600) * 10,  # 1 per hour,
                },
                project_id=project.id,
            )
        groups = Group.objects.filter(project=project).order_by("id")
        group = groups[0]
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        # 6 * 2 == 12 events per hour (one every 5 minutes).
        assert self.db.get_range(TSDBModel.group, [group.id], dts[0], dts[-1], rollup=3600) == {
            group.id: [
                (timestamp(dts[0]), 6 * 2),
                (timestamp(dts[1]), 6 * 2),
                (timestamp(dts[2]), 6 * 2),
                (timestamp(dts[3]), 6 * 2),
            ]
        }
    def test_range_groups(self):
        """get_range() over the group model, for one and for multiple groups."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        # Each group gets every other event -> 3 events per hour per group.
        assert self.db.get_range(
            TSDBModel.group, [self.proj1group1.id], dts[0], dts[-1], rollup=3600
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ]
        }
        # Multiple groups
        assert self.db.get_range(
            TSDBModel.group,
            [self.proj1group1.id, self.proj1group2.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }
        # No keys -> empty result, no query issued for an empty key list.
        assert self.db.get_range(TSDBModel.group, [], dts[0], dts[-1], rollup=3600) == {}
    def test_range_releases(self):
        """get_range() over the release model counts events tagged with release1.

        Only hour 1's events carry release "1111111111" (release is
        str(r // 3600) * 10 in the fixture), hence the single non-zero bucket.
        """
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.release, [self.release1.id], dts[0], dts[-1], rollup=3600
        ) == {
            self.release1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }
    def test_range_project(self):
        """get_range() over the project model: 6 events in each hourly bucket."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.project, [self.proj1.id], dts[0], dts[-1], rollup=3600
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 6),
                (timestamp(dts[3]), 6),
            ]
        }
    def test_range_environment_filter(self):
        """get_range() honours environment_ids filtering.

        Fixture events carry env "test" for the first 2 hours and no
        environment afterwards, so env1 matches hours 0-1 and the default
        (empty-name) environment matches hours 2-3.
        """
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.env1.id],
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }
        # No events submitted for env2
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.env2.id],
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }
        # Events submitted with no environment should match default environment
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.defaultenv.id],
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 6),
                (timestamp(dts[3]), 6),
            ]
        }
    def test_range_rollups(self):
        """get_range() with daily and minutely rollups."""
        # Daily
        daystart = self.now.replace(hour=0)  # day buckets start on day boundaries
        dts = [daystart + timedelta(days=i) for i in range(2)]
        assert self.db.get_range(
            TSDBModel.project, [self.proj1.id], dts[0], dts[-1], rollup=86400
        ) == {self.proj1.id: [(timestamp(dts[0]), 24), (timestamp(dts[1]), 0)]}
        # Minutely
        dts = [self.now + timedelta(minutes=i) for i in range(120)]
        # Expect every 10th minute to have a 1, else 0
        expected = [(to_timestamp(d), 1 if i % 10 == 0 else 0) for i, d in enumerate(dts)]
        assert self.db.get_range(
            TSDBModel.project, [self.proj1.id], dts[0], dts[-1], rollup=60
        ) == {self.proj1.id: expected}
    def test_distinct_counts_series_users(self):
        """get_distinct_counts_series() returns unique-user counts per bucket.

        The fixture changes the user id every 3300s (55 min), so some hourly
        buckets see one distinct user and others see two.
        """
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_group, [self.proj1group1.id], dts[0], dts[-1], rollup=3600
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 1),
                (timestamp(dts[2]), 1),
                (timestamp(dts[3]), 2),
            ]
        }
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project, [self.proj1.id], dts[0], dts[-1], rollup=3600
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 2),
                (timestamp(dts[2]), 2),
                (timestamp(dts[3]), 2),
            ]
        }
        # Empty key list -> empty result.
        assert (
            self.db.get_distinct_counts_series(
                TSDBModel.users_affected_by_group, [], dts[0], dts[-1], rollup=3600
            )
            == {}
        )
  305. def get_distinct_counts_totals_users(self):
  306. assert self.db.get_distinct_counts_totals(
  307. TSDBModel.users_affected_by_group,
  308. [self.proj1group1.id],
  309. self.now,
  310. self.now + timedelta(hours=4),
  311. rollup=3600,
  312. ) == {
  313. self.proj1group1.id: 2 # 2 unique users overall
  314. }
  315. assert self.db.get_distinct_counts_totals(
  316. TSDBModel.users_affected_by_group,
  317. [self.proj1group1.id],
  318. self.now,
  319. self.now,
  320. rollup=3600,
  321. ) == {
  322. self.proj1group1.id: 1 # Only 1 unique user in the first hour
  323. }
  324. assert self.db.get_distinct_counts_totals(
  325. TSDBModel.users_affected_by_project,
  326. [self.proj1.id],
  327. self.now,
  328. self.now + timedelta(hours=4),
  329. rollup=3600,
  330. ) == {self.proj1.id: 2}
  331. assert (
  332. self.db.get_distinct_counts_totals(
  333. TSDBModel.users_affected_by_group,
  334. [],
  335. self.now,
  336. self.now + timedelta(hours=4),
  337. rollup=3600,
  338. )
  339. == {}
  340. )
    def test_most_frequent(self):
        """get_most_frequent() ranks issues within a project.

        Both fixture groups receive the same number of events, so either
        ordering of the two groups is acceptable.
        """
        assert self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [self.proj1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) in [
            {self.proj1.id: [(self.proj1group1.id, 2.0), (self.proj1group2.id, 1.0)]},
            {self.proj1.id: [(self.proj1group2.id, 2.0), (self.proj1group1.id, 1.0)]},
        ]  # Both issues equally frequent
        # Empty key list -> empty result.
        assert (
            self.db.get_most_frequent(
                TSDBModel.frequent_issues_by_project,
                [],
                self.now,
                self.now + timedelta(hours=4),
                rollup=3600,
            )
            == {}
        )
    def test_frequency_series(self):
        """get_frequency_series() buckets release counts per group per hour.

        Keys are group ids, values map each requested GroupRelease id to its
        event count in that bucket (0 when absent).
        """
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_frequency_series(
            TSDBModel.frequent_releases_by_group,
            {
                self.proj1group1.id: (self.group1release1env1.id, self.group1release2env1.id),
                self.proj1group2.id: (self.group2release1env1.id,),
            },
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), {self.group1release1env1.id: 0, self.group1release2env1.id: 0}),
                (timestamp(dts[1]), {self.group1release1env1.id: 3, self.group1release2env1.id: 0}),
                (timestamp(dts[2]), {self.group1release1env1.id: 0, self.group1release2env1.id: 3}),
                (timestamp(dts[3]), {self.group1release1env1.id: 0, self.group1release2env1.id: 0}),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), {self.group2release1env1.id: 0}),
                (timestamp(dts[1]), {self.group2release1env1.id: 3}),
                (timestamp(dts[2]), {self.group2release1env1.id: 0}),
                (timestamp(dts[3]), {self.group2release1env1.id: 0}),
            ],
        }
        # Empty item map -> empty result.
        assert (
            self.db.get_frequency_series(
                TSDBModel.frequent_releases_by_group, {}, dts[0], dts[-1], rollup=3600
            )
            == {}
        )
    def test_result_shape(self):
        """
        Tests that the results from the different TSDB methods have the
        expected format.

        Uses has_shape() to compare structure (types/nesting) rather than
        exact values.
        """
        project_id = self.proj1.id
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        results = self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project, [project_id], dts[0], dts[0]
        )
        assert has_shape(results, {1: [(1, 1.0)]})
        results = self.db.get_most_frequent_series(
            TSDBModel.frequent_issues_by_project, [project_id], dts[0], dts[0]
        )
        assert has_shape(results, {1: [(1, {1: 1.0})]})
        items = {
            # {project_id: (issue_id, issue_id, ...)}
            project_id: (self.proj1group1.id, self.proj1group2.id)
        }
        results = self.db.get_frequency_series(
            TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1]
        )
        assert has_shape(results, {1: [(1, {1: 1})]})
        results = self.db.get_frequency_totals(
            TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1]
        )
        assert has_shape(results, {1: {1: 1}})
        results = self.db.get_range(TSDBModel.project, [project_id], dts[0], dts[-1])
        assert has_shape(results, {1: [(1, 1)]})
        results = self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1]
        )
        assert has_shape(results, {1: [(1, 1)]})
        results = self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1]
        )
        assert has_shape(results, {1: 1})
        results = self.db.get_distinct_counts_union(
            TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1]
        )
        assert has_shape(results, 1)
    def test_calculated_limit(self):
        """The SnQL query limit scales with key count and bucket count.

        Expected limit appears to be len(keys) * number-of-rollup-buckets
        (5 * 24 = 120, 5 * 14 = 70, 5 * 1 = 5) — derived from the asserted
        values; confirm against SnubaTSDB.get_data().
        """
        with patch("sentry.tsdb.snuba.raw_snql_query") as snuba:
            # 24h test
            rollup = 3600
            end = self.now
            start = end + timedelta(days=-1, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.call_args.args[0].query.limit == Limit(120)
            # 14 day test
            rollup = 86400
            start = end + timedelta(days=-14, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.call_args.args[0].query.limit == Limit(70)
            # 1h test
            rollup = 3600
            end = self.now
            start = end + timedelta(hours=-1, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.call_args.args[0].query.limit == Limit(5)
    @patch("sentry.utils.snuba.OVERRIDE_OPTIONS", new={"consistent": True})
    def test_tsdb_with_consistent(self):
        """The "consistent" override option is propagated to the SnQL request flags."""
        with patch("sentry.utils.snuba._apply_cache_and_build_results") as snuba:
            rollup = 3600
            end = self.now
            start = end + timedelta(days=-1, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            # call_args.args[0] is the batch of (request, ...) tuples passed
            # to _apply_cache_and_build_results.
            assert snuba.call_args.args[0][0][0].query.limit == Limit(120)
            assert snuba.call_args.args[0][0][0].flags.consistent is True
@region_silo_test
class SnubaTSDBGroupPerformanceTest(TestCase, SnubaTestCase, PerfIssueTransactionTestMixin):
    """SnubaTSDB queries against performance-issue transactions."""

    def setUp(self):
        """Seed 4 hours of transactions; every other event (by 10-min slot)
        carries two performance-issue fingerprints, creating two groups."""
        super().setUp()
        self.db = SnubaTSDB()
        # Start of the current UTC day, at least 4 hours in the past.
        self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        self.proj1 = self.create_project()
        self.env1 = Environment.objects.get_or_create(
            organization_id=self.proj1.organization_id, name="test"
        )[0]
        self.env2 = Environment.objects.get_or_create(
            organization_id=self.proj1.organization_id, name="dev"
        )[0]
        defaultenv = ""
        group1_fingerprint = f"{GroupType.PERFORMANCE_RENDER_BLOCKING_ASSET_SPAN.value}-group1"
        group2_fingerprint = f"{GroupType.PERFORMANCE_N_PLUS_ONE_DB_QUERIES.value}-group2"
        for r in range(0, 14400, 600):  # Every 10 min for 4 hours
            event = self.store_transaction(
                # NOTE(review): (r // 7200) % 3 indexes a 2-element list; safe
                # only because r < 14400 keeps the index <= 1.
                environment=[self.env1.name, None][(r // 7200) % 3],
                project_id=self.proj1.id,
                # change every 55 min so some hours have 1 user, some have 2
                user_id=f"user{r // 3300}",
                # release_version=str(r // 3600) * 10, # 1 per hour,
                timestamp=self.now + timedelta(seconds=r),
                fingerprint=[group1_fingerprint, group2_fingerprint] if ((r // 600) % 2) else [],
            )
        # The last loop iteration (r=13800) has both fingerprints, so `event`
        # here carries both groups.
        self.proj1group1 = event.groups[0]
        self.proj1group2 = event.groups[1]
        self.defaultenv = Environment.objects.get(name=defaultenv)
    def test_range_groups_single(self):
        """Store a handful of transactions for one performance group and
        cross-check raw Snuba queries, nodestore, and get_range() agree."""
        from sentry.snuba.dataset import Dataset

        now = (datetime.utcnow() - timedelta(days=1)).replace(
            hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        dts = [now + timedelta(hours=i) for i in range(4)]
        project = self.create_project()
        group_fingerprint = f"{GroupType.PERFORMANCE_N_PLUS_ONE_DB_QUERIES.value}-group3"
        # not sure what's going on here, but `times=1,2,3,4` work fine
        # fails with anything above 4
        times = 4
        event_ids = []
        events = []
        for i in range(0, times):
            res = self.store_transaction(
                environment=None,
                project_id=project.id,
                user_id="my_user",
                timestamp=now + timedelta(minutes=i * 10),
                fingerprint=[group_fingerprint],
            )
            # Sanity check: the stored transaction is queryable by event_id.
            grouped_by_project = aliased_query(
                dataset=Dataset.Transactions,
                start=None,
                end=None,
                groupby=None,
                conditions=None,
                filter_keys={"project_id": [project.id], "event_id": [res.event_id]},
                selected_columns=["event_id", "project_id", "group_ids"],
                aggregations=None,
            )
            assert grouped_by_project["data"][0]["event_id"] == res.event_id
            # Sanity check: the event also round-trips through nodestore.
            from sentry.eventstore.models import Event

            event_from_nodestore = Event(project_id=project.id, event_id=res.event_id)
            assert event_from_nodestore.event_id == res.event_id
            event_ids.append(res.event_id)
            events.append(res)
        group = events[0].groups[0]
        transactions_for_project = aliased_query(
            dataset=Dataset.Transactions,
            start=None,
            end=None,
            groupby=None,
            conditions=None,
            filter_keys={"project_id": [project.id]},
            selected_columns=["project_id", "event_id"],
            aggregations=None,
        )
        assert len(transactions_for_project["data"]) == times
        # Count transactions per group via arrayJoin on group_ids.
        transactions_by_group = aliased_query(
            dataset=Dataset.Transactions,
            start=None,
            end=None,
            # start=group.first_seen,
            # end=now + timedelta(hours=4),
            groupby=["group_id"],
            conditions=None,
            filter_keys={"project_id": [project.id], "group_id": [group.id]},
            aggregations=[
                ["arrayJoin", ["group_ids"], "group_id"],
                ["count()", "", "times_seen"],
            ],
        )
        assert transactions_by_group["data"][0]["times_seen"] == times  # 1 + (times % 5)
        # All events land within the first hour (i * 10 minutes, i < 4).
        assert self.db.get_range(
            TSDBModel.group_performance,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            group.id: [
                # (timestamp(dts[0]), 1 + (times % 5)),
                (timestamp(dts[0]), times),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }
    def test_range_groups_mult(self):
        """11 transactions at 10-minute intervals span two hourly buckets (6 + 5)."""
        now = (datetime.utcnow() - timedelta(days=1)).replace(
            hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        dts = [now + timedelta(hours=i) for i in range(4)]
        project = self.create_project()
        group_fingerprint = f"{GroupType.PERFORMANCE_N_PLUS_ONE_DB_QUERIES.value}-group4"
        # 11 ids -> events at minutes 0..100, i.e. 6 in hour 0 and 5 in hour 1.
        ids = ["a", "b", "c", "d", "e", "f", "1", "2", "3", "4", "5"]
        events = []
        for i, _ in enumerate(ids):
            event = self.store_transaction(
                environment=None,
                project_id=project.id,
                user_id="my_user",
                timestamp=now + timedelta(minutes=i * 10),
                fingerprint=[group_fingerprint],
            )
            events.append(event)
        group = events[0].groups[0]
        assert self.db.get_range(
            TSDBModel.group_performance,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            group.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 5),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }
    def test_range_groups_simple(self):
        """All transactions stored at the same instant land in one bucket."""
        project = self.create_project()
        now = (datetime.utcnow() - timedelta(days=1)).replace(
            hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        group_fingerprint = f"{GroupType.PERFORMANCE_RENDER_BLOCKING_ASSET_SPAN.value}-group5"
        # for r in range(0, 14400, 600): # Every 10 min for 4 hours
        # for r in [1, 2, 3, 4, 5, 6, 7, 8]:
        ids = ["a", "b", "c", "d", "e"]  # , "f"]
        events = []
        for r in ids:
            # for r in range(0, 9, 1):
            event = self.store_transaction(
                environment=None,
                project_id=project.id,
                # change every 55 min so some hours have 1 user, some have 2
                user_id=f"user{r}",
                # release_version=str(r // 3600) * 10, # 1 per hour,
                timestamp=now,
                fingerprint=[group_fingerprint],
            )
            events.append(event)
        group = events[0].groups[0]
        dts = [now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.group_performance,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            group.id: [
                (timestamp(dts[0]), len(ids)),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }
    def test_range_groups(self):
        """get_range() over group_performance for multiple fixture groups."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        # Multiple groups: each gets 3 transactions per hour from setUp().
        assert self.db.get_range(
            TSDBModel.group_performance,
            [self.proj1group1.id, self.proj1group2.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }
        # Empty key list -> empty result.
        assert (
            self.db.get_range(TSDBModel.group_performance, [], dts[0], dts[-1], rollup=3600) == {}
        )
@region_silo_test
class SnubaTSDBGroupProfilingTest(TestCase, SnubaTestCase, SearchIssueTestMixin):
    """SnubaTSDB queries against generic (profiling) search issues."""

    def setUp(self):
        """Seed 4 hours of search issues, alternating between two
        profile-blocked-thread fingerprints every 10 minutes."""
        super().setUp()
        self.db = SnubaTSDB()
        # Start of the current UTC day, at least 4 hours in the past.
        self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        self.proj1 = self.create_project()
        self.env1 = Environment.objects.get_or_create(
            organization_id=self.proj1.organization_id, name="test"
        )[0]
        self.env2 = Environment.objects.get_or_create(
            organization_id=self.proj1.organization_id, name="dev"
        )[0]
        defaultenv = ""
        group1_fingerprint = f"{GroupType.PROFILE_BLOCKED_THREAD.value}-group1"
        group2_fingerprint = f"{GroupType.PROFILE_BLOCKED_THREAD.value}-group2"
        groups = {}
        for r in range(0, 14400, 600):  # Every 10 min for 4 hours
            event, occurrence, group_info = self.store_search_issue(
                project_id=self.proj1.id,
                # change every 55 min so some hours have 1 user, some have 2
                user_id=r // 3300,
                fingerprints=[group1_fingerprint] if ((r // 600) % 2) else [group2_fingerprint],
                # release_version=str(r // 3600) * 10, # 1 per hour,
                # NOTE(review): (r // 7200) % 3 indexes a 2-element list; safe
                # only because r < 14400 keeps the index <= 1.
                environment=[self.env1.name, None][(r // 7200) % 3],
                insert_time=self.now + timedelta(seconds=r),
            )
            if group_info:
                groups[group_info.group.id] = group_info.group
        # Collect the two distinct groups created by the alternating fingerprints.
        all_groups = list(groups.values())
        self.proj1group1 = all_groups[0]
        self.proj1group2 = all_groups[1]
        self.defaultenv = Environment.objects.get(name=defaultenv)
    def test_range_group_manual_group_time_rollup(self):
        """get_optimal_rollup()/get_range() agree at each supported granularity."""
        project = self.create_project()
        # these are the only granularities/rollups that can actually be used
        GRANULARITIES = [
            (10, timedelta(seconds=10), 5),
            (60 * 60, timedelta(hours=1), 6),
            (60 * 60 * 24, timedelta(days=1), 15),
        ]
        start = (datetime.now(timezone.utc) - timedelta(days=15)).replace(
            hour=0, minute=0, second=0
        )
        for step, delta, times in GRANULARITIES:
            series = [start + (delta * i) for i in range(times)]
            series_ts = [int(to_timestamp(ts)) for ts in series]
            # The span of the series should select exactly this rollup.
            assert self.db.get_optimal_rollup(series[0], series[-1]) == step
            assert self.db.get_optimal_rollup_series(series[0], end=series[-1], rollup=None) == (
                step,
                series_ts,
            )
            # One issue per bucket; the fingerprint is unique per step so each
            # granularity gets its own group.
            for time_step in series:
                _, _, group_info = self.store_search_issue(
                    project_id=project.id,
                    user_id=0,
                    fingerprints=[f"test_range_group_manual_group_time_rollup-{step}"],
                    environment=None,
                    insert_time=time_step,
                )
            assert self.db.get_range(
                TSDBModel.group_generic,
                [group_info.group.id],
                series[0],
                series[-1],
                rollup=None,
            ) == {group_info.group.id: [(ts, 1) for ts in series_ts]}
    def test_range_groups_mult(self):
        """11 search issues at 10-minute intervals span two hourly buckets (6 + 5)."""
        now = (datetime.utcnow() - timedelta(days=1)).replace(
            hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        dts = [now + timedelta(hours=i) for i in range(4)]
        project = self.create_project()
        group_fingerprint = f"{GroupType.PROFILE_BLOCKED_THREAD.value}-group4"
        groups = []
        for i in range(0, 11):
            _, _, group_info = self.store_search_issue(
                project_id=project.id,
                user_id=0,
                fingerprints=[group_fingerprint],
                environment=None,
                insert_time=now + timedelta(minutes=i * 10),
            )
            if group_info:
                groups.append(group_info.group)
        group = groups[0]
        assert self.db.get_range(
            TSDBModel.group_generic,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            group.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 5),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }
    def test_range_groups_simple(self):
        """All search issues stored at the same instant land in one bucket."""
        project = self.create_project()
        now = (datetime.utcnow() - timedelta(days=1)).replace(
            hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        group_fingerprint = f"{GroupType.PROFILE_BLOCKED_THREAD.value}-group5"
        ids = [1, 2, 3, 4, 5]
        groups = []
        for r in ids:
            # for r in range(0, 9, 1):
            event, occurrence, group_info = self.store_search_issue(
                project_id=project.id,
                # change every 55 min so some hours have 1 user, some have 2
                user_id=r,
                fingerprints=[group_fingerprint],
                environment=None,
                # release_version=str(r // 3600) * 10, # 1 per hour,
                insert_time=now,
            )
            if group_info:
                groups.append(group_info.group)
        group = groups[0]
        dts = [now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.group_generic,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            group.id: [
                (timestamp(dts[0]), len(ids)),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }
  808. def test_range_groups(self):
  809. dts = [self.now + timedelta(hours=i) for i in range(4)]
  810. # Multiple groups
  811. assert self.db.get_range(
  812. TSDBModel.group_generic,
  813. [self.proj1group1.id, self.proj1group2.id],
  814. dts[0],
  815. dts[-1],
  816. rollup=3600,
  817. ) == {
  818. self.proj1group1.id: [
  819. (timestamp(dts[0]), 3),
  820. (timestamp(dts[1]), 3),
  821. (timestamp(dts[2]), 3),
  822. (timestamp(dts[3]), 3),
  823. ],
  824. self.proj1group2.id: [
  825. (timestamp(dts[0]), 3),
  826. (timestamp(dts[1]), 3),
  827. (timestamp(dts[2]), 3),
  828. (timestamp(dts[3]), 3),
  829. ],
  830. }
  831. assert self.db.get_range(TSDBModel.group_generic, [], dts[0], dts[-1], rollup=3600) == {}
  832. def test_get_distinct_counts_totals_users(self):
  833. assert self.db.get_distinct_counts_totals(
  834. TSDBModel.users_affected_by_generic_group,
  835. [self.proj1group1.id],
  836. self.now,
  837. self.now + timedelta(hours=4),
  838. rollup=3600,
  839. ) == {
  840. self.proj1group1.id: 5 # 5 unique users overall
  841. }
  842. assert self.db.get_distinct_counts_totals(
  843. TSDBModel.users_affected_by_generic_group,
  844. [self.proj1group1.id],
  845. self.now,
  846. self.now,
  847. rollup=3600,
  848. ) == {
  849. self.proj1group1.id: 1 # Only 1 unique user in the first hour
  850. }
  851. assert (
  852. self.db.get_distinct_counts_totals(
  853. TSDBModel.users_affected_by_generic_group,
  854. [],
  855. self.now,
  856. self.now + timedelta(hours=4),
  857. rollup=3600,
  858. )
  859. == {}
  860. )
  861. def test_get_sums(self):
  862. assert self.db.get_sums(
  863. model=TSDBModel.group_generic,
  864. keys=[self.proj1group1.id, self.proj1group2.id],
  865. start=self.now,
  866. end=self.now + timedelta(hours=4),
  867. ) == {self.proj1group1.id: 12, self.proj1group2.id: 12}
  868. def test_get_data_or_conditions_parsed(self):
  869. """
  870. Verify parsing the legacy format with nested OR conditions works
  871. """
  872. conditions = [
  873. # or conditions in the legacy format needs open and close brackets for precedence
  874. # there's some special casing when parsing conditions that specifically handles this
  875. [
  876. [["isNull", ["environment"]], "=", 1],
  877. ["environment", "IN", [self.env1.name]],
  878. ]
  879. ]
  880. data1 = self.db.get_data(
  881. model=TSDBModel.group_generic,
  882. keys=[self.proj1group1.id, self.proj1group2.id],
  883. conditions=conditions,
  884. start=self.now,
  885. end=self.now + timedelta(hours=4),
  886. )
  887. data2 = self.db.get_data(
  888. model=TSDBModel.group_generic,
  889. keys=[self.proj1group1.id, self.proj1group2.id],
  890. start=self.now,
  891. end=self.now + timedelta(hours=4),
  892. )
  893. # the above queries should return the same data since all groups either have:
  894. # environment=None or environment=test
  895. # so the condition really shouldn't be filtering anything
  896. assert data1 == data2
  897. class AddJitterToSeriesTest(TestCase):
  898. def setUp(self):
  899. self.db = SnubaTSDB()
  900. def run_test(self, end, interval, jitter, expected_start, expected_end):
  901. end = end.replace(tzinfo=pytz.UTC)
  902. start = end - interval
  903. rollup, rollup_series = self.db.get_optimal_rollup_series(start, end)
  904. series = self.db._add_jitter_to_series(rollup_series, start, rollup, jitter)
  905. assert to_datetime(series[0]) == expected_start.replace(tzinfo=pytz.UTC)
  906. assert to_datetime(series[-1]) == expected_end.replace(tzinfo=pytz.UTC)
  907. def test(self):
  908. self.run_test(
  909. end=datetime(2022, 5, 18, 10, 23, 4),
  910. interval=timedelta(hours=1),
  911. jitter=5,
  912. expected_start=datetime(2022, 5, 18, 9, 22, 55),
  913. expected_end=datetime(2022, 5, 18, 10, 22, 55),
  914. )
  915. self.run_test(
  916. end=datetime(2022, 5, 18, 10, 23, 8),
  917. interval=timedelta(hours=1),
  918. jitter=5,
  919. expected_start=datetime(2022, 5, 18, 9, 23, 5),
  920. expected_end=datetime(2022, 5, 18, 10, 23, 5),
  921. )
  922. # Jitter should be the same
  923. self.run_test(
  924. end=datetime(2022, 5, 18, 10, 23, 8),
  925. interval=timedelta(hours=1),
  926. jitter=55,
  927. expected_start=datetime(2022, 5, 18, 9, 23, 5),
  928. expected_end=datetime(2022, 5, 18, 10, 23, 5),
  929. )
  930. self.run_test(
  931. end=datetime(2022, 5, 18, 22, 33, 2),
  932. interval=timedelta(minutes=1),
  933. jitter=3,
  934. expected_start=datetime(2022, 5, 18, 22, 31, 53),
  935. expected_end=datetime(2022, 5, 18, 22, 32, 53),
  936. )
  937. def test_empty_series(self):
  938. assert self.db._add_jitter_to_series([], datetime(2022, 5, 18, 10, 23, 4), 60, 127) == []
  939. assert self.db._add_jitter_to_series([], datetime(2022, 5, 18, 10, 23, 4), 60, None) == []