test_tsdb_backend.py 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165
  1. from datetime import datetime, timedelta, timezone
  2. from unittest.mock import patch
  3. import pytz
  4. from snuba_sdk import Limit
  5. from sentry.issues.grouptype import (
  6. PerformanceNPlusOneGroupType,
  7. PerformanceRenderBlockingAssetSpanGroupType,
  8. ProfileFileIOGroupType,
  9. )
  10. from sentry.models import Environment, Group, GroupRelease, Release
  11. from sentry.testutils import SnubaTestCase, TestCase
  12. from sentry.testutils.helpers.datetime import iso_format
  13. from sentry.testutils.performance_issues.store_transaction import PerfIssueTransactionTestMixin
  14. from sentry.testutils.silo import region_silo_test
  15. from sentry.tsdb.base import TSDBModel
  16. from sentry.tsdb.snuba import SnubaTSDB
  17. from sentry.utils.dates import to_datetime, to_timestamp
  18. from sentry.utils.snuba import aliased_query
  19. from tests.sentry.issues.test_utils import SearchIssueTestMixin
  20. def timestamp(d):
  21. t = int(to_timestamp(d))
  22. return t - (t % 3600)
  23. def has_shape(data, shape, allow_empty=False):
  24. """
  25. Determine if a data object has the provided shape
  26. At any level, the object in `data` and in `shape` must have the same type.
  27. A dict is the same shape if all its keys and values have the same shape as the
  28. key/value in `shape`. The number of keys/values is not relevant.
  29. A list is the same shape if all its items have the same shape as the value
  30. in `shape`
  31. A tuple is the same shape if it has the same length as `shape` and all the
  32. values have the same shape as the corresponding value in `shape`
  33. Any other object simply has to have the same type.
  34. If `allow_empty` is set, lists and dicts in `data` will pass even if they are empty.
  35. """
  36. if not isinstance(data, type(shape)):
  37. return False
  38. if isinstance(data, dict):
  39. return (
  40. (allow_empty or len(data) > 0)
  41. and all(has_shape(k, list(shape.keys())[0]) for k in data.keys())
  42. and all(has_shape(v, list(shape.values())[0]) for v in data.values())
  43. )
  44. elif isinstance(data, list):
  45. return (allow_empty or len(data) > 0) and all(has_shape(v, shape[0]) for v in data)
  46. elif isinstance(data, tuple):
  47. return len(data) == len(shape) and all(
  48. has_shape(data[i], shape[i]) for i in range(len(data))
  49. )
  50. else:
  51. return True
  52. class SnubaTSDBTest(TestCase, SnubaTestCase):
  53. def setUp(self):
  54. super().setUp()
  55. self.db = SnubaTSDB()
  56. self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
  57. hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
  58. )
  59. self.proj1 = self.create_project()
  60. env1 = "test"
  61. env2 = "dev"
  62. defaultenv = ""
  63. release1 = "1" * 10
  64. release2 = "2" * 10
  65. self.release1 = Release.objects.create(
  66. organization_id=self.organization.id, version=release1, date_added=self.now
  67. )
  68. self.release1.add_project(self.proj1)
  69. self.release2 = Release.objects.create(
  70. organization_id=self.organization.id, version=release2, date_added=self.now
  71. )
  72. self.release2.add_project(self.proj1)
  73. for r in range(0, 14400, 600): # Every 10 min for 4 hours
  74. self.store_event(
  75. data={
  76. "event_id": (str(r) * 32)[:32],
  77. "message": "message 1",
  78. "platform": "python",
  79. "fingerprint": [["group-1"], ["group-2"]][
  80. (r // 600) % 2
  81. ], # Switch every 10 mins
  82. "timestamp": iso_format(self.now + timedelta(seconds=r)),
  83. "tags": {
  84. "foo": "bar",
  85. "baz": "quux",
  86. # Switch every 2 hours
  87. "environment": [env1, None][(r // 7200) % 3],
  88. "sentry:user": f"id:user{r // 3300}",
  89. },
  90. "user": {
  91. # change every 55 min so some hours have 1 user, some have 2
  92. "id": f"user{r // 3300}",
  93. },
  94. "release": str(r // 3600) * 10, # 1 per hour,
  95. },
  96. project_id=self.proj1.id,
  97. )
  98. groups = Group.objects.filter(project=self.proj1).order_by("id")
  99. self.proj1group1 = groups[0]
  100. self.proj1group2 = groups[1]
  101. self.env1 = Environment.objects.get(name=env1)
  102. self.env2 = self.create_environment(name=env2) # No events
  103. self.defaultenv = Environment.objects.get(name=defaultenv)
  104. self.group1release1env1 = GroupRelease.objects.get(
  105. project_id=self.proj1.id,
  106. group_id=self.proj1group1.id,
  107. release_id=self.release1.id,
  108. environment=env1,
  109. )
  110. self.group1release2env1 = GroupRelease.objects.create(
  111. project_id=self.proj1.id,
  112. group_id=self.proj1group1.id,
  113. release_id=self.release2.id,
  114. environment=env1,
  115. )
  116. self.group2release1env1 = GroupRelease.objects.get(
  117. project_id=self.proj1.id,
  118. group_id=self.proj1group2.id,
  119. release_id=self.release1.id,
  120. environment=env1,
  121. )
  122. def test_range_single(self):
  123. env1 = "test"
  124. project = self.create_project()
  125. for r in range(0, 600 * 6 * 4, 300): # Every 10 min for 4 hours
  126. self.store_event(
  127. data={
  128. "event_id": (str(r) * 32)[:32],
  129. "message": "message 1",
  130. "platform": "python",
  131. "fingerprint": ["group-1"],
  132. "timestamp": iso_format(self.now + timedelta(seconds=r)),
  133. "tags": {
  134. "foo": "bar",
  135. "baz": "quux",
  136. # Switch every 2 hours
  137. "environment": [env1, None][(r // 7200) % 3],
  138. "sentry:user": f"id:user{r // 3300}",
  139. },
  140. "user": {
  141. # change every 55 min so some hours have 1 user, some have 2
  142. "id": f"user{r // 3300}",
  143. },
  144. "release": str(r // 3600) * 10, # 1 per hour,
  145. },
  146. project_id=project.id,
  147. )
  148. groups = Group.objects.filter(project=project).order_by("id")
  149. group = groups[0]
  150. dts = [self.now + timedelta(hours=i) for i in range(4)]
  151. assert self.db.get_range(
  152. TSDBModel.group,
  153. [group.id],
  154. dts[0],
  155. dts[-1],
  156. rollup=3600,
  157. tenant_ids={"referrer": "r", "organization_id": 1234},
  158. ) == {
  159. group.id: [
  160. (timestamp(dts[0]), 6 * 2),
  161. (timestamp(dts[1]), 6 * 2),
  162. (timestamp(dts[2]), 6 * 2),
  163. (timestamp(dts[3]), 6 * 2),
  164. ]
  165. }
  166. def test_range_groups(self):
  167. dts = [self.now + timedelta(hours=i) for i in range(4)]
  168. assert self.db.get_range(
  169. TSDBModel.group,
  170. [self.proj1group1.id],
  171. dts[0],
  172. dts[-1],
  173. rollup=3600,
  174. tenant_ids={"referrer": "r", "organization_id": 1234},
  175. ) == {
  176. self.proj1group1.id: [
  177. (timestamp(dts[0]), 3),
  178. (timestamp(dts[1]), 3),
  179. (timestamp(dts[2]), 3),
  180. (timestamp(dts[3]), 3),
  181. ]
  182. }
  183. # Multiple groups
  184. assert self.db.get_range(
  185. TSDBModel.group,
  186. [self.proj1group1.id, self.proj1group2.id],
  187. dts[0],
  188. dts[-1],
  189. rollup=3600,
  190. tenant_ids={"referrer": "r", "organization_id": 1234},
  191. ) == {
  192. self.proj1group1.id: [
  193. (timestamp(dts[0]), 3),
  194. (timestamp(dts[1]), 3),
  195. (timestamp(dts[2]), 3),
  196. (timestamp(dts[3]), 3),
  197. ],
  198. self.proj1group2.id: [
  199. (timestamp(dts[0]), 3),
  200. (timestamp(dts[1]), 3),
  201. (timestamp(dts[2]), 3),
  202. (timestamp(dts[3]), 3),
  203. ],
  204. }
  205. assert self.db.get_range(TSDBModel.group, [], dts[0], dts[-1], rollup=3600) == {}
  206. def test_range_releases(self):
  207. dts = [self.now + timedelta(hours=i) for i in range(4)]
  208. assert self.db.get_range(
  209. TSDBModel.release,
  210. [self.release1.id],
  211. dts[0],
  212. dts[-1],
  213. rollup=3600,
  214. tenant_ids={"referrer": "r", "organization_id": 1234},
  215. ) == {
  216. self.release1.id: [
  217. (timestamp(dts[0]), 0),
  218. (timestamp(dts[1]), 6),
  219. (timestamp(dts[2]), 0),
  220. (timestamp(dts[3]), 0),
  221. ]
  222. }
  223. def test_range_project(self):
  224. dts = [self.now + timedelta(hours=i) for i in range(4)]
  225. assert self.db.get_range(
  226. TSDBModel.project,
  227. [self.proj1.id],
  228. dts[0],
  229. dts[-1],
  230. rollup=3600,
  231. tenant_ids={"referrer": "r", "organization_id": 1234},
  232. ) == {
  233. self.proj1.id: [
  234. (timestamp(dts[0]), 6),
  235. (timestamp(dts[1]), 6),
  236. (timestamp(dts[2]), 6),
  237. (timestamp(dts[3]), 6),
  238. ]
  239. }
  240. def test_range_environment_filter(self):
  241. dts = [self.now + timedelta(hours=i) for i in range(4)]
  242. assert self.db.get_range(
  243. TSDBModel.project,
  244. [self.proj1.id],
  245. dts[0],
  246. dts[-1],
  247. rollup=3600,
  248. environment_ids=[self.env1.id],
  249. tenant_ids={"referrer": "r", "organization_id": 1234},
  250. ) == {
  251. self.proj1.id: [
  252. (timestamp(dts[0]), 6),
  253. (timestamp(dts[1]), 6),
  254. (timestamp(dts[2]), 0),
  255. (timestamp(dts[3]), 0),
  256. ]
  257. }
  258. # No events submitted for env2
  259. assert self.db.get_range(
  260. TSDBModel.project,
  261. [self.proj1.id],
  262. dts[0],
  263. dts[-1],
  264. rollup=3600,
  265. environment_ids=[self.env2.id],
  266. tenant_ids={"referrer": "r", "organization_id": 1234},
  267. ) == {
  268. self.proj1.id: [
  269. (timestamp(dts[0]), 0),
  270. (timestamp(dts[1]), 0),
  271. (timestamp(dts[2]), 0),
  272. (timestamp(dts[3]), 0),
  273. ]
  274. }
  275. # Events submitted with no environment should match default environment
  276. assert self.db.get_range(
  277. TSDBModel.project,
  278. [self.proj1.id],
  279. dts[0],
  280. dts[-1],
  281. rollup=3600,
  282. environment_ids=[self.defaultenv.id],
  283. tenant_ids={"referrer": "r", "organization_id": 1234},
  284. ) == {
  285. self.proj1.id: [
  286. (timestamp(dts[0]), 0),
  287. (timestamp(dts[1]), 0),
  288. (timestamp(dts[2]), 6),
  289. (timestamp(dts[3]), 6),
  290. ]
  291. }
  292. def test_range_rollups(self):
  293. # Daily
  294. daystart = self.now.replace(hour=0) # day buckets start on day boundaries
  295. dts = [daystart + timedelta(days=i) for i in range(2)]
  296. assert self.db.get_range(
  297. TSDBModel.project,
  298. [self.proj1.id],
  299. dts[0],
  300. dts[-1],
  301. rollup=86400,
  302. tenant_ids={"referrer": "r", "organization_id": 1234},
  303. ) == {self.proj1.id: [(timestamp(dts[0]), 24), (timestamp(dts[1]), 0)]}
  304. # Minutely
  305. dts = [self.now + timedelta(minutes=i) for i in range(120)]
  306. # Expect every 10th minute to have a 1, else 0
  307. expected = [(to_timestamp(d), 1 if i % 10 == 0 else 0) for i, d in enumerate(dts)]
  308. assert self.db.get_range(
  309. TSDBModel.project,
  310. [self.proj1.id],
  311. dts[0],
  312. dts[-1],
  313. rollup=60,
  314. tenant_ids={"referrer": "r", "organization_id": 1234},
  315. ) == {self.proj1.id: expected}
  316. def test_distinct_counts_series_users(self):
  317. dts = [self.now + timedelta(hours=i) for i in range(4)]
  318. assert self.db.get_distinct_counts_series(
  319. TSDBModel.users_affected_by_group,
  320. [self.proj1group1.id],
  321. dts[0],
  322. dts[-1],
  323. rollup=3600,
  324. tenant_ids={"referrer": "r", "organization_id": 1234},
  325. ) == {
  326. self.proj1group1.id: [
  327. (timestamp(dts[0]), 1),
  328. (timestamp(dts[1]), 1),
  329. (timestamp(dts[2]), 1),
  330. (timestamp(dts[3]), 2),
  331. ]
  332. }
  333. dts = [self.now + timedelta(hours=i) for i in range(4)]
  334. assert self.db.get_distinct_counts_series(
  335. TSDBModel.users_affected_by_project,
  336. [self.proj1.id],
  337. dts[0],
  338. dts[-1],
  339. rollup=3600,
  340. tenant_ids={"referrer": "r", "organization_id": 1234},
  341. ) == {
  342. self.proj1.id: [
  343. (timestamp(dts[0]), 1),
  344. (timestamp(dts[1]), 2),
  345. (timestamp(dts[2]), 2),
  346. (timestamp(dts[3]), 2),
  347. ]
  348. }
  349. assert (
  350. self.db.get_distinct_counts_series(
  351. TSDBModel.users_affected_by_group,
  352. [],
  353. dts[0],
  354. dts[-1],
  355. rollup=3600,
  356. tenant_ids={"referrer": "r", "organization_id": 1234},
  357. )
  358. == {}
  359. )
  360. def get_distinct_counts_totals_users(self):
  361. assert self.db.get_distinct_counts_totals(
  362. TSDBModel.users_affected_by_group,
  363. [self.proj1group1.id],
  364. self.now,
  365. self.now + timedelta(hours=4),
  366. rollup=3600,
  367. ) == {
  368. self.proj1group1.id: 2 # 2 unique users overall
  369. }
  370. assert self.db.get_distinct_counts_totals(
  371. TSDBModel.users_affected_by_group,
  372. [self.proj1group1.id],
  373. self.now,
  374. self.now,
  375. rollup=3600,
  376. ) == {
  377. self.proj1group1.id: 1 # Only 1 unique user in the first hour
  378. }
  379. assert self.db.get_distinct_counts_totals(
  380. TSDBModel.users_affected_by_project,
  381. [self.proj1.id],
  382. self.now,
  383. self.now + timedelta(hours=4),
  384. rollup=3600,
  385. ) == {self.proj1.id: 2}
  386. assert (
  387. self.db.get_distinct_counts_totals(
  388. TSDBModel.users_affected_by_group,
  389. [],
  390. self.now,
  391. self.now + timedelta(hours=4),
  392. rollup=3600,
  393. )
  394. == {}
  395. )
  396. def test_most_frequent(self):
  397. assert self.db.get_most_frequent(
  398. TSDBModel.frequent_issues_by_project,
  399. [self.proj1.id],
  400. self.now,
  401. self.now + timedelta(hours=4),
  402. rollup=3600,
  403. tenant_ids={"referrer": "r", "organization_id": 1234},
  404. ) in [
  405. {self.proj1.id: [(self.proj1group1.id, 2.0), (self.proj1group2.id, 1.0)]},
  406. {self.proj1.id: [(self.proj1group2.id, 2.0), (self.proj1group1.id, 1.0)]},
  407. ] # Both issues equally frequent
  408. assert (
  409. self.db.get_most_frequent(
  410. TSDBModel.frequent_issues_by_project,
  411. [],
  412. self.now,
  413. self.now + timedelta(hours=4),
  414. rollup=3600,
  415. tenant_ids={"referrer": "r", "organization_id": 1234},
  416. )
  417. == {}
  418. )
  419. def test_frequency_series(self):
  420. dts = [self.now + timedelta(hours=i) for i in range(4)]
  421. assert self.db.get_frequency_series(
  422. TSDBModel.frequent_releases_by_group,
  423. {
  424. self.proj1group1.id: (self.group1release1env1.id, self.group1release2env1.id),
  425. self.proj1group2.id: (self.group2release1env1.id,),
  426. },
  427. dts[0],
  428. dts[-1],
  429. rollup=3600,
  430. tenant_ids={"referrer": "r", "organization_id": 1234},
  431. ) == {
  432. self.proj1group1.id: [
  433. (timestamp(dts[0]), {self.group1release1env1.id: 0, self.group1release2env1.id: 0}),
  434. (timestamp(dts[1]), {self.group1release1env1.id: 3, self.group1release2env1.id: 0}),
  435. (timestamp(dts[2]), {self.group1release1env1.id: 0, self.group1release2env1.id: 3}),
  436. (timestamp(dts[3]), {self.group1release1env1.id: 0, self.group1release2env1.id: 0}),
  437. ],
  438. self.proj1group2.id: [
  439. (timestamp(dts[0]), {self.group2release1env1.id: 0}),
  440. (timestamp(dts[1]), {self.group2release1env1.id: 3}),
  441. (timestamp(dts[2]), {self.group2release1env1.id: 0}),
  442. (timestamp(dts[3]), {self.group2release1env1.id: 0}),
  443. ],
  444. }
  445. assert (
  446. self.db.get_frequency_series(
  447. TSDBModel.frequent_releases_by_group,
  448. {},
  449. dts[0],
  450. dts[-1],
  451. rollup=3600,
  452. tenant_ids={"referrer": "r", "organization_id": 1234},
  453. )
  454. == {}
  455. )
  456. def test_result_shape(self):
  457. """
  458. Tests that the results from the different TSDB methods have the
  459. expected format.
  460. """
  461. project_id = self.proj1.id
  462. dts = [self.now + timedelta(hours=i) for i in range(4)]
  463. results = self.db.get_most_frequent(
  464. TSDBModel.frequent_issues_by_project,
  465. [project_id],
  466. dts[0],
  467. dts[0],
  468. tenant_ids={"referrer": "r", "organization_id": 1234},
  469. )
  470. assert has_shape(results, {1: [(1, 1.0)]})
  471. results = self.db.get_most_frequent_series(
  472. TSDBModel.frequent_issues_by_project,
  473. [project_id],
  474. dts[0],
  475. dts[0],
  476. tenant_ids={"referrer": "r", "organization_id": 1234},
  477. )
  478. assert has_shape(results, {1: [(1, {1: 1.0})]})
  479. items = {
  480. # {project_id: (issue_id, issue_id, ...)}
  481. project_id: (self.proj1group1.id, self.proj1group2.id)
  482. }
  483. results = self.db.get_frequency_series(
  484. TSDBModel.frequent_issues_by_project,
  485. items,
  486. dts[0],
  487. dts[-1],
  488. tenant_ids={"referrer": "r", "organization_id": 1234},
  489. )
  490. assert has_shape(results, {1: [(1, {1: 1})]})
  491. results = self.db.get_frequency_totals(
  492. TSDBModel.frequent_issues_by_project,
  493. items,
  494. dts[0],
  495. dts[-1],
  496. tenant_ids={"referrer": "r", "organization_id": 1234},
  497. )
  498. assert has_shape(results, {1: {1: 1}})
  499. results = self.db.get_range(
  500. TSDBModel.project,
  501. [project_id],
  502. dts[0],
  503. dts[-1],
  504. tenant_ids={"referrer": "r", "organization_id": 1234},
  505. )
  506. assert has_shape(results, {1: [(1, 1)]})
  507. results = self.db.get_distinct_counts_series(
  508. TSDBModel.users_affected_by_project,
  509. [project_id],
  510. dts[0],
  511. dts[-1],
  512. tenant_ids={"referrer": "r", "organization_id": 1234},
  513. )
  514. assert has_shape(results, {1: [(1, 1)]})
  515. results = self.db.get_distinct_counts_totals(
  516. TSDBModel.users_affected_by_project,
  517. [project_id],
  518. dts[0],
  519. dts[-1],
  520. tenant_ids={"referrer": "r", "organization_id": 1234},
  521. )
  522. assert has_shape(results, {1: 1})
  523. results = self.db.get_distinct_counts_union(
  524. TSDBModel.users_affected_by_project,
  525. [project_id],
  526. dts[0],
  527. dts[-1],
  528. tenant_ids={"referrer": "r", "organization_id": 1234},
  529. )
  530. assert has_shape(results, 1)
  531. def test_calculated_limit(self):
  532. with patch("sentry.tsdb.snuba.raw_snql_query") as snuba:
  533. # 24h test
  534. rollup = 3600
  535. end = self.now
  536. start = end + timedelta(days=-1, seconds=rollup)
  537. self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
  538. assert snuba.call_args.args[0].query.limit == Limit(120)
  539. # 14 day test
  540. rollup = 86400
  541. start = end + timedelta(days=-14, seconds=rollup)
  542. self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
  543. assert snuba.call_args.args[0].query.limit == Limit(70)
  544. # 1h test
  545. rollup = 3600
  546. end = self.now
  547. start = end + timedelta(hours=-1, seconds=rollup)
  548. self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
  549. assert snuba.call_args.args[0].query.limit == Limit(5)
  550. @patch("sentry.utils.snuba.OVERRIDE_OPTIONS", new={"consistent": True})
  551. def test_tsdb_with_consistent(self):
  552. with patch("sentry.utils.snuba._apply_cache_and_build_results") as snuba:
  553. rollup = 3600
  554. end = self.now
  555. start = end + timedelta(days=-1, seconds=rollup)
  556. self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
  557. assert snuba.call_args.args[0][0][0].query.limit == Limit(120)
  558. assert snuba.call_args.args[0][0][0].flags.consistent is True
  559. @region_silo_test
  560. class SnubaTSDBGroupPerformanceTest(TestCase, SnubaTestCase, PerfIssueTransactionTestMixin):
  561. def setUp(self):
  562. super().setUp()
  563. self.db = SnubaTSDB()
  564. self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
  565. hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
  566. )
  567. self.proj1 = self.create_project()
  568. self.env1 = Environment.objects.get_or_create(
  569. organization_id=self.proj1.organization_id, name="test"
  570. )[0]
  571. self.env2 = Environment.objects.get_or_create(
  572. organization_id=self.proj1.organization_id, name="dev"
  573. )[0]
  574. defaultenv = ""
  575. group1_fingerprint = f"{PerformanceRenderBlockingAssetSpanGroupType.type_id}-group1"
  576. group2_fingerprint = f"{PerformanceNPlusOneGroupType.type_id}-group2"
  577. for r in range(0, 14400, 600): # Every 10 min for 4 hours
  578. event = self.store_transaction(
  579. environment=[self.env1.name, None][(r // 7200) % 3],
  580. project_id=self.proj1.id,
  581. # change every 55 min so some hours have 1 user, some have 2
  582. user_id=f"user{r // 3300}",
  583. # release_version=str(r // 3600) * 10, # 1 per hour,
  584. timestamp=self.now + timedelta(seconds=r),
  585. fingerprint=[group1_fingerprint, group2_fingerprint] if ((r // 600) % 2) else [],
  586. )
  587. self.proj1group1 = event.groups[0]
  588. self.proj1group2 = event.groups[1]
  589. self.defaultenv = Environment.objects.get(name=defaultenv)
  590. def test_range_groups_single(self):
  591. from sentry.snuba.dataset import Dataset
  592. now = (datetime.utcnow() - timedelta(days=1)).replace(
  593. hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
  594. )
  595. dts = [now + timedelta(hours=i) for i in range(4)]
  596. project = self.create_project()
  597. group_fingerprint = f"{PerformanceNPlusOneGroupType.type_id}-group3"
  598. # not sure what's going on here, but `times=1,2,3,4` work fine
  599. # fails with anything above 4
  600. times = 4
  601. event_ids = []
  602. events = []
  603. for i in range(0, times):
  604. res = self.store_transaction(
  605. environment=None,
  606. project_id=project.id,
  607. user_id="my_user",
  608. timestamp=now + timedelta(minutes=i * 10),
  609. fingerprint=[group_fingerprint],
  610. )
  611. grouped_by_project = aliased_query(
  612. dataset=Dataset.Transactions,
  613. start=None,
  614. end=None,
  615. groupby=None,
  616. conditions=None,
  617. filter_keys={"project_id": [project.id], "event_id": [res.event_id]},
  618. selected_columns=["event_id", "project_id", "group_ids"],
  619. aggregations=None,
  620. tenant_ids={"referrer": "r", "organization_id": 1234},
  621. )
  622. assert grouped_by_project["data"][0]["event_id"] == res.event_id
  623. from sentry.eventstore.models import Event
  624. event_from_nodestore = Event(project_id=project.id, event_id=res.event_id)
  625. assert event_from_nodestore.event_id == res.event_id
  626. event_ids.append(res.event_id)
  627. events.append(res)
  628. group = events[0].groups[0]
  629. transactions_for_project = aliased_query(
  630. dataset=Dataset.Transactions,
  631. start=None,
  632. end=None,
  633. groupby=None,
  634. conditions=None,
  635. filter_keys={"project_id": [project.id]},
  636. selected_columns=["project_id", "event_id"],
  637. aggregations=None,
  638. tenant_ids={"referrer": "r", "organization_id": 1234},
  639. )
  640. assert len(transactions_for_project["data"]) == times
  641. transactions_by_group = aliased_query(
  642. dataset=Dataset.Transactions,
  643. start=None,
  644. end=None,
  645. # start=group.first_seen,
  646. # end=now + timedelta(hours=4),
  647. groupby=["group_id"],
  648. conditions=None,
  649. filter_keys={"project_id": [project.id], "group_id": [group.id]},
  650. aggregations=[
  651. ["arrayJoin", ["group_ids"], "group_id"],
  652. ["count()", "", "times_seen"],
  653. ],
  654. tenant_ids={"referrer": "r", "organization_id": 1234},
  655. )
  656. assert transactions_by_group["data"][0]["times_seen"] == times # 1 + (times % 5)
  657. assert self.db.get_range(
  658. TSDBModel.group_performance,
  659. [group.id],
  660. dts[0],
  661. dts[-1],
  662. rollup=3600,
  663. tenant_ids={"referrer": "r", "organization_id": 1234},
  664. ) == {
  665. group.id: [
  666. # (timestamp(dts[0]), 1 + (times % 5)),
  667. (timestamp(dts[0]), times),
  668. (timestamp(dts[1]), 0),
  669. (timestamp(dts[2]), 0),
  670. (timestamp(dts[3]), 0),
  671. ]
  672. }
  673. def test_range_groups_mult(self):
  674. now = (datetime.utcnow() - timedelta(days=1)).replace(
  675. hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
  676. )
  677. dts = [now + timedelta(hours=i) for i in range(4)]
  678. project = self.create_project()
  679. group_fingerprint = f"{PerformanceNPlusOneGroupType.type_id}-group4"
  680. ids = ["a", "b", "c", "d", "e", "f", "1", "2", "3", "4", "5"]
  681. events = []
  682. for i, _ in enumerate(ids):
  683. event = self.store_transaction(
  684. environment=None,
  685. project_id=project.id,
  686. user_id="my_user",
  687. timestamp=now + timedelta(minutes=i * 10),
  688. fingerprint=[group_fingerprint],
  689. )
  690. events.append(event)
  691. group = events[0].groups[0]
  692. assert self.db.get_range(
  693. TSDBModel.group_performance,
  694. [group.id],
  695. dts[0],
  696. dts[-1],
  697. rollup=3600,
  698. tenant_ids={"referrer": "r", "organization_id": 1234},
  699. ) == {
  700. group.id: [
  701. (timestamp(dts[0]), 6),
  702. (timestamp(dts[1]), 5),
  703. (timestamp(dts[2]), 0),
  704. (timestamp(dts[3]), 0),
  705. ]
  706. }
  707. def test_range_groups_simple(self):
  708. project = self.create_project()
  709. now = (datetime.utcnow() - timedelta(days=1)).replace(
  710. hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
  711. )
  712. group_fingerprint = f"{PerformanceRenderBlockingAssetSpanGroupType.type_id}-group5"
  713. # for r in range(0, 14400, 600): # Every 10 min for 4 hours
  714. # for r in [1, 2, 3, 4, 5, 6, 7, 8]:
  715. ids = ["a", "b", "c", "d", "e"] # , "f"]
  716. events = []
  717. for r in ids:
  718. # for r in range(0, 9, 1):
  719. event = self.store_transaction(
  720. environment=None,
  721. project_id=project.id,
  722. # change every 55 min so some hours have 1 user, some have 2
  723. user_id=f"user{r}",
  724. # release_version=str(r // 3600) * 10, # 1 per hour,
  725. timestamp=now,
  726. fingerprint=[group_fingerprint],
  727. )
  728. events.append(event)
  729. group = events[0].groups[0]
  730. dts = [now + timedelta(hours=i) for i in range(4)]
  731. assert self.db.get_range(
  732. TSDBModel.group_performance,
  733. [group.id],
  734. dts[0],
  735. dts[-1],
  736. rollup=3600,
  737. tenant_ids={"referrer": "r", "organization_id": 1234},
  738. ) == {
  739. group.id: [
  740. (timestamp(dts[0]), len(ids)),
  741. (timestamp(dts[1]), 0),
  742. (timestamp(dts[2]), 0),
  743. (timestamp(dts[3]), 0),
  744. ]
  745. }
  746. def test_range_groups(self):
  747. dts = [self.now + timedelta(hours=i) for i in range(4)]
  748. # Multiple groups
  749. assert self.db.get_range(
  750. TSDBModel.group_performance,
  751. [self.proj1group1.id, self.proj1group2.id],
  752. dts[0],
  753. dts[-1],
  754. rollup=3600,
  755. tenant_ids={"referrer": "r", "organization_id": 1234},
  756. ) == {
  757. self.proj1group1.id: [
  758. (timestamp(dts[0]), 3),
  759. (timestamp(dts[1]), 3),
  760. (timestamp(dts[2]), 3),
  761. (timestamp(dts[3]), 3),
  762. ],
  763. self.proj1group2.id: [
  764. (timestamp(dts[0]), 3),
  765. (timestamp(dts[1]), 3),
  766. (timestamp(dts[2]), 3),
  767. (timestamp(dts[3]), 3),
  768. ],
  769. }
  770. assert (
  771. self.db.get_range(
  772. TSDBModel.group_performance,
  773. [],
  774. dts[0],
  775. dts[-1],
  776. rollup=3600,
  777. tenant_ids={"referrer": "r", "organization_id": 1234},
  778. )
  779. == {}
  780. )
@region_silo_test
class SnubaTSDBGroupProfilingTest(TestCase, SnubaTestCase, SearchIssueTestMixin):
    """
    Tests for the SnubaTSDB generic-issue models (``group_generic``,
    ``users_affected_by_generic_group``) backed by occurrences written through
    ``store_search_issue``.

    ``setUp`` seeds a deterministic 4-hour dataset:

    - one occurrence every 10 minutes for 4 hours (24 total),
    - alternating between two fingerprints, so each group receives 12 events
      (3 per hour),
    - ``user_id = r // 3300`` yields ids 0..4 across the window (5 unique
      users overall, only user 0 in the first hour),
    - the first half of the window is tagged with the "test" environment,
      the second half with no environment.

    The per-test assertions (counts of 3 per hour, 5 distinct users, sums of
    12) all derive from this arithmetic.
    """

    def setUp(self):
        super().setUp()
        self.db = SnubaTSDB()
        # Anchor "now" at midnight UTC at least 4h in the past so the whole
        # 4-hour event window lies in completed, queryable time buckets.
        self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        self.proj1 = self.create_project()
        self.env1 = Environment.objects.get_or_create(
            organization_id=self.proj1.organization_id, name="test"
        )[0]
        self.env2 = Environment.objects.get_or_create(
            organization_id=self.proj1.organization_id, name="dev"
        )[0]
        defaultenv = ""

        group1_fingerprint = f"{ProfileFileIOGroupType.type_id}-group1"
        group2_fingerprint = f"{ProfileFileIOGroupType.type_id}-group2"

        # Collect the distinct groups created while inserting occurrences;
        # keyed by group id so repeated hits on the same group collapse.
        groups = {}
        for r in range(0, 14400, 600):  # Every 10 min for 4 hours
            event, occurrence, group_info = self.store_search_issue(
                project_id=self.proj1.id,
                # change every 55 min so some hours have 1 user, some have 2
                user_id=r // 3300,
                # Alternate fingerprints every event: 12 events per group,
                # i.e. 3 per group per hour.
                fingerprints=[group1_fingerprint] if ((r // 600) % 2) else [group2_fingerprint],
                # release_version=str(r // 3600) * 10, # 1 per hour,
                # r // 7200 is 0 for the first two hours and 1 afterwards
                # (r tops out at 13800), so index 2 is never reached.
                environment=[self.env1.name, None][(r // 7200) % 3],
                insert_time=self.now + timedelta(seconds=r),
            )
            if group_info:
                groups[group_info.group.id] = group_info.group

        all_groups = list(groups.values())
        self.proj1group1 = all_groups[0]
        self.proj1group2 = all_groups[1]
        self.defaultenv = Environment.objects.get(name=defaultenv)

    def test_range_group_manual_group_time_rollup(self):
        """
        For each supported granularity, verify that the optimal rollup is
        selected for the series span and that ``get_range`` with
        ``rollup=None`` returns one event per series bucket.
        """
        project = self.create_project()

        # these are the only granularities/rollups that can actually be used
        GRANULARITIES = [
            (10, timedelta(seconds=10), 5),
            (60 * 60, timedelta(hours=1), 6),
            (60 * 60 * 24, timedelta(days=1), 15),
        ]

        start = (datetime.now(timezone.utc) - timedelta(days=15)).replace(
            hour=0, minute=0, second=0
        )

        for step, delta, times in GRANULARITIES:
            series = [start + (delta * i) for i in range(times)]
            series_ts = [int(to_timestamp(ts)) for ts in series]

            assert self.db.get_optimal_rollup(series[0], series[-1]) == step
            assert self.db.get_optimal_rollup_series(series[0], end=series[-1], rollup=None) == (
                step,
                series_ts,
            )

            # One occurrence per bucket; the fingerprint embeds the step so
            # each granularity gets its own group.
            for time_step in series:
                _, _, group_info = self.store_search_issue(
                    project_id=project.id,
                    user_id=0,
                    fingerprints=[f"test_range_group_manual_group_time_rollup-{step}"],
                    environment=None,
                    insert_time=time_step,
                )

            assert self.db.get_range(
                TSDBModel.group_generic,
                [group_info.group.id],
                series[0],
                series[-1],
                rollup=None,
            ) == {group_info.group.id: [(ts, 1) for ts in series_ts]}

    def test_range_groups_mult(self):
        """
        Insert 11 occurrences at 10-minute intervals for one group and check
        the hourly rollup: 6 fall in the first hour, 5 in the second, none
        afterwards.
        """
        now = (datetime.utcnow() - timedelta(days=1)).replace(
            hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        dts = [now + timedelta(hours=i) for i in range(4)]
        project = self.create_project()
        group_fingerprint = f"{ProfileFileIOGroupType.type_id}-group4"

        groups = []
        for i in range(0, 11):
            _, _, group_info = self.store_search_issue(
                project_id=project.id,
                user_id=0,
                fingerprints=[group_fingerprint],
                environment=None,
                insert_time=now + timedelta(minutes=i * 10),
            )
            if group_info:
                groups.append(group_info.group)

        group = groups[0]
        assert self.db.get_range(
            TSDBModel.group_generic,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            group.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 5),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_groups_simple(self):
        """
        Insert 5 occurrences (distinct user ids) at the same instant for one
        group; the first hourly bucket counts all 5, later buckets are empty.
        """
        project = self.create_project()
        now = (datetime.utcnow() - timedelta(days=1)).replace(
            hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        group_fingerprint = f"{ProfileFileIOGroupType.type_id}-group5"
        ids = [1, 2, 3, 4, 5]

        groups = []
        for r in ids:
            # for r in range(0, 9, 1):
            event, occurrence, group_info = self.store_search_issue(
                project_id=project.id,
                # change every 55 min so some hours have 1 user, some have 2
                user_id=r,
                fingerprints=[group_fingerprint],
                environment=None,
                # release_version=str(r // 3600) * 10, # 1 per hour,
                insert_time=now,
            )
            if group_info:
                groups.append(group_info.group)

        group = groups[0]
        dts = [now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.group_generic,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            group.id: [
                (timestamp(dts[0]), len(ids)),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_groups(self):
        """
        Against the setUp dataset, each group shows 3 events per hourly
        bucket, and an empty key list yields an empty result.
        """
        dts = [self.now + timedelta(hours=i) for i in range(4)]

        # Multiple groups
        assert self.db.get_range(
            TSDBModel.group_generic,
            [self.proj1group1.id, self.proj1group2.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }

        assert self.db.get_range(TSDBModel.group_generic, [], dts[0], dts[-1], rollup=3600) == {}

    def test_get_distinct_counts_totals_users(self):
        """
        Distinct-user totals over the setUp dataset: 5 unique users across
        the full 4-hour window, 1 in the first hour, and an empty key list
        yields an empty result.
        """
        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_generic_group,
            [self.proj1group1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) == {
            self.proj1group1.id: 5  # 5 unique users overall
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_generic_group,
            [self.proj1group1.id],
            self.now,
            self.now,
            rollup=3600,
        ) == {
            self.proj1group1.id: 1  # Only 1 unique user in the first hour
        }

        assert (
            self.db.get_distinct_counts_totals(
                TSDBModel.users_affected_by_generic_group,
                [],
                self.now,
                self.now + timedelta(hours=4),
                rollup=3600,
            )
            == {}
        )

    def test_get_sums(self):
        """Each group accumulated 12 events over the 4-hour window."""
        assert self.db.get_sums(
            model=TSDBModel.group_generic,
            keys=[self.proj1group1.id, self.proj1group2.id],
            start=self.now,
            end=self.now + timedelta(hours=4),
        ) == {self.proj1group1.id: 12, self.proj1group2.id: 12}

    def test_get_data_or_conditions_parsed(self):
        """
        Verify parsing the legacy format with nested OR conditions works
        """
        conditions = [
            # or conditions in the legacy format needs open and close brackets for precedence
            # there's some special casing when parsing conditions that specifically handles this
            [
                [["isNull", ["environment"]], "=", 1],
                ["environment", "IN", [self.env1.name]],
            ]
        ]

        data1 = self.db.get_data(
            model=TSDBModel.group_generic,
            keys=[self.proj1group1.id, self.proj1group2.id],
            conditions=conditions,
            start=self.now,
            end=self.now + timedelta(hours=4),
        )
        data2 = self.db.get_data(
            model=TSDBModel.group_generic,
            keys=[self.proj1group1.id, self.proj1group2.id],
            start=self.now,
            end=self.now + timedelta(hours=4),
        )

        # the above queries should return the same data since all groups either have:
        # environment=None or environment=test
        # so the condition really shouldn't be filtering anything
        assert data1 == data2
  1009. class AddJitterToSeriesTest(TestCase):
  1010. def setUp(self):
  1011. self.db = SnubaTSDB()
  1012. def run_test(self, end, interval, jitter, expected_start, expected_end):
  1013. end = end.replace(tzinfo=pytz.UTC)
  1014. start = end - interval
  1015. rollup, rollup_series = self.db.get_optimal_rollup_series(start, end)
  1016. series = self.db._add_jitter_to_series(rollup_series, start, rollup, jitter)
  1017. assert to_datetime(series[0]) == expected_start.replace(tzinfo=pytz.UTC)
  1018. assert to_datetime(series[-1]) == expected_end.replace(tzinfo=pytz.UTC)
  1019. def test(self):
  1020. self.run_test(
  1021. end=datetime(2022, 5, 18, 10, 23, 4),
  1022. interval=timedelta(hours=1),
  1023. jitter=5,
  1024. expected_start=datetime(2022, 5, 18, 9, 22, 55),
  1025. expected_end=datetime(2022, 5, 18, 10, 22, 55),
  1026. )
  1027. self.run_test(
  1028. end=datetime(2022, 5, 18, 10, 23, 8),
  1029. interval=timedelta(hours=1),
  1030. jitter=5,
  1031. expected_start=datetime(2022, 5, 18, 9, 23, 5),
  1032. expected_end=datetime(2022, 5, 18, 10, 23, 5),
  1033. )
  1034. # Jitter should be the same
  1035. self.run_test(
  1036. end=datetime(2022, 5, 18, 10, 23, 8),
  1037. interval=timedelta(hours=1),
  1038. jitter=55,
  1039. expected_start=datetime(2022, 5, 18, 9, 23, 5),
  1040. expected_end=datetime(2022, 5, 18, 10, 23, 5),
  1041. )
  1042. self.run_test(
  1043. end=datetime(2022, 5, 18, 22, 33, 2),
  1044. interval=timedelta(minutes=1),
  1045. jitter=3,
  1046. expected_start=datetime(2022, 5, 18, 22, 31, 53),
  1047. expected_end=datetime(2022, 5, 18, 22, 32, 53),
  1048. )
  1049. def test_empty_series(self):
  1050. assert self.db._add_jitter_to_series([], datetime(2022, 5, 18, 10, 23, 4), 60, 127) == []
  1051. assert self.db._add_jitter_to_series([], datetime(2022, 5, 18, 10, 23, 4), 60, None) == []