# test_tsdb_backend.py
  1. from datetime import datetime, timedelta, timezone
  2. from unittest.mock import patch
  3. from snuba_sdk import Limit
  4. from sentry.issues.grouptype import ProfileFileIOGroupType
  5. from sentry.models import Environment, Group, GroupRelease, Release
  6. from sentry.testutils.cases import SnubaTestCase, TestCase
  7. from sentry.testutils.helpers.datetime import iso_format
  8. from sentry.testutils.silo import region_silo_test
  9. from sentry.tsdb.base import TSDBModel
  10. from sentry.tsdb.snuba import SnubaTSDB
  11. from sentry.utils.dates import to_datetime, to_timestamp
  12. from tests.sentry.issues.test_utils import SearchIssueTestMixin
  13. def timestamp(d):
  14. t = int(to_timestamp(d))
  15. return t - (t % 3600)
  16. def has_shape(data, shape, allow_empty=False):
  17. """
  18. Determine if a data object has the provided shape
  19. At any level, the object in `data` and in `shape` must have the same type.
  20. A dict is the same shape if all its keys and values have the same shape as the
  21. key/value in `shape`. The number of keys/values is not relevant.
  22. A list is the same shape if all its items have the same shape as the value
  23. in `shape`
  24. A tuple is the same shape if it has the same length as `shape` and all the
  25. values have the same shape as the corresponding value in `shape`
  26. Any other object simply has to have the same type.
  27. If `allow_empty` is set, lists and dicts in `data` will pass even if they are empty.
  28. """
  29. if not isinstance(data, type(shape)):
  30. return False
  31. if isinstance(data, dict):
  32. return (
  33. (allow_empty or len(data) > 0)
  34. and all(has_shape(k, list(shape.keys())[0]) for k in data.keys())
  35. and all(has_shape(v, list(shape.values())[0]) for v in data.values())
  36. )
  37. elif isinstance(data, list):
  38. return (allow_empty or len(data) > 0) and all(has_shape(v, shape[0]) for v in data)
  39. elif isinstance(data, tuple):
  40. return len(data) == len(shape) and all(
  41. has_shape(data[i], shape[i]) for i in range(len(data))
  42. )
  43. else:
  44. return True
class SnubaTSDBTest(TestCase, SnubaTestCase):
    """
    Integration tests for the error-event models of ``SnubaTSDB``
    (``TSDBModel.group`` / ``project`` / ``release``, the users-affected
    models, and the frequency models), backed by events written to Snuba
    through ``store_event`` in ``setUp``.
    """

    def setUp(self):
        """Seed Snuba with one event every 10 minutes for 4 hours, alternating
        between two groups and spread across environments/releases/users."""
        super().setUp()
        self.db = SnubaTSDB()
        # Anchor all data at midnight UTC at least 4 hours in the past so the
        # 4-hour event window below never extends into the future.
        self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
        )
        self.proj1 = self.create_project()
        env1 = "test"
        env2 = "dev"
        defaultenv = ""
        release1 = "1" * 10
        release2 = "2" * 10
        self.release1 = Release.objects.create(
            organization_id=self.organization.id, version=release1, date_added=self.now
        )
        self.release1.add_project(self.proj1)
        self.release2 = Release.objects.create(
            organization_id=self.organization.id, version=release2, date_added=self.now
        )
        self.release2.add_project(self.proj1)
        for r in range(0, 14400, 600):  # Every 10 min for 4 hours
            self.store_event(
                data={
                    "event_id": (str(r) * 32)[:32],
                    "message": "message 1",
                    "platform": "python",
                    "fingerprint": [["group-1"], ["group-2"]][
                        (r // 600) % 2
                    ],  # Switch every 10 mins
                    "timestamp": iso_format(self.now + timedelta(seconds=r)),
                    "tags": {
                        "foo": "bar",
                        "baz": "quux",
                        # Switch every 2 hours
                        # (r < 14400 so r // 7200 is 0 or 1; the % 3 never
                        # reaches index 2, which would be out of range)
                        "environment": [env1, None][(r // 7200) % 3],
                        "sentry:user": f"id:user{r // 3300}",
                    },
                    "user": {
                        # change every 55 min so some hours have 1 user, some have 2
                        "id": f"user{r // 3300}",
                    },
                    "release": str(r // 3600) * 10,  # 1 per hour,
                },
                project_id=self.proj1.id,
            )
        groups = Group.objects.filter(project=self.proj1).order_by("id")
        self.proj1group1 = groups[0]
        self.proj1group2 = groups[1]
        self.env1 = Environment.objects.get(name=env1)
        self.env2 = self.create_environment(name=env2)  # No events
        self.defaultenv = Environment.objects.get(name=defaultenv)
        # GroupRelease rows: group1/release1 and group2/release1 were created
        # implicitly by event ingestion (fetched with .get), while
        # group1/release2 is created explicitly here.
        self.group1release1env1 = GroupRelease.objects.get(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release1.id,
            environment=env1,
        )
        self.group1release2env1 = GroupRelease.objects.create(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release2.id,
            environment=env1,
        )
        self.group2release1env1 = GroupRelease.objects.get(
            project_id=self.proj1.id,
            group_id=self.proj1group2.id,
            release_id=self.release1.id,
            environment=env1,
        )

    def test_range_single(self):
        """get_range for a single group counts every event per hourly bucket
        (12 events/hour here: one every 5 minutes)."""
        env1 = "test"
        project = self.create_project()
        for r in range(0, 600 * 6 * 4, 300):  # Every 10 min for 4 hours
            # NOTE(review): the step is 300s (every 5 minutes), so the
            # comment above and the expected 6 * 2 per hour reflect 12
            # events/hour, not 6 — presumably the comment is stale.
            self.store_event(
                data={
                    "event_id": (str(r) * 32)[:32],
                    "message": "message 1",
                    "platform": "python",
                    "fingerprint": ["group-1"],
                    "timestamp": iso_format(self.now + timedelta(seconds=r)),
                    "tags": {
                        "foo": "bar",
                        "baz": "quux",
                        # Switch every 2 hours
                        "environment": [env1, None][(r // 7200) % 3],
                        "sentry:user": f"id:user{r // 3300}",
                    },
                    "user": {
                        # change every 55 min so some hours have 1 user, some have 2
                        "id": f"user{r // 3300}",
                    },
                    "release": str(r // 3600) * 10,  # 1 per hour,
                },
                project_id=project.id,
            )
        groups = Group.objects.filter(project=project).order_by("id")
        group = groups[0]
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.group,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            group.id: [
                (timestamp(dts[0]), 6 * 2),
                (timestamp(dts[1]), 6 * 2),
                (timestamp(dts[2]), 6 * 2),
                (timestamp(dts[3]), 6 * 2),
            ]
        }

    def test_range_groups(self):
        """get_range splits event counts per group (events alternate between
        the two groups, so each gets 3 of the 6 hourly events)."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.group,
            [self.proj1group1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ]
        }

        # Multiple groups
        assert self.db.get_range(
            TSDBModel.group,
            [self.proj1group1.id, self.proj1group2.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }

        # An empty key list yields an empty result, not a query error.
        assert (
            self.db.get_range(
                TSDBModel.group,
                [],
                dts[0],
                dts[-1],
                rollup=3600,
                tenant_ids={"referrer": "test", "organization_id": 1},
            )
            == {}
        )

    def test_range_releases(self):
        """get_range by release: release1 ("1111111111") matches only events
        from the second hour (r // 3600 == 1)."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.release,
            [self.release1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.release1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_project(self):
        """get_range by project counts all 6 events in each hourly bucket."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 6),
                (timestamp(dts[3]), 6),
            ]
        }

    def test_range_environment_filter(self):
        """environment_ids filters buckets: env "test" covers the first two
        hours, the default (empty) environment covers the last two."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.env1.id],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

        # No events submitted for env2
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.env2.id],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

        # Events submitted with no environment should match default environment
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.defaultenv.id],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 6),
                (timestamp(dts[3]), 6),
            ]
        }

    def test_range_rollups(self):
        """get_range honors daily (86400s) and minutely (60s) rollups."""
        # Daily
        daystart = self.now.replace(hour=0)  # day buckets start on day boundaries
        dts = [daystart + timedelta(days=i) for i in range(2)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=86400,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {self.proj1.id: [(timestamp(dts[0]), 24), (timestamp(dts[1]), 0)]}

        # Minutely
        dts = [self.now + timedelta(minutes=i) for i in range(120)]
        # Expect every 10th minute to have a 1, else 0
        expected = [(to_timestamp(d), 1 if i % 10 == 0 else 0) for i, d in enumerate(dts)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=60,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {self.proj1.id: expected}

    def test_distinct_counts_series_users(self):
        """get_distinct_counts_series counts unique users per hourly bucket,
        for a single group and for the whole project."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 1),
                (timestamp(dts[2]), 1),
                (timestamp(dts[3]), 2),
            ]
        }

        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 2),
                (timestamp(dts[2]), 2),
                (timestamp(dts[3]), 2),
            ]
        }

        # Empty key list → empty result.
        assert (
            self.db.get_distinct_counts_series(
                TSDBModel.users_affected_by_group,
                [],
                dts[0],
                dts[-1],
                rollup=3600,
                tenant_ids={"referrer": "r", "organization_id": 1234},
            )
            == {}
        )

    def get_distinct_counts_totals_users(self):
        # NOTE(review): this method lacks the `test_` prefix, so the test
        # runner never collects it — confirm whether that is intentional
        # (the generic-issues test class later in this file has a
        # `test_`-prefixed variant of the same scenario).
        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1group1.id: 2  # 2 unique users overall
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            self.now,
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1group1.id: 1  # Only 1 unique user in the first hour
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project,
            [self.proj1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {self.proj1.id: 2}

        assert (
            self.db.get_distinct_counts_totals(
                TSDBModel.users_affected_by_group,
                [],
                self.now,
                self.now + timedelta(hours=4),
                rollup=3600,
                tenant_ids={"referrer": "r", "organization_id": 1234},
            )
            == {}
        )

    def test_most_frequent(self):
        """get_most_frequent ranks groups by frequency; the two groups are
        tied, so either ordering is acceptable."""
        assert self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [self.proj1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) in [
            {self.proj1.id: [(self.proj1group1.id, 2.0), (self.proj1group2.id, 1.0)]},
            {self.proj1.id: [(self.proj1group2.id, 2.0), (self.proj1group1.id, 1.0)]},
        ]  # Both issues equally frequent

        assert (
            self.db.get_most_frequent(
                TSDBModel.frequent_issues_by_project,
                [],
                self.now,
                self.now + timedelta(hours=4),
                rollup=3600,
                tenant_ids={"referrer": "r", "organization_id": 1234},
            )
            == {}
        )

    def test_frequency_series(self):
        """get_frequency_series returns per-bucket counts keyed by the
        requested (group → releases) mapping."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_frequency_series(
            TSDBModel.frequent_releases_by_group,
            {
                self.proj1group1.id: (self.group1release1env1.id, self.group1release2env1.id),
                self.proj1group2.id: (self.group2release1env1.id,),
            },
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), {self.group1release1env1.id: 0, self.group1release2env1.id: 0}),
                (timestamp(dts[1]), {self.group1release1env1.id: 3, self.group1release2env1.id: 0}),
                (timestamp(dts[2]), {self.group1release1env1.id: 0, self.group1release2env1.id: 3}),
                (timestamp(dts[3]), {self.group1release1env1.id: 0, self.group1release2env1.id: 0}),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), {self.group2release1env1.id: 0}),
                (timestamp(dts[1]), {self.group2release1env1.id: 3}),
                (timestamp(dts[2]), {self.group2release1env1.id: 0}),
                (timestamp(dts[3]), {self.group2release1env1.id: 0}),
            ],
        }

        assert (
            self.db.get_frequency_series(
                TSDBModel.frequent_releases_by_group,
                {},
                dts[0],
                dts[-1],
                rollup=3600,
                tenant_ids={"referrer": "r", "organization_id": 1234},
            )
            == {}
        )

    def test_result_shape(self):
        """
        Tests that the results from the different TSDB methods have the
        expected format.
        """
        project_id = self.proj1.id
        dts = [self.now + timedelta(hours=i) for i in range(4)]

        results = self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [project_id],
            dts[0],
            dts[0],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, {1: [(1, 1.0)]})

        results = self.db.get_most_frequent_series(
            TSDBModel.frequent_issues_by_project,
            [project_id],
            dts[0],
            dts[0],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, {1: [(1, {1: 1.0})]})

        items = {
            # {project_id: (issue_id, issue_id, ...)}
            project_id: (self.proj1group1.id, self.proj1group2.id)
        }
        results = self.db.get_frequency_series(
            TSDBModel.frequent_issues_by_project,
            items,
            dts[0],
            dts[-1],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, {1: [(1, {1: 1})]})

        results = self.db.get_frequency_totals(
            TSDBModel.frequent_issues_by_project,
            items,
            dts[0],
            dts[-1],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, {1: {1: 1}})

        results = self.db.get_range(
            TSDBModel.project,
            [project_id],
            dts[0],
            dts[-1],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, {1: [(1, 1)]})

        results = self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project,
            [project_id],
            dts[0],
            dts[-1],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, {1: [(1, 1)]})

        results = self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project,
            [project_id],
            dts[0],
            dts[-1],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, {1: 1})

        results = self.db.get_distinct_counts_union(
            TSDBModel.users_affected_by_project,
            [project_id],
            dts[0],
            dts[-1],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, 1)

    def test_calculated_limit(self):
        """The SnQL limit is derived from the number of keys times the number
        of rollup buckets (e.g. 5 keys * 24 hourly buckets = 120)."""
        with patch("sentry.tsdb.snuba.raw_snql_query") as snuba:
            # 24h test
            rollup = 3600
            end = self.now
            start = end + timedelta(days=-1, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.call_args.args[0].query.limit == Limit(120)

            # 14 day test
            rollup = 86400
            start = end + timedelta(days=-14, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.call_args.args[0].query.limit == Limit(70)

            # 1h test
            rollup = 3600
            end = self.now
            start = end + timedelta(hours=-1, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.call_args.args[0].query.limit == Limit(5)

    @patch("sentry.utils.snuba.OVERRIDE_OPTIONS", new={"consistent": True})
    def test_tsdb_with_consistent(self):
        """When the global "consistent" override is on, the request built by
        get_data carries flags.consistent == True."""
        with patch("sentry.utils.snuba._apply_cache_and_build_results") as snuba:
            rollup = 3600
            end = self.now
            start = end + timedelta(days=-1, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            # _apply_cache_and_build_results receives a list of (request, ...)
            # pairs; dig the first request out of the nested args.
            assert snuba.call_args.args[0][0][0].query.limit == Limit(120)
            assert snuba.call_args.args[0][0][0].flags.consistent is True
@region_silo_test
class SnubaTSDBGroupProfilingTest(TestCase, SnubaTestCase, SearchIssueTestMixin):
    """
    Integration tests for the generic-issue (search-issue) models of
    ``SnubaTSDB`` — ``TSDBModel.group_generic`` and
    ``users_affected_by_generic_group`` — seeded with profiling occurrences
    via ``store_search_issue``.
    """

    def setUp(self):
        """Store one search issue every 10 minutes for 4 hours, alternating
        between two fingerprints and two environments."""
        super().setUp()
        self.db = SnubaTSDB()
        # Anchor at midnight UTC at least 4 hours back so the event window
        # stays in the past.
        self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
        )
        self.proj1 = self.create_project()
        self.env1 = Environment.objects.get_or_create(
            organization_id=self.proj1.organization_id, name="test"
        )[0]
        self.env2 = Environment.objects.get_or_create(
            organization_id=self.proj1.organization_id, name="dev"
        )[0]
        defaultenv = ""

        group1_fingerprint = f"{ProfileFileIOGroupType.type_id}-group1"
        group2_fingerprint = f"{ProfileFileIOGroupType.type_id}-group2"

        groups = {}
        for r in range(0, 14400, 600):  # Every 10 min for 4 hours
            event, occurrence, group_info = self.store_search_issue(
                project_id=self.proj1.id,
                # change every 55 min so some hours have 1 user, some have 2
                user_id=r // 3300,
                fingerprints=[group1_fingerprint] if ((r // 600) % 2) else [group2_fingerprint],
                # release_version=str(r // 3600) * 10, # 1 per hour,
                environment=[self.env1.name, None][(r // 7200) % 3],
                insert_time=self.now + timedelta(seconds=r),
            )
            if group_info:
                groups[group_info.group.id] = group_info.group

        # Dict keyed by id de-duplicates repeated occurrences of each group.
        all_groups = list(groups.values())
        self.proj1group1 = all_groups[0]
        self.proj1group2 = all_groups[1]
        self.defaultenv = Environment.objects.get(name=defaultenv)

    def test_range_group_manual_group_time_rollup(self):
        """For each supported granularity, the optimal rollup matches the
        series spacing and get_range returns one occurrence per bucket."""
        project = self.create_project()

        # these are the only granularities/rollups that be actually be used
        GRANULARITIES = [
            (10, timedelta(seconds=10), 5),
            (60 * 60, timedelta(hours=1), 6),
            (60 * 60 * 24, timedelta(days=1), 15),
        ]

        start = (datetime.now(timezone.utc) - timedelta(days=15)).replace(
            hour=0, minute=0, second=0
        )

        for step, delta, times in GRANULARITIES:
            series = [start + (delta * i) for i in range(times)]
            series_ts = [int(to_timestamp(ts)) for ts in series]

            assert self.db.get_optimal_rollup(series[0], series[-1]) == step
            assert self.db.get_optimal_rollup_series(series[0], end=series[-1], rollup=None) == (
                step,
                series_ts,
            )

            # One distinct fingerprint per granularity so each pass gets its
            # own group; every series point receives exactly one occurrence.
            for time_step in series:
                _, _, group_info = self.store_search_issue(
                    project_id=project.id,
                    user_id=0,
                    fingerprints=[f"test_range_group_manual_group_time_rollup-{step}"],
                    environment=None,
                    insert_time=time_step,
                )

            # group_info from the last insert; all inserts share one group.
            assert group_info is not None
            assert self.db.get_range(
                TSDBModel.group_generic,
                [group_info.group.id],
                series[0],
                series[-1],
                rollup=None,
                tenant_ids={"referrer": "test", "organization_id": 1},
            ) == {group_info.group.id: [(ts, 1) for ts in series_ts]}

    def test_range_groups_mult(self):
        """11 occurrences at 10-minute spacing land 6 in the first hour and
        5 in the second."""
        now = (datetime.utcnow() - timedelta(days=1)).replace(
            hour=10, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
        )
        dts = [now + timedelta(hours=i) for i in range(4)]
        project = self.create_project()
        group_fingerprint = f"{ProfileFileIOGroupType.type_id}-group4"

        groups = []
        for i in range(0, 11):
            _, _, group_info = self.store_search_issue(
                project_id=project.id,
                user_id=0,
                fingerprints=[group_fingerprint],
                environment=None,
                insert_time=now + timedelta(minutes=i * 10),
            )
            if group_info:
                groups.append(group_info.group)

        group = groups[0]
        assert self.db.get_range(
            TSDBModel.group_generic,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "test", "organization_id": 1},
        ) == {
            group.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 5),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_groups_simple(self):
        """Five occurrences at the same instant all land in the first hourly
        bucket of the group's series."""
        project = self.create_project()
        now = (datetime.utcnow() - timedelta(days=1)).replace(
            hour=10, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
        )
        group_fingerprint = f"{ProfileFileIOGroupType.type_id}-group5"
        ids = [1, 2, 3, 4, 5]
        groups = []
        for r in ids:
            # for r in range(0, 9, 1):
            event, occurrence, group_info = self.store_search_issue(
                project_id=project.id,
                # change every 55 min so some hours have 1 user, some have 2
                user_id=r,
                fingerprints=[group_fingerprint],
                environment=None,
                # release_version=str(r // 3600) * 10, # 1 per hour,
                insert_time=now,
            )
            if group_info:
                groups.append(group_info.group)

        group = groups[0]
        dts = [now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.group_generic,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "test", "organization_id": 1},
        ) == {
            group.id: [
                (timestamp(dts[0]), len(ids)),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_groups(self):
        """get_range for multiple generic groups mirrors the error-event
        behavior: 3 occurrences per group per hour, {} for no keys."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]

        # Multiple groups
        assert self.db.get_range(
            TSDBModel.group_generic,
            [self.proj1group1.id, self.proj1group2.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "test", "organization_id": 1},
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }

        assert (
            self.db.get_range(
                TSDBModel.group_generic,
                [],
                dts[0],
                dts[-1],
                rollup=3600,
                tenant_ids={"referrer": "test", "organization_id": 1},
            )
            == {}
        )

    def test_get_distinct_counts_totals_users(self):
        """get_distinct_counts_totals counts unique users over the window for
        generic groups."""
        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_generic_group,
            [self.proj1group1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
            tenant_ids={"referrer": "test", "organization_id": 1},
        ) == {
            self.proj1group1.id: 5  # 5 unique users overall
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_generic_group,
            [self.proj1group1.id],
            self.now,
            self.now,
            rollup=3600,
            tenant_ids={"referrer": "test", "organization_id": 1},
        ) == {
            self.proj1group1.id: 1  # Only 1 unique user in the first hour
        }

        assert (
            self.db.get_distinct_counts_totals(
                TSDBModel.users_affected_by_generic_group,
                [],
                self.now,
                self.now + timedelta(hours=4),
                rollup=3600,
                tenant_ids={"referrer": "test", "organization_id": 1},
            )
            == {}
        )

    def test_get_sums(self):
        """get_sums totals each group's occurrences over the whole window
        (12 each: one every 10 minutes for 4 hours, split evenly)."""
        assert self.db.get_sums(
            model=TSDBModel.group_generic,
            keys=[self.proj1group1.id, self.proj1group2.id],
            start=self.now,
            end=self.now + timedelta(hours=4),
            tenant_ids={"referrer": "test", "organization_id": 1},
        ) == {self.proj1group1.id: 12, self.proj1group2.id: 12}

    def test_get_data_or_conditions_parsed(self):
        """
        Verify parsing the legacy format with nested OR conditions works
        """
        conditions = [
            # or conditions in the legacy format needs open and close brackets for precedence
            # there's some special casing when parsing conditions that specifically handles this
            [
                [["isNull", ["environment"]], "=", 1],
                ["environment", "IN", [self.env1.name]],
            ]
        ]

        data1 = self.db.get_data(
            model=TSDBModel.group_generic,
            keys=[self.proj1group1.id, self.proj1group2.id],
            conditions=conditions,
            start=self.now,
            end=self.now + timedelta(hours=4),
            tenant_ids={"referrer": "test", "organization_id": 1},
        )
        data2 = self.db.get_data(
            model=TSDBModel.group_generic,
            keys=[self.proj1group1.id, self.proj1group2.id],
            start=self.now,
            end=self.now + timedelta(hours=4),
            tenant_ids={"referrer": "test", "organization_id": 1},
        )

        # the above queries should return the same data since all groups either have:
        # environment=None or environment=test
        # so the condition really shouldn't be filtering anything
        assert data1 == data2
  815. class AddJitterToSeriesTest(TestCase):
  816. def setUp(self):
  817. self.db = SnubaTSDB()
  818. def run_test(self, end, interval, jitter, expected_start, expected_end):
  819. end = end.replace(tzinfo=timezone.utc)
  820. start = end - interval
  821. rollup, rollup_series = self.db.get_optimal_rollup_series(start, end)
  822. series = self.db._add_jitter_to_series(rollup_series, start, rollup, jitter)
  823. assert to_datetime(series[0]) == expected_start.replace(tzinfo=timezone.utc)
  824. assert to_datetime(series[-1]) == expected_end.replace(tzinfo=timezone.utc)
  825. def test(self):
  826. self.run_test(
  827. end=datetime(2022, 5, 18, 10, 23, 4),
  828. interval=timedelta(hours=1),
  829. jitter=5,
  830. expected_start=datetime(2022, 5, 18, 9, 22, 55),
  831. expected_end=datetime(2022, 5, 18, 10, 22, 55),
  832. )
  833. self.run_test(
  834. end=datetime(2022, 5, 18, 10, 23, 8),
  835. interval=timedelta(hours=1),
  836. jitter=5,
  837. expected_start=datetime(2022, 5, 18, 9, 23, 5),
  838. expected_end=datetime(2022, 5, 18, 10, 23, 5),
  839. )
  840. # Jitter should be the same
  841. self.run_test(
  842. end=datetime(2022, 5, 18, 10, 23, 8),
  843. interval=timedelta(hours=1),
  844. jitter=55,
  845. expected_start=datetime(2022, 5, 18, 9, 23, 5),
  846. expected_end=datetime(2022, 5, 18, 10, 23, 5),
  847. )
  848. self.run_test(
  849. end=datetime(2022, 5, 18, 22, 33, 2),
  850. interval=timedelta(minutes=1),
  851. jitter=3,
  852. expected_start=datetime(2022, 5, 18, 22, 31, 53),
  853. expected_end=datetime(2022, 5, 18, 22, 32, 53),
  854. )
  855. def test_empty_series(self):
  856. assert self.db._add_jitter_to_series([], datetime(2022, 5, 18, 10, 23, 4), 60, 127) == []
  857. assert self.db._add_jitter_to_series([], datetime(2022, 5, 18, 10, 23, 4), 60, None) == []