test_tsdb_backend.py 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913
  1. from datetime import datetime, timedelta, timezone
  2. from unittest.mock import patch
  3. import pytz
  4. from snuba_sdk import Limit
  5. from sentry.issues.grouptype import ProfileFileIOGroupType
  6. from sentry.models import Environment, Group, GroupRelease, Release
  7. from sentry.testutils import SnubaTestCase, TestCase
  8. from sentry.testutils.helpers.datetime import iso_format
  9. from sentry.testutils.silo import region_silo_test
  10. from sentry.tsdb.base import TSDBModel
  11. from sentry.tsdb.snuba import SnubaTSDB
  12. from sentry.utils.dates import to_datetime, to_timestamp
  13. from tests.sentry.issues.test_utils import SearchIssueTestMixin
  14. def timestamp(d):
  15. t = int(to_timestamp(d))
  16. return t - (t % 3600)
  17. def has_shape(data, shape, allow_empty=False):
  18. """
  19. Determine if a data object has the provided shape
  20. At any level, the object in `data` and in `shape` must have the same type.
  21. A dict is the same shape if all its keys and values have the same shape as the
  22. key/value in `shape`. The number of keys/values is not relevant.
  23. A list is the same shape if all its items have the same shape as the value
  24. in `shape`
  25. A tuple is the same shape if it has the same length as `shape` and all the
  26. values have the same shape as the corresponding value in `shape`
  27. Any other object simply has to have the same type.
  28. If `allow_empty` is set, lists and dicts in `data` will pass even if they are empty.
  29. """
  30. if not isinstance(data, type(shape)):
  31. return False
  32. if isinstance(data, dict):
  33. return (
  34. (allow_empty or len(data) > 0)
  35. and all(has_shape(k, list(shape.keys())[0]) for k in data.keys())
  36. and all(has_shape(v, list(shape.values())[0]) for v in data.values())
  37. )
  38. elif isinstance(data, list):
  39. return (allow_empty or len(data) > 0) and all(has_shape(v, shape[0]) for v in data)
  40. elif isinstance(data, tuple):
  41. return len(data) == len(shape) and all(
  42. has_shape(data[i], shape[i]) for i in range(len(data))
  43. )
  44. else:
  45. return True
  46. class SnubaTSDBTest(TestCase, SnubaTestCase):
  47. def setUp(self):
  48. super().setUp()
  49. self.db = SnubaTSDB()
  50. self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
  51. hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
  52. )
  53. self.proj1 = self.create_project()
  54. env1 = "test"
  55. env2 = "dev"
  56. defaultenv = ""
  57. release1 = "1" * 10
  58. release2 = "2" * 10
  59. self.release1 = Release.objects.create(
  60. organization_id=self.organization.id, version=release1, date_added=self.now
  61. )
  62. self.release1.add_project(self.proj1)
  63. self.release2 = Release.objects.create(
  64. organization_id=self.organization.id, version=release2, date_added=self.now
  65. )
  66. self.release2.add_project(self.proj1)
  67. for r in range(0, 14400, 600): # Every 10 min for 4 hours
  68. self.store_event(
  69. data={
  70. "event_id": (str(r) * 32)[:32],
  71. "message": "message 1",
  72. "platform": "python",
  73. "fingerprint": [["group-1"], ["group-2"]][
  74. (r // 600) % 2
  75. ], # Switch every 10 mins
  76. "timestamp": iso_format(self.now + timedelta(seconds=r)),
  77. "tags": {
  78. "foo": "bar",
  79. "baz": "quux",
  80. # Switch every 2 hours
  81. "environment": [env1, None][(r // 7200) % 3],
  82. "sentry:user": f"id:user{r // 3300}",
  83. },
  84. "user": {
  85. # change every 55 min so some hours have 1 user, some have 2
  86. "id": f"user{r // 3300}",
  87. },
  88. "release": str(r // 3600) * 10, # 1 per hour,
  89. },
  90. project_id=self.proj1.id,
  91. )
  92. groups = Group.objects.filter(project=self.proj1).order_by("id")
  93. self.proj1group1 = groups[0]
  94. self.proj1group2 = groups[1]
  95. self.env1 = Environment.objects.get(name=env1)
  96. self.env2 = self.create_environment(name=env2) # No events
  97. self.defaultenv = Environment.objects.get(name=defaultenv)
  98. self.group1release1env1 = GroupRelease.objects.get(
  99. project_id=self.proj1.id,
  100. group_id=self.proj1group1.id,
  101. release_id=self.release1.id,
  102. environment=env1,
  103. )
  104. self.group1release2env1 = GroupRelease.objects.create(
  105. project_id=self.proj1.id,
  106. group_id=self.proj1group1.id,
  107. release_id=self.release2.id,
  108. environment=env1,
  109. )
  110. self.group2release1env1 = GroupRelease.objects.get(
  111. project_id=self.proj1.id,
  112. group_id=self.proj1group2.id,
  113. release_id=self.release1.id,
  114. environment=env1,
  115. )
  116. def test_range_single(self):
  117. env1 = "test"
  118. project = self.create_project()
  119. for r in range(0, 600 * 6 * 4, 300): # Every 10 min for 4 hours
  120. self.store_event(
  121. data={
  122. "event_id": (str(r) * 32)[:32],
  123. "message": "message 1",
  124. "platform": "python",
  125. "fingerprint": ["group-1"],
  126. "timestamp": iso_format(self.now + timedelta(seconds=r)),
  127. "tags": {
  128. "foo": "bar",
  129. "baz": "quux",
  130. # Switch every 2 hours
  131. "environment": [env1, None][(r // 7200) % 3],
  132. "sentry:user": f"id:user{r // 3300}",
  133. },
  134. "user": {
  135. # change every 55 min so some hours have 1 user, some have 2
  136. "id": f"user{r // 3300}",
  137. },
  138. "release": str(r // 3600) * 10, # 1 per hour,
  139. },
  140. project_id=project.id,
  141. )
  142. groups = Group.objects.filter(project=project).order_by("id")
  143. group = groups[0]
  144. dts = [self.now + timedelta(hours=i) for i in range(4)]
  145. assert self.db.get_range(
  146. TSDBModel.group,
  147. [group.id],
  148. dts[0],
  149. dts[-1],
  150. rollup=3600,
  151. tenant_ids={"referrer": "r", "organization_id": 1234},
  152. ) == {
  153. group.id: [
  154. (timestamp(dts[0]), 6 * 2),
  155. (timestamp(dts[1]), 6 * 2),
  156. (timestamp(dts[2]), 6 * 2),
  157. (timestamp(dts[3]), 6 * 2),
  158. ]
  159. }
  160. def test_range_groups(self):
  161. dts = [self.now + timedelta(hours=i) for i in range(4)]
  162. assert self.db.get_range(
  163. TSDBModel.group,
  164. [self.proj1group1.id],
  165. dts[0],
  166. dts[-1],
  167. rollup=3600,
  168. tenant_ids={"referrer": "r", "organization_id": 1234},
  169. ) == {
  170. self.proj1group1.id: [
  171. (timestamp(dts[0]), 3),
  172. (timestamp(dts[1]), 3),
  173. (timestamp(dts[2]), 3),
  174. (timestamp(dts[3]), 3),
  175. ]
  176. }
  177. # Multiple groups
  178. assert self.db.get_range(
  179. TSDBModel.group,
  180. [self.proj1group1.id, self.proj1group2.id],
  181. dts[0],
  182. dts[-1],
  183. rollup=3600,
  184. tenant_ids={"referrer": "r", "organization_id": 1234},
  185. ) == {
  186. self.proj1group1.id: [
  187. (timestamp(dts[0]), 3),
  188. (timestamp(dts[1]), 3),
  189. (timestamp(dts[2]), 3),
  190. (timestamp(dts[3]), 3),
  191. ],
  192. self.proj1group2.id: [
  193. (timestamp(dts[0]), 3),
  194. (timestamp(dts[1]), 3),
  195. (timestamp(dts[2]), 3),
  196. (timestamp(dts[3]), 3),
  197. ],
  198. }
  199. assert self.db.get_range(TSDBModel.group, [], dts[0], dts[-1], rollup=3600) == {}
  200. def test_range_releases(self):
  201. dts = [self.now + timedelta(hours=i) for i in range(4)]
  202. assert self.db.get_range(
  203. TSDBModel.release,
  204. [self.release1.id],
  205. dts[0],
  206. dts[-1],
  207. rollup=3600,
  208. tenant_ids={"referrer": "r", "organization_id": 1234},
  209. ) == {
  210. self.release1.id: [
  211. (timestamp(dts[0]), 0),
  212. (timestamp(dts[1]), 6),
  213. (timestamp(dts[2]), 0),
  214. (timestamp(dts[3]), 0),
  215. ]
  216. }
  217. def test_range_project(self):
  218. dts = [self.now + timedelta(hours=i) for i in range(4)]
  219. assert self.db.get_range(
  220. TSDBModel.project,
  221. [self.proj1.id],
  222. dts[0],
  223. dts[-1],
  224. rollup=3600,
  225. tenant_ids={"referrer": "r", "organization_id": 1234},
  226. ) == {
  227. self.proj1.id: [
  228. (timestamp(dts[0]), 6),
  229. (timestamp(dts[1]), 6),
  230. (timestamp(dts[2]), 6),
  231. (timestamp(dts[3]), 6),
  232. ]
  233. }
  234. def test_range_environment_filter(self):
  235. dts = [self.now + timedelta(hours=i) for i in range(4)]
  236. assert self.db.get_range(
  237. TSDBModel.project,
  238. [self.proj1.id],
  239. dts[0],
  240. dts[-1],
  241. rollup=3600,
  242. environment_ids=[self.env1.id],
  243. tenant_ids={"referrer": "r", "organization_id": 1234},
  244. ) == {
  245. self.proj1.id: [
  246. (timestamp(dts[0]), 6),
  247. (timestamp(dts[1]), 6),
  248. (timestamp(dts[2]), 0),
  249. (timestamp(dts[3]), 0),
  250. ]
  251. }
  252. # No events submitted for env2
  253. assert self.db.get_range(
  254. TSDBModel.project,
  255. [self.proj1.id],
  256. dts[0],
  257. dts[-1],
  258. rollup=3600,
  259. environment_ids=[self.env2.id],
  260. tenant_ids={"referrer": "r", "organization_id": 1234},
  261. ) == {
  262. self.proj1.id: [
  263. (timestamp(dts[0]), 0),
  264. (timestamp(dts[1]), 0),
  265. (timestamp(dts[2]), 0),
  266. (timestamp(dts[3]), 0),
  267. ]
  268. }
  269. # Events submitted with no environment should match default environment
  270. assert self.db.get_range(
  271. TSDBModel.project,
  272. [self.proj1.id],
  273. dts[0],
  274. dts[-1],
  275. rollup=3600,
  276. environment_ids=[self.defaultenv.id],
  277. tenant_ids={"referrer": "r", "organization_id": 1234},
  278. ) == {
  279. self.proj1.id: [
  280. (timestamp(dts[0]), 0),
  281. (timestamp(dts[1]), 0),
  282. (timestamp(dts[2]), 6),
  283. (timestamp(dts[3]), 6),
  284. ]
  285. }
  286. def test_range_rollups(self):
  287. # Daily
  288. daystart = self.now.replace(hour=0) # day buckets start on day boundaries
  289. dts = [daystart + timedelta(days=i) for i in range(2)]
  290. assert self.db.get_range(
  291. TSDBModel.project,
  292. [self.proj1.id],
  293. dts[0],
  294. dts[-1],
  295. rollup=86400,
  296. tenant_ids={"referrer": "r", "organization_id": 1234},
  297. ) == {self.proj1.id: [(timestamp(dts[0]), 24), (timestamp(dts[1]), 0)]}
  298. # Minutely
  299. dts = [self.now + timedelta(minutes=i) for i in range(120)]
  300. # Expect every 10th minute to have a 1, else 0
  301. expected = [(to_timestamp(d), 1 if i % 10 == 0 else 0) for i, d in enumerate(dts)]
  302. assert self.db.get_range(
  303. TSDBModel.project,
  304. [self.proj1.id],
  305. dts[0],
  306. dts[-1],
  307. rollup=60,
  308. tenant_ids={"referrer": "r", "organization_id": 1234},
  309. ) == {self.proj1.id: expected}
  310. def test_distinct_counts_series_users(self):
  311. dts = [self.now + timedelta(hours=i) for i in range(4)]
  312. assert self.db.get_distinct_counts_series(
  313. TSDBModel.users_affected_by_group,
  314. [self.proj1group1.id],
  315. dts[0],
  316. dts[-1],
  317. rollup=3600,
  318. tenant_ids={"referrer": "r", "organization_id": 1234},
  319. ) == {
  320. self.proj1group1.id: [
  321. (timestamp(dts[0]), 1),
  322. (timestamp(dts[1]), 1),
  323. (timestamp(dts[2]), 1),
  324. (timestamp(dts[3]), 2),
  325. ]
  326. }
  327. dts = [self.now + timedelta(hours=i) for i in range(4)]
  328. assert self.db.get_distinct_counts_series(
  329. TSDBModel.users_affected_by_project,
  330. [self.proj1.id],
  331. dts[0],
  332. dts[-1],
  333. rollup=3600,
  334. tenant_ids={"referrer": "r", "organization_id": 1234},
  335. ) == {
  336. self.proj1.id: [
  337. (timestamp(dts[0]), 1),
  338. (timestamp(dts[1]), 2),
  339. (timestamp(dts[2]), 2),
  340. (timestamp(dts[3]), 2),
  341. ]
  342. }
  343. assert (
  344. self.db.get_distinct_counts_series(
  345. TSDBModel.users_affected_by_group,
  346. [],
  347. dts[0],
  348. dts[-1],
  349. rollup=3600,
  350. tenant_ids={"referrer": "r", "organization_id": 1234},
  351. )
  352. == {}
  353. )
  354. def get_distinct_counts_totals_users(self):
  355. assert self.db.get_distinct_counts_totals(
  356. TSDBModel.users_affected_by_group,
  357. [self.proj1group1.id],
  358. self.now,
  359. self.now + timedelta(hours=4),
  360. rollup=3600,
  361. ) == {
  362. self.proj1group1.id: 2 # 2 unique users overall
  363. }
  364. assert self.db.get_distinct_counts_totals(
  365. TSDBModel.users_affected_by_group,
  366. [self.proj1group1.id],
  367. self.now,
  368. self.now,
  369. rollup=3600,
  370. ) == {
  371. self.proj1group1.id: 1 # Only 1 unique user in the first hour
  372. }
  373. assert self.db.get_distinct_counts_totals(
  374. TSDBModel.users_affected_by_project,
  375. [self.proj1.id],
  376. self.now,
  377. self.now + timedelta(hours=4),
  378. rollup=3600,
  379. ) == {self.proj1.id: 2}
  380. assert (
  381. self.db.get_distinct_counts_totals(
  382. TSDBModel.users_affected_by_group,
  383. [],
  384. self.now,
  385. self.now + timedelta(hours=4),
  386. rollup=3600,
  387. )
  388. == {}
  389. )
  390. def test_most_frequent(self):
  391. assert self.db.get_most_frequent(
  392. TSDBModel.frequent_issues_by_project,
  393. [self.proj1.id],
  394. self.now,
  395. self.now + timedelta(hours=4),
  396. rollup=3600,
  397. tenant_ids={"referrer": "r", "organization_id": 1234},
  398. ) in [
  399. {self.proj1.id: [(self.proj1group1.id, 2.0), (self.proj1group2.id, 1.0)]},
  400. {self.proj1.id: [(self.proj1group2.id, 2.0), (self.proj1group1.id, 1.0)]},
  401. ] # Both issues equally frequent
  402. assert (
  403. self.db.get_most_frequent(
  404. TSDBModel.frequent_issues_by_project,
  405. [],
  406. self.now,
  407. self.now + timedelta(hours=4),
  408. rollup=3600,
  409. tenant_ids={"referrer": "r", "organization_id": 1234},
  410. )
  411. == {}
  412. )
  413. def test_frequency_series(self):
  414. dts = [self.now + timedelta(hours=i) for i in range(4)]
  415. assert self.db.get_frequency_series(
  416. TSDBModel.frequent_releases_by_group,
  417. {
  418. self.proj1group1.id: (self.group1release1env1.id, self.group1release2env1.id),
  419. self.proj1group2.id: (self.group2release1env1.id,),
  420. },
  421. dts[0],
  422. dts[-1],
  423. rollup=3600,
  424. tenant_ids={"referrer": "r", "organization_id": 1234},
  425. ) == {
  426. self.proj1group1.id: [
  427. (timestamp(dts[0]), {self.group1release1env1.id: 0, self.group1release2env1.id: 0}),
  428. (timestamp(dts[1]), {self.group1release1env1.id: 3, self.group1release2env1.id: 0}),
  429. (timestamp(dts[2]), {self.group1release1env1.id: 0, self.group1release2env1.id: 3}),
  430. (timestamp(dts[3]), {self.group1release1env1.id: 0, self.group1release2env1.id: 0}),
  431. ],
  432. self.proj1group2.id: [
  433. (timestamp(dts[0]), {self.group2release1env1.id: 0}),
  434. (timestamp(dts[1]), {self.group2release1env1.id: 3}),
  435. (timestamp(dts[2]), {self.group2release1env1.id: 0}),
  436. (timestamp(dts[3]), {self.group2release1env1.id: 0}),
  437. ],
  438. }
  439. assert (
  440. self.db.get_frequency_series(
  441. TSDBModel.frequent_releases_by_group,
  442. {},
  443. dts[0],
  444. dts[-1],
  445. rollup=3600,
  446. tenant_ids={"referrer": "r", "organization_id": 1234},
  447. )
  448. == {}
  449. )
  450. def test_result_shape(self):
  451. """
  452. Tests that the results from the different TSDB methods have the
  453. expected format.
  454. """
  455. project_id = self.proj1.id
  456. dts = [self.now + timedelta(hours=i) for i in range(4)]
  457. results = self.db.get_most_frequent(
  458. TSDBModel.frequent_issues_by_project,
  459. [project_id],
  460. dts[0],
  461. dts[0],
  462. tenant_ids={"referrer": "r", "organization_id": 1234},
  463. )
  464. assert has_shape(results, {1: [(1, 1.0)]})
  465. results = self.db.get_most_frequent_series(
  466. TSDBModel.frequent_issues_by_project,
  467. [project_id],
  468. dts[0],
  469. dts[0],
  470. tenant_ids={"referrer": "r", "organization_id": 1234},
  471. )
  472. assert has_shape(results, {1: [(1, {1: 1.0})]})
  473. items = {
  474. # {project_id: (issue_id, issue_id, ...)}
  475. project_id: (self.proj1group1.id, self.proj1group2.id)
  476. }
  477. results = self.db.get_frequency_series(
  478. TSDBModel.frequent_issues_by_project,
  479. items,
  480. dts[0],
  481. dts[-1],
  482. tenant_ids={"referrer": "r", "organization_id": 1234},
  483. )
  484. assert has_shape(results, {1: [(1, {1: 1})]})
  485. results = self.db.get_frequency_totals(
  486. TSDBModel.frequent_issues_by_project,
  487. items,
  488. dts[0],
  489. dts[-1],
  490. tenant_ids={"referrer": "r", "organization_id": 1234},
  491. )
  492. assert has_shape(results, {1: {1: 1}})
  493. results = self.db.get_range(
  494. TSDBModel.project,
  495. [project_id],
  496. dts[0],
  497. dts[-1],
  498. tenant_ids={"referrer": "r", "organization_id": 1234},
  499. )
  500. assert has_shape(results, {1: [(1, 1)]})
  501. results = self.db.get_distinct_counts_series(
  502. TSDBModel.users_affected_by_project,
  503. [project_id],
  504. dts[0],
  505. dts[-1],
  506. tenant_ids={"referrer": "r", "organization_id": 1234},
  507. )
  508. assert has_shape(results, {1: [(1, 1)]})
  509. results = self.db.get_distinct_counts_totals(
  510. TSDBModel.users_affected_by_project,
  511. [project_id],
  512. dts[0],
  513. dts[-1],
  514. tenant_ids={"referrer": "r", "organization_id": 1234},
  515. )
  516. assert has_shape(results, {1: 1})
  517. results = self.db.get_distinct_counts_union(
  518. TSDBModel.users_affected_by_project,
  519. [project_id],
  520. dts[0],
  521. dts[-1],
  522. tenant_ids={"referrer": "r", "organization_id": 1234},
  523. )
  524. assert has_shape(results, 1)
  525. def test_calculated_limit(self):
  526. with patch("sentry.tsdb.snuba.raw_snql_query") as snuba:
  527. # 24h test
  528. rollup = 3600
  529. end = self.now
  530. start = end + timedelta(days=-1, seconds=rollup)
  531. self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
  532. assert snuba.call_args.args[0].query.limit == Limit(120)
  533. # 14 day test
  534. rollup = 86400
  535. start = end + timedelta(days=-14, seconds=rollup)
  536. self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
  537. assert snuba.call_args.args[0].query.limit == Limit(70)
  538. # 1h test
  539. rollup = 3600
  540. end = self.now
  541. start = end + timedelta(hours=-1, seconds=rollup)
  542. self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
  543. assert snuba.call_args.args[0].query.limit == Limit(5)
  544. @patch("sentry.utils.snuba.OVERRIDE_OPTIONS", new={"consistent": True})
  545. def test_tsdb_with_consistent(self):
  546. with patch("sentry.utils.snuba._apply_cache_and_build_results") as snuba:
  547. rollup = 3600
  548. end = self.now
  549. start = end + timedelta(days=-1, seconds=rollup)
  550. self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
  551. assert snuba.call_args.args[0][0][0].query.limit == Limit(120)
  552. assert snuba.call_args.args[0][0][0].flags.consistent is True
  553. @region_silo_test
  554. class SnubaTSDBGroupProfilingTest(TestCase, SnubaTestCase, SearchIssueTestMixin):
  555. def setUp(self):
  556. super().setUp()
  557. self.db = SnubaTSDB()
  558. self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
  559. hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
  560. )
  561. self.proj1 = self.create_project()
  562. self.env1 = Environment.objects.get_or_create(
  563. organization_id=self.proj1.organization_id, name="test"
  564. )[0]
  565. self.env2 = Environment.objects.get_or_create(
  566. organization_id=self.proj1.organization_id, name="dev"
  567. )[0]
  568. defaultenv = ""
  569. group1_fingerprint = f"{ProfileFileIOGroupType.type_id}-group1"
  570. group2_fingerprint = f"{ProfileFileIOGroupType.type_id}-group2"
  571. groups = {}
  572. for r in range(0, 14400, 600): # Every 10 min for 4 hours
  573. event, occurrence, group_info = self.store_search_issue(
  574. project_id=self.proj1.id,
  575. # change every 55 min so some hours have 1 user, some have 2
  576. user_id=r // 3300,
  577. fingerprints=[group1_fingerprint] if ((r // 600) % 2) else [group2_fingerprint],
  578. # release_version=str(r // 3600) * 10, # 1 per hour,
  579. environment=[self.env1.name, None][(r // 7200) % 3],
  580. insert_time=self.now + timedelta(seconds=r),
  581. )
  582. if group_info:
  583. groups[group_info.group.id] = group_info.group
  584. all_groups = list(groups.values())
  585. self.proj1group1 = all_groups[0]
  586. self.proj1group2 = all_groups[1]
  587. self.defaultenv = Environment.objects.get(name=defaultenv)
  588. def test_range_group_manual_group_time_rollup(self):
  589. project = self.create_project()
  590. # these are the only granularities/rollups that be actually be used
  591. GRANULARITIES = [
  592. (10, timedelta(seconds=10), 5),
  593. (60 * 60, timedelta(hours=1), 6),
  594. (60 * 60 * 24, timedelta(days=1), 15),
  595. ]
  596. start = (datetime.now(timezone.utc) - timedelta(days=15)).replace(
  597. hour=0, minute=0, second=0
  598. )
  599. for step, delta, times in GRANULARITIES:
  600. series = [start + (delta * i) for i in range(times)]
  601. series_ts = [int(to_timestamp(ts)) for ts in series]
  602. assert self.db.get_optimal_rollup(series[0], series[-1]) == step
  603. assert self.db.get_optimal_rollup_series(series[0], end=series[-1], rollup=None) == (
  604. step,
  605. series_ts,
  606. )
  607. for time_step in series:
  608. _, _, group_info = self.store_search_issue(
  609. project_id=project.id,
  610. user_id=0,
  611. fingerprints=[f"test_range_group_manual_group_time_rollup-{step}"],
  612. environment=None,
  613. insert_time=time_step,
  614. )
  615. assert self.db.get_range(
  616. TSDBModel.group_generic,
  617. [group_info.group.id],
  618. series[0],
  619. series[-1],
  620. rollup=None,
  621. ) == {group_info.group.id: [(ts, 1) for ts in series_ts]}
  622. def test_range_groups_mult(self):
  623. now = (datetime.utcnow() - timedelta(days=1)).replace(
  624. hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
  625. )
  626. dts = [now + timedelta(hours=i) for i in range(4)]
  627. project = self.create_project()
  628. group_fingerprint = f"{ProfileFileIOGroupType.type_id}-group4"
  629. groups = []
  630. for i in range(0, 11):
  631. _, _, group_info = self.store_search_issue(
  632. project_id=project.id,
  633. user_id=0,
  634. fingerprints=[group_fingerprint],
  635. environment=None,
  636. insert_time=now + timedelta(minutes=i * 10),
  637. )
  638. if group_info:
  639. groups.append(group_info.group)
  640. group = groups[0]
  641. assert self.db.get_range(
  642. TSDBModel.group_generic,
  643. [group.id],
  644. dts[0],
  645. dts[-1],
  646. rollup=3600,
  647. ) == {
  648. group.id: [
  649. (timestamp(dts[0]), 6),
  650. (timestamp(dts[1]), 5),
  651. (timestamp(dts[2]), 0),
  652. (timestamp(dts[3]), 0),
  653. ]
  654. }
  655. def test_range_groups_simple(self):
  656. project = self.create_project()
  657. now = (datetime.utcnow() - timedelta(days=1)).replace(
  658. hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
  659. )
  660. group_fingerprint = f"{ProfileFileIOGroupType.type_id}-group5"
  661. ids = [1, 2, 3, 4, 5]
  662. groups = []
  663. for r in ids:
  664. # for r in range(0, 9, 1):
  665. event, occurrence, group_info = self.store_search_issue(
  666. project_id=project.id,
  667. # change every 55 min so some hours have 1 user, some have 2
  668. user_id=r,
  669. fingerprints=[group_fingerprint],
  670. environment=None,
  671. # release_version=str(r // 3600) * 10, # 1 per hour,
  672. insert_time=now,
  673. )
  674. if group_info:
  675. groups.append(group_info.group)
  676. group = groups[0]
  677. dts = [now + timedelta(hours=i) for i in range(4)]
  678. assert self.db.get_range(
  679. TSDBModel.group_generic,
  680. [group.id],
  681. dts[0],
  682. dts[-1],
  683. rollup=3600,
  684. ) == {
  685. group.id: [
  686. (timestamp(dts[0]), len(ids)),
  687. (timestamp(dts[1]), 0),
  688. (timestamp(dts[2]), 0),
  689. (timestamp(dts[3]), 0),
  690. ]
  691. }
  692. def test_range_groups(self):
  693. dts = [self.now + timedelta(hours=i) for i in range(4)]
  694. # Multiple groups
  695. assert self.db.get_range(
  696. TSDBModel.group_generic,
  697. [self.proj1group1.id, self.proj1group2.id],
  698. dts[0],
  699. dts[-1],
  700. rollup=3600,
  701. ) == {
  702. self.proj1group1.id: [
  703. (timestamp(dts[0]), 3),
  704. (timestamp(dts[1]), 3),
  705. (timestamp(dts[2]), 3),
  706. (timestamp(dts[3]), 3),
  707. ],
  708. self.proj1group2.id: [
  709. (timestamp(dts[0]), 3),
  710. (timestamp(dts[1]), 3),
  711. (timestamp(dts[2]), 3),
  712. (timestamp(dts[3]), 3),
  713. ],
  714. }
  715. assert self.db.get_range(TSDBModel.group_generic, [], dts[0], dts[-1], rollup=3600) == {}
  716. def test_get_distinct_counts_totals_users(self):
  717. assert self.db.get_distinct_counts_totals(
  718. TSDBModel.users_affected_by_generic_group,
  719. [self.proj1group1.id],
  720. self.now,
  721. self.now + timedelta(hours=4),
  722. rollup=3600,
  723. ) == {
  724. self.proj1group1.id: 5 # 5 unique users overall
  725. }
  726. assert self.db.get_distinct_counts_totals(
  727. TSDBModel.users_affected_by_generic_group,
  728. [self.proj1group1.id],
  729. self.now,
  730. self.now,
  731. rollup=3600,
  732. ) == {
  733. self.proj1group1.id: 1 # Only 1 unique user in the first hour
  734. }
  735. assert (
  736. self.db.get_distinct_counts_totals(
  737. TSDBModel.users_affected_by_generic_group,
  738. [],
  739. self.now,
  740. self.now + timedelta(hours=4),
  741. rollup=3600,
  742. )
  743. == {}
  744. )
  745. def test_get_sums(self):
  746. assert self.db.get_sums(
  747. model=TSDBModel.group_generic,
  748. keys=[self.proj1group1.id, self.proj1group2.id],
  749. start=self.now,
  750. end=self.now + timedelta(hours=4),
  751. ) == {self.proj1group1.id: 12, self.proj1group2.id: 12}
  752. def test_get_data_or_conditions_parsed(self):
  753. """
  754. Verify parsing the legacy format with nested OR conditions works
  755. """
  756. conditions = [
  757. # or conditions in the legacy format needs open and close brackets for precedence
  758. # there's some special casing when parsing conditions that specifically handles this
  759. [
  760. [["isNull", ["environment"]], "=", 1],
  761. ["environment", "IN", [self.env1.name]],
  762. ]
  763. ]
  764. data1 = self.db.get_data(
  765. model=TSDBModel.group_generic,
  766. keys=[self.proj1group1.id, self.proj1group2.id],
  767. conditions=conditions,
  768. start=self.now,
  769. end=self.now + timedelta(hours=4),
  770. )
  771. data2 = self.db.get_data(
  772. model=TSDBModel.group_generic,
  773. keys=[self.proj1group1.id, self.proj1group2.id],
  774. start=self.now,
  775. end=self.now + timedelta(hours=4),
  776. )
  777. # the above queries should return the same data since all groups either have:
  778. # environment=None or environment=test
  779. # so the condition really shouldn't be filtering anything
  780. assert data1 == data2
  781. class AddJitterToSeriesTest(TestCase):
  782. def setUp(self):
  783. self.db = SnubaTSDB()
  784. def run_test(self, end, interval, jitter, expected_start, expected_end):
  785. end = end.replace(tzinfo=pytz.UTC)
  786. start = end - interval
  787. rollup, rollup_series = self.db.get_optimal_rollup_series(start, end)
  788. series = self.db._add_jitter_to_series(rollup_series, start, rollup, jitter)
  789. assert to_datetime(series[0]) == expected_start.replace(tzinfo=pytz.UTC)
  790. assert to_datetime(series[-1]) == expected_end.replace(tzinfo=pytz.UTC)
  791. def test(self):
  792. self.run_test(
  793. end=datetime(2022, 5, 18, 10, 23, 4),
  794. interval=timedelta(hours=1),
  795. jitter=5,
  796. expected_start=datetime(2022, 5, 18, 9, 22, 55),
  797. expected_end=datetime(2022, 5, 18, 10, 22, 55),
  798. )
  799. self.run_test(
  800. end=datetime(2022, 5, 18, 10, 23, 8),
  801. interval=timedelta(hours=1),
  802. jitter=5,
  803. expected_start=datetime(2022, 5, 18, 9, 23, 5),
  804. expected_end=datetime(2022, 5, 18, 10, 23, 5),
  805. )
  806. # Jitter should be the same
  807. self.run_test(
  808. end=datetime(2022, 5, 18, 10, 23, 8),
  809. interval=timedelta(hours=1),
  810. jitter=55,
  811. expected_start=datetime(2022, 5, 18, 9, 23, 5),
  812. expected_end=datetime(2022, 5, 18, 10, 23, 5),
  813. )
  814. self.run_test(
  815. end=datetime(2022, 5, 18, 22, 33, 2),
  816. interval=timedelta(minutes=1),
  817. jitter=3,
  818. expected_start=datetime(2022, 5, 18, 22, 31, 53),
  819. expected_end=datetime(2022, 5, 18, 22, 32, 53),
  820. )
  821. def test_empty_series(self):
  822. assert self.db._add_jitter_to_series([], datetime(2022, 5, 18, 10, 23, 4), 60, 127) == []
  823. assert self.db._add_jitter_to_series([], datetime(2022, 5, 18, 10, 23, 4), 60, None) == []