test_tsdb_backend.py

from datetime import datetime, timedelta, timezone
from unittest.mock import patch

from snuba_sdk import Limit

from sentry.issues.grouptype import ProfileFileIOGroupType
from sentry.models.environment import Environment
from sentry.models.group import Group
from sentry.models.grouprelease import GroupRelease
from sentry.models.release import Release
from sentry.testutils.cases import SnubaTestCase, TestCase
from sentry.testutils.helpers.datetime import iso_format
from sentry.testutils.silo import region_silo_test
from sentry.tsdb.base import TSDBModel
from sentry.tsdb.snuba import SnubaTSDB
from sentry.utils.dates import to_datetime, to_timestamp
from tests.sentry.issues.test_utils import SearchIssueTestMixin


def timestamp(d):
    t = int(to_timestamp(d))
    return t - (t % 3600)
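

# e.g. timestamp(datetime(2023, 1, 1, 10, 59, 59, tzinfo=timezone.utc)) returns the
# epoch seconds for 10:00:00 on the same day: the value is floored to the hour bucket.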


def has_shape(data, shape, allow_empty=False):
    """
    Determine if a data object has the provided shape

    At any level, the object in `data` and in `shape` must have the same type.
    A dict is the same shape if all its keys and values have the same shape as the
    key/value in `shape`. The number of keys/values is not relevant.
    A list is the same shape if all its items have the same shape as the value
    in `shape`.
    A tuple is the same shape if it has the same length as `shape` and all the
    values have the same shape as the corresponding value in `shape`.
    Any other object simply has to have the same type.
    If `allow_empty` is set, lists and dicts in `data` will pass even if they are empty.
    """
    if not isinstance(data, type(shape)):
        return False
    if isinstance(data, dict):
        return (
            (allow_empty or len(data) > 0)
            and all(has_shape(k, list(shape.keys())[0]) for k in data.keys())
            and all(has_shape(v, list(shape.values())[0]) for v in data.values())
        )
    elif isinstance(data, list):
        return (allow_empty or len(data) > 0) and all(has_shape(v, shape[0]) for v in data)
    elif isinstance(data, tuple):
        return len(data) == len(shape) and all(
            has_shape(data[i], shape[i]) for i in range(len(data))
        )
    else:
        return True
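

# A few illustrative checks (not part of the original suite):
#   has_shape({"a": 1, "b": 2}, {"x": 0})  -> True   (str keys, int values)
#   has_shape([1, 2, 3], [0])              -> True   (homogeneous list)
#   has_shape((1, "a"), (0,))              -> False  (tuple lengths differ)
#   has_shape({}, {"x": 0})                -> False  (empty, unless allow_empty=True)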


class SnubaTSDBTest(TestCase, SnubaTestCase):
    def setUp(self):
        super().setUp()
        self.db = SnubaTSDB()
        self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
        )
        self.proj1 = self.create_project()
        env1 = "test"
        env2 = "dev"
        defaultenv = ""

        release1 = "1" * 10
        release2 = "2" * 10

        self.release1 = Release.objects.create(
            organization_id=self.organization.id, version=release1, date_added=self.now
        )
        self.release1.add_project(self.proj1)
        self.release2 = Release.objects.create(
            organization_id=self.organization.id, version=release2, date_added=self.now
        )
        self.release2.add_project(self.proj1)

        for r in range(0, 14400, 600):  # Every 10 min for 4 hours
            self.store_event(
                data={
                    "event_id": (str(r) * 32)[:32],
                    "message": "message 1",
                    "platform": "python",
                    "fingerprint": [["group-1"], ["group-2"]][
                        (r // 600) % 2
                    ],  # Switch every 10 mins
                    "timestamp": iso_format(self.now + timedelta(seconds=r)),
                    "tags": {
                        "foo": "bar",
                        "baz": "quux",
                        # Switch every 2 hours
                        "environment": [env1, None][(r // 7200) % 3],
                        "sentry:user": f"id:user{r // 3300}",
                    },
                    "user": {
                        # change every 55 min so some hours have 1 user, some have 2
                        "id": f"user{r // 3300}",
                    },
                    "release": str(r // 3600) * 10,  # 1 per hour
                },
                project_id=self.proj1.id,
            )
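
        # The loop above stores 24 events in total: they alternate between two
        # fingerprints (so each group receives 3 events per hour), the environment
        # tag flips from "test" to unset after the first two hours, and the user id
        # changes every 55 minutes, giving 1-2 distinct users per hourly bucket.
        # The assertions in the tests below all follow from this layout.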
        groups = Group.objects.filter(project=self.proj1).order_by("id")
        self.proj1group1 = groups[0]
        self.proj1group2 = groups[1]

        self.env1 = Environment.objects.get(name=env1)
        self.env2 = self.create_environment(name=env2)  # No events
        self.defaultenv = Environment.objects.get(name=defaultenv)

        self.group1release1env1 = GroupRelease.objects.get(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release1.id,
            environment=env1,
        )
        self.group1release2env1 = GroupRelease.objects.create(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release2.id,
            environment=env1,
        )
        self.group2release1env1 = GroupRelease.objects.get(
            project_id=self.proj1.id,
            group_id=self.proj1group2.id,
            release_id=self.release1.id,
            environment=env1,
        )

    def test_range_single(self):
        env1 = "test"
        project = self.create_project()
        for r in range(0, 600 * 6 * 4, 300):  # Every 5 min for 4 hours
            self.store_event(
                data={
                    "event_id": (str(r) * 32)[:32],
                    "message": "message 1",
                    "platform": "python",
                    "fingerprint": ["group-1"],
                    "timestamp": iso_format(self.now + timedelta(seconds=r)),
                    "tags": {
                        "foo": "bar",
                        "baz": "quux",
                        # Switch every 2 hours
                        "environment": [env1, None][(r // 7200) % 3],
                        "sentry:user": f"id:user{r // 3300}",
                    },
                    "user": {
                        # change every 55 min so some hours have 1 user, some have 2
                        "id": f"user{r // 3300}",
                    },
                    "release": str(r // 3600) * 10,  # 1 per hour
                },
                project_id=project.id,
            )

        groups = Group.objects.filter(project=project).order_by("id")
        group = groups[0]

        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.group,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            group.id: [
                (timestamp(dts[0]), 6 * 2),
                (timestamp(dts[1]), 6 * 2),
                (timestamp(dts[2]), 6 * 2),
                (timestamp(dts[3]), 6 * 2),
            ]
        }

    def test_range_groups(self):
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.group,
            [self.proj1group1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ]
        }

        # Multiple groups
        assert self.db.get_range(
            TSDBModel.group,
            [self.proj1group1.id, self.proj1group2.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }

        assert (
            self.db.get_range(
                TSDBModel.group,
                [],
                dts[0],
                dts[-1],
                rollup=3600,
                tenant_ids={"referrer": "test", "organization_id": 1},
            )
            == {}
        )

    def test_range_releases(self):
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.release,
            [self.release1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.release1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_project(self):
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 6),
                (timestamp(dts[3]), 6),
            ]
        }

    def test_range_environment_filter(self):
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.env1.id],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

        # No events submitted for env2
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.env2.id],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

        # Events submitted with no environment should match the default environment
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.defaultenv.id],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 6),
                (timestamp(dts[3]), 6),
            ]
        }

    def test_range_rollups(self):
        # Daily
        daystart = self.now.replace(hour=0)  # day buckets start on day boundaries
        dts = [daystart + timedelta(days=i) for i in range(2)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=86400,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {self.proj1.id: [(timestamp(dts[0]), 24), (timestamp(dts[1]), 0)]}

        # Minutely
        dts = [self.now + timedelta(minutes=i) for i in range(120)]
        # Expect every 10th minute to have a 1, else 0
        expected = [(to_timestamp(d), 1 if i % 10 == 0 else 0) for i, d in enumerate(dts)]
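        # Note that `to_timestamp` is used directly here, with no hourly flooring:
        # `self.now` is aligned to the hour, so each dt already sits on an exact
        # minute boundary matching the 60-second rollup buckets.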
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=60,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {self.proj1.id: expected}

    def test_distinct_counts_series_users(self):
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 1),
                (timestamp(dts[2]), 1),
                (timestamp(dts[3]), 2),
            ]
        }

        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 2),
                (timestamp(dts[2]), 2),
                (timestamp(dts[3]), 2),
            ]
        }

        assert (
            self.db.get_distinct_counts_series(
                TSDBModel.users_affected_by_group,
                [],
                dts[0],
                dts[-1],
                rollup=3600,
                tenant_ids={"referrer": "r", "organization_id": 1234},
            )
            == {}
        )

    def test_get_distinct_counts_totals_users(self):
        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1group1.id: 2  # 2 unique users overall
        }
        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            self.now,
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1group1.id: 1  # Only 1 unique user in the first hour
        }
        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project,
            [self.proj1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {self.proj1.id: 2}
        assert (
            self.db.get_distinct_counts_totals(
                TSDBModel.users_affected_by_group,
                [],
                self.now,
                self.now + timedelta(hours=4),
                rollup=3600,
                tenant_ids={"referrer": "r", "organization_id": 1234},
            )
            == {}
        )

    def test_most_frequent(self):
        assert self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [self.proj1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) in [
            {self.proj1.id: [(self.proj1group1.id, 2.0), (self.proj1group2.id, 1.0)]},
            {self.proj1.id: [(self.proj1group2.id, 2.0), (self.proj1group1.id, 1.0)]},
        ]  # Both issues equally frequent

        assert (
            self.db.get_most_frequent(
                TSDBModel.frequent_issues_by_project,
                [],
                self.now,
                self.now + timedelta(hours=4),
                rollup=3600,
                tenant_ids={"referrer": "r", "organization_id": 1234},
            )
            == {}
        )

    def test_frequency_series(self):
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_frequency_series(
            TSDBModel.frequent_releases_by_group,
            {
                self.proj1group1.id: (self.group1release1env1.id, self.group1release2env1.id),
                self.proj1group2.id: (self.group2release1env1.id,),
            },
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), {self.group1release1env1.id: 0, self.group1release2env1.id: 0}),
                (timestamp(dts[1]), {self.group1release1env1.id: 3, self.group1release2env1.id: 0}),
                (timestamp(dts[2]), {self.group1release1env1.id: 0, self.group1release2env1.id: 3}),
                (timestamp(dts[3]), {self.group1release1env1.id: 0, self.group1release2env1.id: 0}),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), {self.group2release1env1.id: 0}),
                (timestamp(dts[1]), {self.group2release1env1.id: 3}),
                (timestamp(dts[2]), {self.group2release1env1.id: 0}),
                (timestamp(dts[3]), {self.group2release1env1.id: 0}),
            ],
        }

        assert (
            self.db.get_frequency_series(
                TSDBModel.frequent_releases_by_group,
                {},
                dts[0],
                dts[-1],
                rollup=3600,
                tenant_ids={"referrer": "r", "organization_id": 1234},
            )
            == {}
        )

    def test_result_shape(self):
        """
        Tests that the results from the different TSDB methods have the
        expected format.
        """
        project_id = self.proj1.id
        dts = [self.now + timedelta(hours=i) for i in range(4)]

        results = self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [project_id],
            dts[0],
            dts[0],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, {1: [(1, 1.0)]})

        results = self.db.get_most_frequent_series(
            TSDBModel.frequent_issues_by_project,
            [project_id],
            dts[0],
            dts[0],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, {1: [(1, {1: 1.0})]})

        items = {
            # {project_id: (issue_id, issue_id, ...)}
            project_id: (self.proj1group1.id, self.proj1group2.id)
        }
        results = self.db.get_frequency_series(
            TSDBModel.frequent_issues_by_project,
            items,
            dts[0],
            dts[-1],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, {1: [(1, {1: 1})]})

        results = self.db.get_frequency_totals(
            TSDBModel.frequent_issues_by_project,
            items,
            dts[0],
            dts[-1],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, {1: {1: 1}})

        results = self.db.get_range(
            TSDBModel.project,
            [project_id],
            dts[0],
            dts[-1],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, {1: [(1, 1)]})

        results = self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project,
            [project_id],
            dts[0],
            dts[-1],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, {1: [(1, 1)]})

        results = self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project,
            [project_id],
            dts[0],
            dts[-1],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, {1: 1})

        results = self.db.get_distinct_counts_union(
            TSDBModel.users_affected_by_project,
            [project_id],
            dts[0],
            dts[-1],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, 1)

    def test_calculated_limit(self):
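        # In each case below the expected limit works out to (number of keys) x
        # (number of rollup buckets in [start, end]): 5 groups x 24 hourly buckets,
        # 5 x 14 daily buckets, and 5 x 1 hourly bucket respectively. This reading
        # is inferred from the assertions, not taken from the TSDB implementation.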
        with patch("sentry.tsdb.snuba.raw_snql_query") as snuba:
            # 24h test
            rollup = 3600
            end = self.now
            start = end + timedelta(days=-1, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.call_args.args[0].query.limit == Limit(120)

            # 14 day test
            rollup = 86400
            start = end + timedelta(days=-14, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.call_args.args[0].query.limit == Limit(70)

            # 1h test
            rollup = 3600
            end = self.now
            start = end + timedelta(hours=-1, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.call_args.args[0].query.limit == Limit(5)

    @patch("sentry.utils.snuba.OVERRIDE_OPTIONS", new={"consistent": True})
    def test_tsdb_with_consistent(self):
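        # Patching OVERRIDE_OPTIONS forces `consistent: True` onto outgoing Snuba
        # requests; the assertions below check that the flag actually reaches the
        # request object handed to the query pipeline.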
        with patch("sentry.utils.snuba._apply_cache_and_build_results") as snuba:
            rollup = 3600
            end = self.now
            start = end + timedelta(days=-1, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.call_args.args[0][0][0].query.limit == Limit(120)
            assert snuba.call_args.args[0][0][0].flags.consistent is True


@region_silo_test
class SnubaTSDBGroupProfilingTest(TestCase, SnubaTestCase, SearchIssueTestMixin):
    def setUp(self):
        super().setUp()
        self.db = SnubaTSDB()
        self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
        )
        self.proj1 = self.create_project()

        self.env1 = Environment.objects.get_or_create(
            organization_id=self.proj1.organization_id, name="test"
        )[0]
        self.env2 = Environment.objects.get_or_create(
            organization_id=self.proj1.organization_id, name="dev"
        )[0]
        defaultenv = ""

        group1_fingerprint = f"{ProfileFileIOGroupType.type_id}-group1"
        group2_fingerprint = f"{ProfileFileIOGroupType.type_id}-group2"

        groups = {}
        for r in range(0, 14400, 600):  # Every 10 min for 4 hours
            event, occurrence, group_info = self.store_search_issue(
                project_id=self.proj1.id,
                # change every 55 min so some hours have 1 user, some have 2
                user_id=r // 3300,
                fingerprints=[group1_fingerprint] if ((r // 600) % 2) else [group2_fingerprint],
                # release_version=str(r // 3600) * 10,  # 1 per hour
                environment=[self.env1.name, None][(r // 7200) % 3],
                insert_time=self.now + timedelta(seconds=r),
            )
            if group_info:
                groups[group_info.group.id] = group_info.group

        all_groups = list(groups.values())
        self.proj1group1 = all_groups[0]
        self.proj1group2 = all_groups[1]
        self.defaultenv = Environment.objects.get(name=defaultenv)

    def test_range_group_manual_group_time_rollup(self):
        project = self.create_project()

        # these are the only granularities/rollups that can actually be used
        GRANULARITIES = [
            (10, timedelta(seconds=10), 5),
            (60 * 60, timedelta(hours=1), 6),
            (60 * 60 * 24, timedelta(days=1), 15),
        ]
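        # Each tuple is (rollup in seconds, width of one bucket, number of buckets
        # to exercise for that rollup).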

        start = (datetime.now(timezone.utc) - timedelta(days=15)).replace(
            hour=0, minute=0, second=0
        )

        for step, delta, times in GRANULARITIES:
            series = [start + (delta * i) for i in range(times)]
            series_ts = [int(to_timestamp(ts)) for ts in series]

            assert self.db.get_optimal_rollup(series[0], series[-1]) == step
            assert self.db.get_optimal_rollup_series(series[0], end=series[-1], rollup=None) == (
                step,
                series_ts,
            )

            for time_step in series:
                _, _, group_info = self.store_search_issue(
                    project_id=project.id,
                    user_id=0,
                    fingerprints=[f"test_range_group_manual_group_time_rollup-{step}"],
                    environment=None,
                    insert_time=time_step,
                )

            assert group_info is not None
            assert self.db.get_range(
                TSDBModel.group_generic,
                [group_info.group.id],
                series[0],
                series[-1],
                rollup=None,
                tenant_ids={"referrer": "test", "organization_id": 1},
            ) == {group_info.group.id: [(ts, 1) for ts in series_ts]}

    def test_range_groups_mult(self):
        now = (datetime.utcnow() - timedelta(days=1)).replace(
            hour=10, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
        )
        dts = [now + timedelta(hours=i) for i in range(4)]
        project = self.create_project()
        group_fingerprint = f"{ProfileFileIOGroupType.type_id}-group4"
        groups = []
        for i in range(0, 11):
            _, _, group_info = self.store_search_issue(
                project_id=project.id,
                user_id=0,
                fingerprints=[group_fingerprint],
                environment=None,
                insert_time=now + timedelta(minutes=i * 10),
            )
            if group_info:
                groups.append(group_info.group)

        group = groups[0]
        assert self.db.get_range(
            TSDBModel.group_generic,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "test", "organization_id": 1},
        ) == {
            group.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 5),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_groups_simple(self):
        project = self.create_project()
        now = (datetime.utcnow() - timedelta(days=1)).replace(
            hour=10, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
        )
        group_fingerprint = f"{ProfileFileIOGroupType.type_id}-group5"
        ids = [1, 2, 3, 4, 5]
        groups = []
        for r in ids:
            # a distinct user id per event, all stored within the same hour
            event, occurrence, group_info = self.store_search_issue(
                project_id=project.id,
                user_id=r,
                fingerprints=[group_fingerprint],
                environment=None,
                insert_time=now,
            )
            if group_info:
                groups.append(group_info.group)

        group = groups[0]
        dts = [now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.group_generic,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "test", "organization_id": 1},
        ) == {
            group.id: [
                (timestamp(dts[0]), len(ids)),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_groups(self):
        dts = [self.now + timedelta(hours=i) for i in range(4)]

        # Multiple groups
        assert self.db.get_range(
            TSDBModel.group_generic,
            [self.proj1group1.id, self.proj1group2.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "test", "organization_id": 1},
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }

        assert (
            self.db.get_range(
                TSDBModel.group_generic,
                [],
                dts[0],
                dts[-1],
                rollup=3600,
                tenant_ids={"referrer": "test", "organization_id": 1},
            )
            == {}
        )

    def test_get_distinct_counts_totals_users(self):
        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_generic_group,
            [self.proj1group1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
            tenant_ids={"referrer": "test", "organization_id": 1},
        ) == {
            self.proj1group1.id: 5  # 5 unique users overall
        }
        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_generic_group,
            [self.proj1group1.id],
            self.now,
            self.now,
            rollup=3600,
            tenant_ids={"referrer": "test", "organization_id": 1},
        ) == {
            self.proj1group1.id: 1  # Only 1 unique user in the first hour
        }
        assert (
            self.db.get_distinct_counts_totals(
                TSDBModel.users_affected_by_generic_group,
                [],
                self.now,
                self.now + timedelta(hours=4),
                rollup=3600,
                tenant_ids={"referrer": "test", "organization_id": 1},
            )
            == {}
        )

    def test_get_sums(self):
        assert self.db.get_sums(
            model=TSDBModel.group_generic,
            keys=[self.proj1group1.id, self.proj1group2.id],
            start=self.now,
            end=self.now + timedelta(hours=4),
            tenant_ids={"referrer": "test", "organization_id": 1},
        ) == {self.proj1group1.id: 12, self.proj1group2.id: 12}

    def test_get_data_or_conditions_parsed(self):
        """
        Verify that parsing the legacy format with nested OR conditions works.
        """
        conditions = [
            # OR conditions in the legacy format need open and close brackets for precedence;
            # there's some special casing when parsing conditions that specifically handles this
            [
                [["isNull", ["environment"]], "=", 1],
                ["environment", "IN", [self.env1.name]],
            ]
        ]
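        # Roughly equivalent to: WHERE isNull(environment) = 1 OR environment IN ('test')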

        data1 = self.db.get_data(
            model=TSDBModel.group_generic,
            keys=[self.proj1group1.id, self.proj1group2.id],
            conditions=conditions,
            start=self.now,
            end=self.now + timedelta(hours=4),
            tenant_ids={"referrer": "test", "organization_id": 1},
        )
        data2 = self.db.get_data(
            model=TSDBModel.group_generic,
            keys=[self.proj1group1.id, self.proj1group2.id],
            start=self.now,
            end=self.now + timedelta(hours=4),
            tenant_ids={"referrer": "test", "organization_id": 1},
        )

        # The two queries should return the same data: every stored group has either
        # environment=None or environment="test", so the condition shouldn't actually
        # filter anything out.
        assert data1 == data2


class AddJitterToSeriesTest(TestCase):
    def setUp(self):
        self.db = SnubaTSDB()

    def run_test(self, end, interval, jitter, expected_start, expected_end):
        end = end.replace(tzinfo=timezone.utc)
        start = end - interval
        rollup, rollup_series = self.db.get_optimal_rollup_series(start, end)
        series = self.db._add_jitter_to_series(rollup_series, start, rollup, jitter)
        assert to_datetime(series[0]) == expected_start.replace(tzinfo=timezone.utc)
        assert to_datetime(series[-1]) == expected_end.replace(tzinfo=timezone.utc)

    def test(self):
        self.run_test(
            end=datetime(2022, 5, 18, 10, 23, 4),
            interval=timedelta(hours=1),
            jitter=5,
            expected_start=datetime(2022, 5, 18, 9, 22, 55),
            expected_end=datetime(2022, 5, 18, 10, 22, 55),
        )
        self.run_test(
            end=datetime(2022, 5, 18, 10, 23, 8),
            interval=timedelta(hours=1),
            jitter=5,
            expected_start=datetime(2022, 5, 18, 9, 23, 5),
            expected_end=datetime(2022, 5, 18, 10, 23, 5),
        )
        # jitter=55 should produce the same shift as jitter=5 above
        self.run_test(
            end=datetime(2022, 5, 18, 10, 23, 8),
            interval=timedelta(hours=1),
            jitter=55,
            expected_start=datetime(2022, 5, 18, 9, 23, 5),
            expected_end=datetime(2022, 5, 18, 10, 23, 5),
        )
        self.run_test(
            end=datetime(2022, 5, 18, 22, 33, 2),
            interval=timedelta(minutes=1),
            jitter=3,
            expected_start=datetime(2022, 5, 18, 22, 31, 53),
            expected_end=datetime(2022, 5, 18, 22, 32, 53),
        )

    def test_empty_series(self):
        assert self.db._add_jitter_to_series([], datetime(2022, 5, 18, 10, 23, 4), 60, 127) == []
        assert self.db._add_jitter_to_series([], datetime(2022, 5, 18, 10, 23, 4), 60, None) == []