# test_tsdb_backend.py
  1. from datetime import datetime, timedelta, timezone
  2. from unittest.mock import patch
  3. import pytz
  4. from snuba_sdk import Limit
  5. from sentry.issues.grouptype import ProfileFileIOGroupType
  6. from sentry.models import Environment, Group, GroupRelease, Release
  7. from sentry.testutils import SnubaTestCase, TestCase
  8. from sentry.testutils.helpers.datetime import iso_format
  9. from sentry.testutils.silo import region_silo_test
  10. from sentry.tsdb.base import TSDBModel
  11. from sentry.tsdb.snuba import SnubaTSDB
  12. from sentry.utils.dates import to_datetime, to_timestamp
  13. from tests.sentry.issues.test_utils import SearchIssueTestMixin
  14. def timestamp(d):
  15. t = int(to_timestamp(d))
  16. return t - (t % 3600)
  17. def has_shape(data, shape, allow_empty=False):
  18. """
  19. Determine if a data object has the provided shape
  20. At any level, the object in `data` and in `shape` must have the same type.
  21. A dict is the same shape if all its keys and values have the same shape as the
  22. key/value in `shape`. The number of keys/values is not relevant.
  23. A list is the same shape if all its items have the same shape as the value
  24. in `shape`
  25. A tuple is the same shape if it has the same length as `shape` and all the
  26. values have the same shape as the corresponding value in `shape`
  27. Any other object simply has to have the same type.
  28. If `allow_empty` is set, lists and dicts in `data` will pass even if they are empty.
  29. """
  30. if not isinstance(data, type(shape)):
  31. return False
  32. if isinstance(data, dict):
  33. return (
  34. (allow_empty or len(data) > 0)
  35. and all(has_shape(k, list(shape.keys())[0]) for k in data.keys())
  36. and all(has_shape(v, list(shape.values())[0]) for v in data.values())
  37. )
  38. elif isinstance(data, list):
  39. return (allow_empty or len(data) > 0) and all(has_shape(v, shape[0]) for v in data)
  40. elif isinstance(data, tuple):
  41. return len(data) == len(shape) and all(
  42. has_shape(data[i], shape[i]) for i in range(len(data))
  43. )
  44. else:
  45. return True
class SnubaTSDBTest(TestCase, SnubaTestCase):
    """Integration tests for SnubaTSDB time-series queries over error events."""

    def setUp(self):
        """Store a deterministic 4-hour event fixture (1 event / 10 min) for proj1."""
        super().setUp()
        self.db = SnubaTSDB()
        # Fixed reference time: midnight UTC of "4 hours ago", so every stored
        # event is safely inside the query windows used by the assertions.
        self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        self.proj1 = self.create_project()
        env1 = "test"
        env2 = "dev"
        defaultenv = ""
        release1 = "1" * 10
        release2 = "2" * 10
        self.release1 = Release.objects.create(
            organization_id=self.organization.id, version=release1, date_added=self.now
        )
        self.release1.add_project(self.proj1)
        self.release2 = Release.objects.create(
            organization_id=self.organization.id, version=release2, date_added=self.now
        )
        self.release2.add_project(self.proj1)
        for r in range(0, 14400, 600):  # Every 10 min for 4 hours
            self.store_event(
                data={
                    "event_id": (str(r) * 32)[:32],
                    "message": "message 1",
                    "platform": "python",
                    "fingerprint": [["group-1"], ["group-2"]][
                        (r // 600) % 2
                    ],  # Switch every 10 mins
                    "timestamp": iso_format(self.now + timedelta(seconds=r)),
                    "tags": {
                        "foo": "bar",
                        "baz": "quux",
                        # Switch every 2 hours
                        # NOTE(review): indexing a 2-element list with `% 3` is
                        # safe only because r < 14400 keeps (r // 7200) <= 1.
                        "environment": [env1, None][(r // 7200) % 3],
                        "sentry:user": f"id:user{r // 3300}",
                    },
                    "user": {
                        # change every 55 min so some hours have 1 user, some have 2
                        "id": f"user{r // 3300}",
                    },
                    "release": str(r // 3600) * 10,  # 1 per hour,
                },
                project_id=self.proj1.id,
            )
        groups = Group.objects.filter(project=self.proj1).order_by("id")
        self.proj1group1 = groups[0]
        self.proj1group2 = groups[1]
        self.env1 = Environment.objects.get(name=env1)
        self.env2 = self.create_environment(name=env2)  # No events
        self.defaultenv = Environment.objects.get(name=defaultenv)
        self.group1release1env1 = GroupRelease.objects.get(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release1.id,
            environment=env1,
        )
        self.group1release2env1 = GroupRelease.objects.create(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release2.id,
            environment=env1,
        )
        self.group2release1env1 = GroupRelease.objects.get(
            project_id=self.proj1.id,
            group_id=self.proj1group2.id,
            release_id=self.release1.id,
            environment=env1,
        )

    def test_range_single(self):
        """get_range over one group with an hourly rollup counts 12 events/hour."""
        env1 = "test"
        project = self.create_project()
        # NOTE(review): step is 300 s, i.e. every 5 min for 4 hours (the
        # "every 10 min" wording elsewhere does not apply here) -> 12 events/hour.
        for r in range(0, 600 * 6 * 4, 300):
            self.store_event(
                data={
                    "event_id": (str(r) * 32)[:32],
                    "message": "message 1",
                    "platform": "python",
                    "fingerprint": ["group-1"],
                    "timestamp": iso_format(self.now + timedelta(seconds=r)),
                    "tags": {
                        "foo": "bar",
                        "baz": "quux",
                        # Switch every 2 hours
                        "environment": [env1, None][(r // 7200) % 3],
                        "sentry:user": f"id:user{r // 3300}",
                    },
                    "user": {
                        # change every 55 min so some hours have 1 user, some have 2
                        "id": f"user{r // 3300}",
                    },
                    "release": str(r // 3600) * 10,  # 1 per hour,
                },
                project_id=project.id,
            )
        groups = Group.objects.filter(project=project).order_by("id")
        group = groups[0]
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.group,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            group.id: [
                (timestamp(dts[0]), 6 * 2),
                (timestamp(dts[1]), 6 * 2),
                (timestamp(dts[2]), 6 * 2),
                (timestamp(dts[3]), 6 * 2),
            ]
        }

    def test_range_groups(self):
        """get_range for one group, several groups, and the empty key list."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.group,
            [self.proj1group1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ]
        }
        # Multiple groups
        assert self.db.get_range(
            TSDBModel.group,
            [self.proj1group1.id, self.proj1group2.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }
        # No keys -> empty result, no query error.
        assert (
            self.db.get_range(
                TSDBModel.group,
                [],
                dts[0],
                dts[-1],
                rollup=3600,
                tenant_ids={"referrer": "test", "organization_id": 1},
            )
            == {}
        )

    def test_range_releases(self):
        """get_range keyed by release id; release1 only sees hour 1's events."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.release,
            [self.release1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.release1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_project(self):
        """get_range keyed by project id sums all groups (6 events/hour)."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 6),
                (timestamp(dts[3]), 6),
            ]
        }

    def test_range_environment_filter(self):
        """environment_ids filtering: named env, empty env, and the default env."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.env1.id],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }
        # No events submitted for env2
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.env2.id],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }
        # Events submitted with no environment should match default environment
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.defaultenv.id],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 6),
                (timestamp(dts[3]), 6),
            ]
        }

    def test_range_rollups(self):
        """Daily and minutely rollups bucket the same fixture correctly."""
        # Daily
        daystart = self.now.replace(hour=0)  # day buckets start on day boundaries
        dts = [daystart + timedelta(days=i) for i in range(2)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=86400,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {self.proj1.id: [(timestamp(dts[0]), 24), (timestamp(dts[1]), 0)]}
        # Minutely
        dts = [self.now + timedelta(minutes=i) for i in range(120)]
        # Expect every 10th minute to have a 1, else 0
        expected = [(to_timestamp(d), 1 if i % 10 == 0 else 0) for i, d in enumerate(dts)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=60,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {self.proj1.id: expected}

    def test_distinct_counts_series_users(self):
        """Per-hour unique-user series for a group and for a project."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 1),
                (timestamp(dts[2]), 1),
                (timestamp(dts[3]), 2),
            ]
        }
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 2),
                (timestamp(dts[2]), 2),
                (timestamp(dts[3]), 2),
            ]
        }
        assert (
            self.db.get_distinct_counts_series(
                TSDBModel.users_affected_by_group,
                [],
                dts[0],
                dts[-1],
                rollup=3600,
                tenant_ids={"referrer": "r", "organization_id": 1234},
            )
            == {}
        )

    # NOTE(review): missing the "test_" prefix, so pytest never collects or
    # runs this method (compare test_get_distinct_counts_totals_users in
    # SnubaTSDBGroupProfilingTest). Rename to test_get_distinct_counts_totals_users
    # after confirming the assertions still hold.
    def get_distinct_counts_totals_users(self):
        """Total unique users per group/project over varying windows."""
        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1group1.id: 2  # 2 unique users overall
        }
        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            self.now,
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1group1.id: 1  # Only 1 unique user in the first hour
        }
        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project,
            [self.proj1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {self.proj1.id: 2}
        assert (
            self.db.get_distinct_counts_totals(
                TSDBModel.users_affected_by_group,
                [],
                self.now,
                self.now + timedelta(hours=4),
                rollup=3600,
                tenant_ids={"referrer": "r", "organization_id": 1234},
            )
            == {}
        )

    def test_most_frequent(self):
        """Most frequent issues per project; order between equal counts may vary."""
        assert self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [self.proj1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) in [
            {self.proj1.id: [(self.proj1group1.id, 2.0), (self.proj1group2.id, 1.0)]},
            {self.proj1.id: [(self.proj1group2.id, 2.0), (self.proj1group1.id, 1.0)]},
        ]  # Both issues equally frequent
        assert (
            self.db.get_most_frequent(
                TSDBModel.frequent_issues_by_project,
                [],
                self.now,
                self.now + timedelta(hours=4),
                rollup=3600,
                tenant_ids={"referrer": "r", "organization_id": 1234},
            )
            == {}
        )

    def test_frequency_series(self):
        """Per-hour frequency breakdown keyed by (group -> group-releases)."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_frequency_series(
            TSDBModel.frequent_releases_by_group,
            {
                self.proj1group1.id: (self.group1release1env1.id, self.group1release2env1.id),
                self.proj1group2.id: (self.group2release1env1.id,),
            },
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "r", "organization_id": 1234},
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), {self.group1release1env1.id: 0, self.group1release2env1.id: 0}),
                (timestamp(dts[1]), {self.group1release1env1.id: 3, self.group1release2env1.id: 0}),
                (timestamp(dts[2]), {self.group1release1env1.id: 0, self.group1release2env1.id: 3}),
                (timestamp(dts[3]), {self.group1release1env1.id: 0, self.group1release2env1.id: 0}),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), {self.group2release1env1.id: 0}),
                (timestamp(dts[1]), {self.group2release1env1.id: 3}),
                (timestamp(dts[2]), {self.group2release1env1.id: 0}),
                (timestamp(dts[3]), {self.group2release1env1.id: 0}),
            ],
        }
        assert (
            self.db.get_frequency_series(
                TSDBModel.frequent_releases_by_group,
                {},
                dts[0],
                dts[-1],
                rollup=3600,
                tenant_ids={"referrer": "r", "organization_id": 1234},
            )
            == {}
        )

    def test_result_shape(self):
        """
        Tests that the results from the different TSDB methods have the
        expected format.
        """
        project_id = self.proj1.id
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        results = self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [project_id],
            dts[0],
            dts[0],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, {1: [(1, 1.0)]})
        results = self.db.get_most_frequent_series(
            TSDBModel.frequent_issues_by_project,
            [project_id],
            dts[0],
            dts[0],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, {1: [(1, {1: 1.0})]})
        items = {
            # {project_id: (issue_id, issue_id, ...)}
            project_id: (self.proj1group1.id, self.proj1group2.id)
        }
        results = self.db.get_frequency_series(
            TSDBModel.frequent_issues_by_project,
            items,
            dts[0],
            dts[-1],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, {1: [(1, {1: 1})]})
        results = self.db.get_frequency_totals(
            TSDBModel.frequent_issues_by_project,
            items,
            dts[0],
            dts[-1],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, {1: {1: 1}})
        results = self.db.get_range(
            TSDBModel.project,
            [project_id],
            dts[0],
            dts[-1],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, {1: [(1, 1)]})
        results = self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project,
            [project_id],
            dts[0],
            dts[-1],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, {1: [(1, 1)]})
        results = self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project,
            [project_id],
            dts[0],
            dts[-1],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, {1: 1})
        results = self.db.get_distinct_counts_union(
            TSDBModel.users_affected_by_project,
            [project_id],
            dts[0],
            dts[-1],
            tenant_ids={"referrer": "r", "organization_id": 1234},
        )
        assert has_shape(results, 1)

    def test_calculated_limit(self):
        """The SnQL limit is computed as (#keys x #rollup buckets)."""
        with patch("sentry.tsdb.snuba.raw_snql_query") as snuba:
            # 24h test: 5 keys x 24 buckets = 120
            rollup = 3600
            end = self.now
            start = end + timedelta(days=-1, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.call_args.args[0].query.limit == Limit(120)
            # 14 day test: 5 keys x 14 buckets = 70
            rollup = 86400
            start = end + timedelta(days=-14, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.call_args.args[0].query.limit == Limit(70)
            # 1h test: 5 keys x 1 bucket = 5
            rollup = 3600
            end = self.now
            start = end + timedelta(hours=-1, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.call_args.args[0].query.limit == Limit(5)

    @patch("sentry.utils.snuba.OVERRIDE_OPTIONS", new={"consistent": True})
    def test_tsdb_with_consistent(self):
        """OVERRIDE_OPTIONS consistent=True is propagated to the request flags."""
        with patch("sentry.utils.snuba._apply_cache_and_build_results") as snuba:
            rollup = 3600
            end = self.now
            start = end + timedelta(days=-1, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.call_args.args[0][0][0].query.limit == Limit(120)
            assert snuba.call_args.args[0][0][0].flags.consistent is True
@region_silo_test
class SnubaTSDBGroupProfilingTest(TestCase, SnubaTestCase, SearchIssueTestMixin):
    """Integration tests for SnubaTSDB over generic (profiling) search issues."""

    def setUp(self):
        """Store a 4-hour search-issue fixture (1 occurrence / 10 min) for proj1."""
        super().setUp()
        self.db = SnubaTSDB()
        # Fixed reference time: midnight UTC of "4 hours ago".
        self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        self.proj1 = self.create_project()
        self.env1 = Environment.objects.get_or_create(
            organization_id=self.proj1.organization_id, name="test"
        )[0]
        self.env2 = Environment.objects.get_or_create(
            organization_id=self.proj1.organization_id, name="dev"
        )[0]
        defaultenv = ""
        group1_fingerprint = f"{ProfileFileIOGroupType.type_id}-group1"
        group2_fingerprint = f"{ProfileFileIOGroupType.type_id}-group2"
        groups = {}
        for r in range(0, 14400, 600):  # Every 10 min for 4 hours
            event, occurrence, group_info = self.store_search_issue(
                project_id=self.proj1.id,
                # change every 55 min so some hours have 1 user, some have 2
                user_id=r // 3300,
                fingerprints=[group1_fingerprint] if ((r // 600) % 2) else [group2_fingerprint],
                # release_version=str(r // 3600) * 10, # 1 per hour,
                # NOTE(review): `% 3` on a 2-element list is safe only because
                # r < 14400 keeps (r // 7200) <= 1.
                environment=[self.env1.name, None][(r // 7200) % 3],
                insert_time=self.now + timedelta(seconds=r),
            )
            if group_info:
                groups[group_info.group.id] = group_info.group
        all_groups = list(groups.values())
        self.proj1group1 = all_groups[0]
        self.proj1group2 = all_groups[1]
        self.defaultenv = Environment.objects.get(name=defaultenv)

    def test_range_group_manual_group_time_rollup(self):
        """Optimal rollup selection and get_range agree for each granularity."""
        project = self.create_project()
        # these are the only granularities/rollups that be actually be used
        GRANULARITIES = [
            (10, timedelta(seconds=10), 5),
            (60 * 60, timedelta(hours=1), 6),
            (60 * 60 * 24, timedelta(days=1), 15),
        ]
        start = (datetime.now(timezone.utc) - timedelta(days=15)).replace(
            hour=0, minute=0, second=0
        )
        for step, delta, times in GRANULARITIES:
            series = [start + (delta * i) for i in range(times)]
            series_ts = [int(to_timestamp(ts)) for ts in series]
            assert self.db.get_optimal_rollup(series[0], series[-1]) == step
            assert self.db.get_optimal_rollup_series(series[0], end=series[-1], rollup=None) == (
                step,
                series_ts,
            )
            # One occurrence per bucket; a distinct fingerprint per granularity
            # keeps each iteration's group isolated.
            for time_step in series:
                _, _, group_info = self.store_search_issue(
                    project_id=project.id,
                    user_id=0,
                    fingerprints=[f"test_range_group_manual_group_time_rollup-{step}"],
                    environment=None,
                    insert_time=time_step,
                )
            assert self.db.get_range(
                TSDBModel.group_generic,
                [group_info.group.id],
                series[0],
                series[-1],
                rollup=None,
                tenant_ids={"referrer": "test", "organization_id": 1},
            ) == {group_info.group.id: [(ts, 1) for ts in series_ts]}

    def test_range_groups_mult(self):
        """11 occurrences, 10 min apart, bucket into the first two hours (6 + 5)."""
        now = (datetime.utcnow() - timedelta(days=1)).replace(
            hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        dts = [now + timedelta(hours=i) for i in range(4)]
        project = self.create_project()
        group_fingerprint = f"{ProfileFileIOGroupType.type_id}-group4"
        groups = []
        for i in range(0, 11):
            _, _, group_info = self.store_search_issue(
                project_id=project.id,
                user_id=0,
                fingerprints=[group_fingerprint],
                environment=None,
                insert_time=now + timedelta(minutes=i * 10),
            )
            if group_info:
                groups.append(group_info.group)
        group = groups[0]
        assert self.db.get_range(
            TSDBModel.group_generic,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "test", "organization_id": 1},
        ) == {
            group.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 5),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_groups_simple(self):
        """All occurrences at one instant land in the first hourly bucket."""
        project = self.create_project()
        now = (datetime.utcnow() - timedelta(days=1)).replace(
            hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        group_fingerprint = f"{ProfileFileIOGroupType.type_id}-group5"
        ids = [1, 2, 3, 4, 5]
        groups = []
        for r in ids:
            # for r in range(0, 9, 1):
            event, occurrence, group_info = self.store_search_issue(
                project_id=project.id,
                # change every 55 min so some hours have 1 user, some have 2
                user_id=r,
                fingerprints=[group_fingerprint],
                environment=None,
                # release_version=str(r // 3600) * 10, # 1 per hour,
                insert_time=now,
            )
            if group_info:
                groups.append(group_info.group)
        group = groups[0]
        dts = [now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.group_generic,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "test", "organization_id": 1},
        ) == {
            group.id: [
                (timestamp(dts[0]), len(ids)),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_groups(self):
        """get_range over multiple generic groups, plus the empty key list."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        # Multiple groups
        assert self.db.get_range(
            TSDBModel.group_generic,
            [self.proj1group1.id, self.proj1group2.id],
            dts[0],
            dts[-1],
            rollup=3600,
            tenant_ids={"referrer": "test", "organization_id": 1},
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }
        assert (
            self.db.get_range(
                TSDBModel.group_generic,
                [],
                dts[0],
                dts[-1],
                rollup=3600,
                tenant_ids={"referrer": "test", "organization_id": 1},
            )
            == {}
        )

    def test_get_distinct_counts_totals_users(self):
        """Total unique users affected by a generic group over varying windows."""
        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_generic_group,
            [self.proj1group1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
            tenant_ids={"referrer": "test", "organization_id": 1},
        ) == {
            self.proj1group1.id: 5  # 5 unique users overall
        }
        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_generic_group,
            [self.proj1group1.id],
            self.now,
            self.now,
            rollup=3600,
            tenant_ids={"referrer": "test", "organization_id": 1},
        ) == {
            self.proj1group1.id: 1  # Only 1 unique user in the first hour
        }
        assert (
            self.db.get_distinct_counts_totals(
                TSDBModel.users_affected_by_generic_group,
                [],
                self.now,
                self.now + timedelta(hours=4),
                rollup=3600,
                tenant_ids={"referrer": "test", "organization_id": 1},
            )
            == {}
        )

    def test_get_sums(self):
        """get_sums totals the 4-hour fixture: 12 occurrences per group."""
        assert self.db.get_sums(
            model=TSDBModel.group_generic,
            keys=[self.proj1group1.id, self.proj1group2.id],
            start=self.now,
            end=self.now + timedelta(hours=4),
            tenant_ids={"referrer": "test", "organization_id": 1},
        ) == {self.proj1group1.id: 12, self.proj1group2.id: 12}

    def test_get_data_or_conditions_parsed(self):
        """
        Verify parsing the legacy format with nested OR conditions works
        """
        conditions = [
            # or conditions in the legacy format needs open and close brackets for precedence
            # there's some special casing when parsing conditions that specifically handles this
            [
                [["isNull", ["environment"]], "=", 1],
                ["environment", "IN", [self.env1.name]],
            ]
        ]
        data1 = self.db.get_data(
            model=TSDBModel.group_generic,
            keys=[self.proj1group1.id, self.proj1group2.id],
            conditions=conditions,
            start=self.now,
            end=self.now + timedelta(hours=4),
            tenant_ids={"referrer": "test", "organization_id": 1},
        )
        data2 = self.db.get_data(
            model=TSDBModel.group_generic,
            keys=[self.proj1group1.id, self.proj1group2.id],
            start=self.now,
            end=self.now + timedelta(hours=4),
            tenant_ids={"referrer": "test", "organization_id": 1},
        )
        # the above queries should return the same data since all groups either have:
        # environment=None or environment=test
        # so the condition really shouldn't be filtering anything
        assert data1 == data2
class AddJitterToSeriesTest(TestCase):
    """Tests for SnubaTSDB._add_jitter_to_series bucket-offset behavior."""

    def setUp(self):
        # NOTE(review): does not call super().setUp(); confirm no base-class
        # fixtures are needed since these tests never hit Snuba.
        self.db = SnubaTSDB()

    def run_test(self, end, interval, jitter, expected_start, expected_end):
        """Build the optimal rollup series for [end - interval, end] and assert
        that the jittered series starts/ends at the expected timestamps."""
        end = end.replace(tzinfo=pytz.UTC)
        start = end - interval
        rollup, rollup_series = self.db.get_optimal_rollup_series(start, end)
        series = self.db._add_jitter_to_series(rollup_series, start, rollup, jitter)
        assert to_datetime(series[0]) == expected_start.replace(tzinfo=pytz.UTC)
        assert to_datetime(series[-1]) == expected_end.replace(tzinfo=pytz.UTC)

    def test(self):
        self.run_test(
            end=datetime(2022, 5, 18, 10, 23, 4),
            interval=timedelta(hours=1),
            jitter=5,
            expected_start=datetime(2022, 5, 18, 9, 22, 55),
            expected_end=datetime(2022, 5, 18, 10, 22, 55),
        )
        self.run_test(
            end=datetime(2022, 5, 18, 10, 23, 8),
            interval=timedelta(hours=1),
            jitter=5,
            expected_start=datetime(2022, 5, 18, 9, 23, 5),
            expected_end=datetime(2022, 5, 18, 10, 23, 5),
        )
        # Jitter should be the same
        self.run_test(
            end=datetime(2022, 5, 18, 10, 23, 8),
            interval=timedelta(hours=1),
            jitter=55,
            expected_start=datetime(2022, 5, 18, 9, 23, 5),
            expected_end=datetime(2022, 5, 18, 10, 23, 5),
        )
        self.run_test(
            end=datetime(2022, 5, 18, 22, 33, 2),
            interval=timedelta(minutes=1),
            jitter=3,
            expected_start=datetime(2022, 5, 18, 22, 31, 53),
            expected_end=datetime(2022, 5, 18, 22, 32, 53),
        )

    def test_empty_series(self):
        # An empty series stays empty regardless of the jitter value.
        assert self.db._add_jitter_to_series([], datetime(2022, 5, 18, 10, 23, 4), 60, 127) == []
        assert self.db._add_jitter_to_series([], datetime(2022, 5, 18, 10, 23, 4), 60, None) == []