# test_tsdb_backend.py
  1. from datetime import datetime, timedelta, timezone
  2. from unittest.mock import patch
  3. import pytz
  4. from snuba_sdk import Limit
  5. from sentry.models import Environment, Group, GroupRelease, Release
  6. from sentry.testutils import SnubaTestCase, TestCase
  7. from sentry.testutils.helpers.datetime import iso_format
  8. from sentry.testutils.performance_issues.store_transaction import PerfIssueTransactionTestMixin
  9. from sentry.testutils.silo import region_silo_test
  10. from sentry.tsdb.base import TSDBModel
  11. from sentry.tsdb.snuba import SnubaTSDB
  12. from sentry.types.issues import GroupType
  13. from sentry.utils.dates import to_datetime, to_timestamp
  14. from sentry.utils.snuba import aliased_query
  15. from tests.sentry.issues.test_utils import SearchIssueTestMixin
  16. def timestamp(d):
  17. t = int(to_timestamp(d))
  18. return t - (t % 3600)
  19. def has_shape(data, shape, allow_empty=False):
  20. """
  21. Determine if a data object has the provided shape
  22. At any level, the object in `data` and in `shape` must have the same type.
  23. A dict is the same shape if all its keys and values have the same shape as the
  24. key/value in `shape`. The number of keys/values is not relevant.
  25. A list is the same shape if all its items have the same shape as the value
  26. in `shape`
  27. A tuple is the same shape if it has the same length as `shape` and all the
  28. values have the same shape as the corresponding value in `shape`
  29. Any other object simply has to have the same type.
  30. If `allow_empty` is set, lists and dicts in `data` will pass even if they are empty.
  31. """
  32. if not isinstance(data, type(shape)):
  33. return False
  34. if isinstance(data, dict):
  35. return (
  36. (allow_empty or len(data) > 0)
  37. and all(has_shape(k, list(shape.keys())[0]) for k in data.keys())
  38. and all(has_shape(v, list(shape.values())[0]) for v in data.values())
  39. )
  40. elif isinstance(data, list):
  41. return (allow_empty or len(data) > 0) and all(has_shape(v, shape[0]) for v in data)
  42. elif isinstance(data, tuple):
  43. return len(data) == len(shape) and all(
  44. has_shape(data[i], shape[i]) for i in range(len(data))
  45. )
  46. else:
  47. return True
class SnubaTSDBTest(TestCase, SnubaTestCase):
    """Integration tests for ``SnubaTSDB`` error-event models.

    ``setUp`` stores one error event every 10 minutes for 4 hours, alternating
    between two groups, two environments and hourly releases; the tests assert
    the time-bucketed counts that SnubaTSDB derives from that fixture.
    """

    def setUp(self):
        super().setUp()
        self.db = SnubaTSDB()
        # Anchor at the start of a day at least 4 hours in the past so every
        # hourly bucket asserted below is fully in the past.
        self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        self.proj1 = self.create_project()
        env1 = "test"
        env2 = "dev"
        defaultenv = ""
        release1 = "1" * 10
        release2 = "2" * 10
        self.release1 = Release.objects.create(
            organization_id=self.organization.id, version=release1, date_added=self.now
        )
        self.release1.add_project(self.proj1)
        self.release2 = Release.objects.create(
            organization_id=self.organization.id, version=release2, date_added=self.now
        )
        self.release2.add_project(self.proj1)

        for r in range(0, 14400, 600):  # Every 10 min for 4 hours
            self.store_event(
                data={
                    "event_id": (str(r) * 32)[:32],
                    "message": "message 1",
                    "platform": "python",
                    "fingerprint": [["group-1"], ["group-2"]][
                        (r // 600) % 2
                    ],  # Switch every 10 mins
                    "timestamp": iso_format(self.now + timedelta(seconds=r)),
                    "tags": {
                        "foo": "bar",
                        "baz": "quux",
                        # Switch every 2 hours (index 2 is never reached since r < 14400)
                        "environment": [env1, None][(r // 7200) % 3],
                        "sentry:user": f"id:user{r // 3300}",
                    },
                    "user": {
                        # change every 55 min so some hours have 1 user, some have 2
                        "id": f"user{r // 3300}",
                    },
                    "release": str(r // 3600) * 10,  # 1 per hour,
                },
                project_id=self.proj1.id,
            )

        groups = Group.objects.filter(project=self.proj1).order_by("id")
        self.proj1group1 = groups[0]
        self.proj1group2 = groups[1]

        self.env1 = Environment.objects.get(name=env1)
        self.env2 = self.create_environment(name=env2)  # No events
        self.defaultenv = Environment.objects.get(name=defaultenv)

        # The group1/release1 and group2/release1 associations are created as a
        # side effect of storing the events above; group1/release2 is not, so
        # it is created explicitly here.
        self.group1release1env1 = GroupRelease.objects.get(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release1.id,
            environment=env1,
        )

        self.group1release2env1 = GroupRelease.objects.create(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release2.id,
            environment=env1,
        )

        self.group2release1env1 = GroupRelease.objects.get(
            project_id=self.proj1.id,
            group_id=self.proj1group2.id,
            release_id=self.release1.id,
            environment=env1,
        )

    def test_range_single(self):
        """Hourly counts for a single group fed by its own event stream."""
        env1 = "test"
        project = self.create_project()
        # Every 5 min for 4 hours (step is 300 s, so 12 events per hour)
        for r in range(0, 600 * 6 * 4, 300):
            self.store_event(
                data={
                    "event_id": (str(r) * 32)[:32],
                    "message": "message 1",
                    "platform": "python",
                    "fingerprint": ["group-1"],
                    "timestamp": iso_format(self.now + timedelta(seconds=r)),
                    "tags": {
                        "foo": "bar",
                        "baz": "quux",
                        # Switch every 2 hours
                        "environment": [env1, None][(r // 7200) % 3],
                        "sentry:user": f"id:user{r // 3300}",
                    },
                    "user": {
                        # change every 55 min so some hours have 1 user, some have 2
                        "id": f"user{r // 3300}",
                    },
                    "release": str(r // 3600) * 10,  # 1 per hour,
                },
                project_id=project.id,
            )

        groups = Group.objects.filter(project=project).order_by("id")
        group = groups[0]

        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(TSDBModel.group, [group.id], dts[0], dts[-1], rollup=3600) == {
            group.id: [
                (timestamp(dts[0]), 6 * 2),
                (timestamp(dts[1]), 6 * 2),
                (timestamp(dts[2]), 6 * 2),
                (timestamp(dts[3]), 6 * 2),
            ]
        }

    def test_range_groups(self):
        """Hourly counts per group; empty key list yields an empty result."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.group, [self.proj1group1.id], dts[0], dts[-1], rollup=3600
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ]
        }

        # Multiple groups
        assert self.db.get_range(
            TSDBModel.group,
            [self.proj1group1.id, self.proj1group2.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }

        assert self.db.get_range(TSDBModel.group, [], dts[0], dts[-1], rollup=3600) == {}

    def test_range_releases(self):
        """Only hour 1 has events tagged with release1 ("1111111111")."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.release, [self.release1.id], dts[0], dts[-1], rollup=3600
        ) == {
            self.release1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_project(self):
        """Project-level counts: 6 events per hour across both groups."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.project, [self.proj1.id], dts[0], dts[-1], rollup=3600
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 6),
                (timestamp(dts[3]), 6),
            ]
        }

    def test_range_environment_filter(self):
        """environment_ids filtering: env1 covers hours 0-1, default env hours 2-3."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.env1.id],
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

        # No events submitted for env2
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.env2.id],
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

        # Events submitted with no environment should match default environment
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.defaultenv.id],
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 6),
                (timestamp(dts[3]), 6),
            ]
        }

    def test_range_rollups(self):
        """get_range with daily (86400 s) and minutely (60 s) rollups."""
        # Daily
        daystart = self.now.replace(hour=0)  # day buckets start on day boundaries
        dts = [daystart + timedelta(days=i) for i in range(2)]
        assert self.db.get_range(
            TSDBModel.project, [self.proj1.id], dts[0], dts[-1], rollup=86400
        ) == {self.proj1.id: [(timestamp(dts[0]), 24), (timestamp(dts[1]), 0)]}

        # Minutely
        dts = [self.now + timedelta(minutes=i) for i in range(120)]
        # Expect every 10th minute to have a 1, else 0
        expected = [(to_timestamp(d), 1 if i % 10 == 0 else 0) for i, d in enumerate(dts)]
        assert self.db.get_range(
            TSDBModel.project, [self.proj1.id], dts[0], dts[-1], rollup=60
        ) == {self.proj1.id: expected}

    def test_distinct_counts_series_users(self):
        """Per-hour unique-user counts for a group and for the whole project."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_group, [self.proj1group1.id], dts[0], dts[-1], rollup=3600
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 1),
                (timestamp(dts[2]), 1),
                (timestamp(dts[3]), 2),
            ]
        }

        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project, [self.proj1.id], dts[0], dts[-1], rollup=3600
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 2),
                (timestamp(dts[2]), 2),
                (timestamp(dts[3]), 2),
            ]
        }

        assert (
            self.db.get_distinct_counts_series(
                TSDBModel.users_affected_by_group, [], dts[0], dts[-1], rollup=3600
            )
            == {}
        )

    # NOTE(review): this method is missing the `test_` prefix, so pytest never
    # collects or runs it. Simply renaming it is NOT safe: the expected totals
    # below (2 users overall) look stale — the fixture stores 5 distinct users
    # (user0..user4), and the analogous, actually-collected
    # SnubaTSDBGroupProfilingTest.test_get_distinct_counts_totals_users expects
    # 5. Verify the expectations against Snuba before enabling this test.
    def get_distinct_counts_totals_users(self):
        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) == {
            self.proj1group1.id: 2  # 2 unique users overall
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            self.now,
            rollup=3600,
        ) == {
            self.proj1group1.id: 1  # Only 1 unique user in the first hour
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project,
            [self.proj1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) == {self.proj1.id: 2}

        assert (
            self.db.get_distinct_counts_totals(
                TSDBModel.users_affected_by_group,
                [],
                self.now,
                self.now + timedelta(hours=4),
                rollup=3600,
            )
            == {}
        )

    def test_most_frequent(self):
        """Top groups by frequency for a project; ties may come back in either order."""
        assert self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [self.proj1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) in [
            {self.proj1.id: [(self.proj1group1.id, 2.0), (self.proj1group2.id, 1.0)]},
            {self.proj1.id: [(self.proj1group2.id, 2.0), (self.proj1group1.id, 1.0)]},
        ]  # Both issues equally frequent

        assert (
            self.db.get_most_frequent(
                TSDBModel.frequent_issues_by_project,
                [],
                self.now,
                self.now + timedelta(hours=4),
                rollup=3600,
            )
            == {}
        )

    def test_frequency_series(self):
        """Per-hour frequency of each (group, release) pair."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_frequency_series(
            TSDBModel.frequent_releases_by_group,
            {
                self.proj1group1.id: (self.group1release1env1.id, self.group1release2env1.id),
                self.proj1group2.id: (self.group2release1env1.id,),
            },
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), {self.group1release1env1.id: 0, self.group1release2env1.id: 0}),
                (timestamp(dts[1]), {self.group1release1env1.id: 3, self.group1release2env1.id: 0}),
                (timestamp(dts[2]), {self.group1release1env1.id: 0, self.group1release2env1.id: 3}),
                (timestamp(dts[3]), {self.group1release1env1.id: 0, self.group1release2env1.id: 0}),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), {self.group2release1env1.id: 0}),
                (timestamp(dts[1]), {self.group2release1env1.id: 3}),
                (timestamp(dts[2]), {self.group2release1env1.id: 0}),
                (timestamp(dts[3]), {self.group2release1env1.id: 0}),
            ],
        }

        assert (
            self.db.get_frequency_series(
                TSDBModel.frequent_releases_by_group, {}, dts[0], dts[-1], rollup=3600
            )
            == {}
        )

    def test_result_shape(self):
        """
        Tests that the results from the different TSDB methods have the
        expected format.
        """
        project_id = self.proj1.id
        dts = [self.now + timedelta(hours=i) for i in range(4)]

        results = self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project, [project_id], dts[0], dts[0]
        )
        assert has_shape(results, {1: [(1, 1.0)]})

        results = self.db.get_most_frequent_series(
            TSDBModel.frequent_issues_by_project, [project_id], dts[0], dts[0]
        )
        assert has_shape(results, {1: [(1, {1: 1.0})]})

        items = {
            # {project_id: (issue_id, issue_id, ...)}
            project_id: (self.proj1group1.id, self.proj1group2.id)
        }
        results = self.db.get_frequency_series(
            TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1]
        )
        assert has_shape(results, {1: [(1, {1: 1})]})

        results = self.db.get_frequency_totals(
            TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1]
        )
        assert has_shape(results, {1: {1: 1}})

        results = self.db.get_range(TSDBModel.project, [project_id], dts[0], dts[-1])
        assert has_shape(results, {1: [(1, 1)]})

        results = self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1]
        )
        assert has_shape(results, {1: [(1, 1)]})

        results = self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1]
        )
        assert has_shape(results, {1: 1})

        results = self.db.get_distinct_counts_union(
            TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1]
        )
        assert has_shape(results, 1)

    def test_calculated_limit(self):
        """The Snuba query limit scales with len(keys) x number of rollup buckets."""
        with patch("sentry.tsdb.snuba.raw_snql_query") as snuba:
            # 24h test (5 keys x 24 hourly buckets = 120)
            rollup = 3600
            end = self.now
            start = end + timedelta(days=-1, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.call_args.args[0].query.limit == Limit(120)

            # 14 day test (5 keys x 14 daily buckets = 70)
            rollup = 86400
            start = end + timedelta(days=-14, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.call_args.args[0].query.limit == Limit(70)

            # 1h test (5 keys x 1 hourly bucket = 5)
            rollup = 3600
            end = self.now
            start = end + timedelta(hours=-1, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.call_args.args[0].query.limit == Limit(5)
@region_silo_test
class SnubaTSDBGroupPerformanceTest(TestCase, SnubaTestCase, PerfIssueTransactionTestMixin):
    """``get_range`` coverage for performance-issue groups
    (``TSDBModel.group_performance``), driven by transactions stored through
    ``PerfIssueTransactionTestMixin.store_transaction``.
    """

    def setUp(self):
        super().setUp()
        self.db = SnubaTSDB()
        # Anchor at the start of a day at least 4 hours in the past so every
        # hourly bucket asserted below is fully in the past.
        self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        self.proj1 = self.create_project()
        self.env1 = Environment.objects.get_or_create(
            organization_id=self.proj1.organization_id, name="test"
        )[0]
        self.env2 = Environment.objects.get_or_create(
            organization_id=self.proj1.organization_id, name="dev"
        )[0]
        defaultenv = ""

        group1_fingerprint = f"{GroupType.PERFORMANCE_RENDER_BLOCKING_ASSET_SPAN.value}-group1"
        group2_fingerprint = f"{GroupType.PERFORMANCE_N_PLUS_ONE_DB_QUERIES.value}-group2"

        for r in range(0, 14400, 600):  # Every 10 min for 4 hours
            event = self.store_transaction(
                environment=[self.env1.name, None][(r // 7200) % 3],
                project_id=self.proj1.id,
                # change every 55 min so some hours have 1 user, some have 2
                user_id=f"user{r // 3300}",
                # release_version=str(r // 3600) * 10,  # 1 per hour,
                timestamp=self.now + timedelta(seconds=r),
                fingerprint=[group1_fingerprint, group2_fingerprint] if ((r // 600) % 2) else [],
            )

        # NOTE(review): `event` leaks from the loop above — the groups are read
        # from the final iteration (r=13800, an odd slot, which carries both
        # fingerprints and therefore both groups).
        self.proj1group1 = event.groups[0]
        self.proj1group2 = event.groups[1]
        self.defaultenv = Environment.objects.get(name=defaultenv)

    def test_range_groups_single(self):
        """Cross-checks a single performance group against raw Snuba queries,
        then asserts its hourly get_range counts."""
        from sentry.snuba.dataset import Dataset

        now = (datetime.utcnow() - timedelta(days=1)).replace(
            hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        dts = [now + timedelta(hours=i) for i in range(4)]
        project = self.create_project()
        group_fingerprint = f"{GroupType.PERFORMANCE_N_PLUS_ONE_DB_QUERIES.value}-group3"

        # not sure what's going on here, but `times=1,2,3,4` work fine
        # fails with anything above 4
        times = 4
        event_ids = []
        events = []
        for i in range(0, times):
            res = self.store_transaction(
                environment=None,
                project_id=project.id,
                user_id="my_user",
                timestamp=now + timedelta(minutes=i * 10),
                fingerprint=[group_fingerprint],
            )

            # Sanity check: the stored transaction is queryable from Snuba...
            grouped_by_project = aliased_query(
                dataset=Dataset.Transactions,
                start=None,
                end=None,
                groupby=None,
                conditions=None,
                filter_keys={"project_id": [project.id], "event_id": [res.event_id]},
                selected_columns=["event_id", "project_id", "group_ids"],
                aggregations=None,
            )

            assert grouped_by_project["data"][0]["event_id"] == res.event_id
            from sentry.eventstore.models import Event

            # ...and addressable in nodestore as well.
            event_from_nodestore = Event(project_id=project.id, event_id=res.event_id)
            assert event_from_nodestore.event_id == res.event_id
            event_ids.append(res.event_id)
            events.append(res)

        group = events[0].groups[0]

        transactions_for_project = aliased_query(
            dataset=Dataset.Transactions,
            start=None,
            end=None,
            groupby=None,
            conditions=None,
            filter_keys={"project_id": [project.id]},
            selected_columns=["project_id", "event_id"],
            aggregations=None,
        )
        assert len(transactions_for_project["data"]) == times

        transactions_by_group = aliased_query(
            dataset=Dataset.Transactions,
            start=None,
            end=None,
            # start=group.first_seen,
            # end=now + timedelta(hours=4),
            groupby=["group_id"],
            conditions=None,
            filter_keys={"project_id": [project.id], "group_id": [group.id]},
            aggregations=[
                ["arrayJoin", ["group_ids"], "group_id"],
                ["count()", "", "times_seen"],
            ],
        )

        assert transactions_by_group["data"][0]["times_seen"] == times  # 1 + (times % 5)

        assert self.db.get_range(
            TSDBModel.group_performance,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            group.id: [
                # (timestamp(dts[0]), 1 + (times % 5)),
                (timestamp(dts[0]), times),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_groups_mult(self):
        """11 events at 10-minute intervals: 6 land in hour 0, 5 in hour 1."""
        now = (datetime.utcnow() - timedelta(days=1)).replace(
            hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        dts = [now + timedelta(hours=i) for i in range(4)]
        project = self.create_project()
        group_fingerprint = f"{GroupType.PERFORMANCE_N_PLUS_ONE_DB_QUERIES.value}-group4"
        ids = ["a", "b", "c", "d", "e", "f", "1", "2", "3", "4", "5"]
        events = []
        for i, _ in enumerate(ids):
            event = self.store_transaction(
                environment=None,
                project_id=project.id,
                user_id="my_user",
                timestamp=now + timedelta(minutes=i * 10),
                fingerprint=[group_fingerprint],
            )
            events.append(event)
        group = events[0].groups[0]

        assert self.db.get_range(
            TSDBModel.group_performance,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            group.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 5),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_groups_simple(self):
        """All events share one timestamp, so they all land in the first bucket."""
        project = self.create_project()
        now = (datetime.utcnow() - timedelta(days=1)).replace(
            hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        group_fingerprint = f"{GroupType.PERFORMANCE_RENDER_BLOCKING_ASSET_SPAN.value}-group5"
        # for r in range(0, 14400, 600):  # Every 10 min for 4 hours
        # for r in [1, 2, 3, 4, 5, 6, 7, 8]:
        ids = ["a", "b", "c", "d", "e"]  # , "f"]
        events = []
        for r in ids:
            # for r in range(0, 9, 1):
            event = self.store_transaction(
                environment=None,
                project_id=project.id,
                # change every 55 min so some hours have 1 user, some have 2
                user_id=f"user{r}",
                # release_version=str(r // 3600) * 10,  # 1 per hour,
                timestamp=now,
                fingerprint=[group_fingerprint],
            )
            events.append(event)

        group = events[0].groups[0]
        dts = [now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.group_performance,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            group.id: [
                (timestamp(dts[0]), len(ids)),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_groups(self):
        """Hourly counts for both setUp groups; empty key list yields {}."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]

        # Multiple groups
        assert self.db.get_range(
            TSDBModel.group_performance,
            [self.proj1group1.id, self.proj1group2.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }

        assert (
            self.db.get_range(TSDBModel.group_performance, [], dts[0], dts[-1], rollup=3600) == {}
        )
@region_silo_test
class SnubaTSDBGroupProfilingTest(TestCase, SnubaTestCase, SearchIssueTestMixin):
    """SnubaTSDB coverage for generic (search-issue) groups
    (``TSDBModel.group_generic`` and ``users_affected_by_generic_group``),
    driven by occurrences stored through ``SearchIssueTestMixin``.
    """

    def setUp(self):
        super().setUp()
        self.db = SnubaTSDB()
        # Anchor at the start of a day at least 4 hours in the past so every
        # hourly bucket asserted below is fully in the past.
        self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        self.proj1 = self.create_project()

        self.env1 = Environment.objects.get_or_create(
            organization_id=self.proj1.organization_id, name="test"
        )[0]
        self.env2 = Environment.objects.get_or_create(
            organization_id=self.proj1.organization_id, name="dev"
        )[0]
        defaultenv = ""

        group1_fingerprint = f"{GroupType.PROFILE_BLOCKED_THREAD.value}-group1"
        group2_fingerprint = f"{GroupType.PROFILE_BLOCKED_THREAD.value}-group2"

        groups = {}
        for r in range(0, 14400, 600):  # Every 10 min for 4 hours
            event, occurrence, group_info = self.store_search_issue(
                project_id=self.proj1.id,
                # change every 55 min so some hours have 1 user, some have 2
                user_id=r // 3300,
                fingerprints=[group1_fingerprint] if ((r // 600) % 2) else [group2_fingerprint],
                # release_version=str(r // 3600) * 10,  # 1 per hour,
                environment=[self.env1.name, None][(r // 7200) % 3],
                insert_time=self.now + timedelta(seconds=r),
            )
            if group_info:
                groups[group_info.group.id] = group_info.group

        all_groups = list(groups.values())
        self.proj1group1 = all_groups[0]
        self.proj1group2 = all_groups[1]
        self.defaultenv = Environment.objects.get(name=defaultenv)

    def test_range_group_manual_group_time_rollup(self):
        """Exercises get_optimal_rollup(_series) and get_range with rollup=None
        for each supported granularity."""
        project = self.create_project()

        # these are the only granularities/rollups that can actually be used
        GRANULARITIES = [
            (10, timedelta(seconds=10), 5),
            (60 * 60, timedelta(hours=1), 6),
            (60 * 60 * 24, timedelta(days=1), 15),
        ]

        start = (datetime.now(timezone.utc) - timedelta(days=15)).replace(
            hour=0, minute=0, second=0
        )

        for step, delta, times in GRANULARITIES:
            series = [start + (delta * i) for i in range(times)]
            series_ts = [int(to_timestamp(ts)) for ts in series]

            assert self.db.get_optimal_rollup(series[0], series[-1]) == step
            assert self.db.get_optimal_rollup_series(series[0], end=series[-1], rollup=None) == (
                step,
                series_ts,
            )

            # One occurrence per bucket; the fingerprint embeds `step`, so each
            # granularity gets its own group.
            for time_step in series:
                _, _, group_info = self.store_search_issue(
                    project_id=project.id,
                    user_id=0,
                    fingerprints=[f"test_range_group_manual_group_time_rollup-{step}"],
                    environment=None,
                    insert_time=time_step,
                )

            assert self.db.get_range(
                TSDBModel.group_generic,
                [group_info.group.id],
                series[0],
                series[-1],
                rollup=None,
            ) == {group_info.group.id: [(ts, 1) for ts in series_ts]}

    def test_range_groups_mult(self):
        """11 occurrences at 10-minute intervals: 6 land in hour 0, 5 in hour 1."""
        now = (datetime.utcnow() - timedelta(days=1)).replace(
            hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        dts = [now + timedelta(hours=i) for i in range(4)]
        project = self.create_project()
        group_fingerprint = f"{GroupType.PROFILE_BLOCKED_THREAD.value}-group4"
        groups = []
        for i in range(0, 11):
            _, _, group_info = self.store_search_issue(
                project_id=project.id,
                user_id=0,
                fingerprints=[group_fingerprint],
                environment=None,
                insert_time=now + timedelta(minutes=i * 10),
            )
            if group_info:
                groups.append(group_info.group)

        group = groups[0]
        assert self.db.get_range(
            TSDBModel.group_generic,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            group.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 5),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_groups_simple(self):
        """All occurrences share one timestamp, so they all land in the first bucket."""
        project = self.create_project()
        now = (datetime.utcnow() - timedelta(days=1)).replace(
            hour=10, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )
        group_fingerprint = f"{GroupType.PROFILE_BLOCKED_THREAD.value}-group5"
        ids = [1, 2, 3, 4, 5]
        groups = []
        for r in ids:
            # for r in range(0, 9, 1):
            event, occurrence, group_info = self.store_search_issue(
                project_id=project.id,
                # change every 55 min so some hours have 1 user, some have 2
                user_id=r,
                fingerprints=[group_fingerprint],
                environment=None,
                # release_version=str(r // 3600) * 10,  # 1 per hour,
                insert_time=now,
            )
            if group_info:
                groups.append(group_info.group)

        group = groups[0]
        dts = [now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.group_generic,
            [group.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            group.id: [
                (timestamp(dts[0]), len(ids)),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_groups(self):
        """Hourly counts for both setUp groups; empty key list yields {}."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]

        # Multiple groups
        assert self.db.get_range(
            TSDBModel.group_generic,
            [self.proj1group1.id, self.proj1group2.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }

        assert self.db.get_range(TSDBModel.group_generic, [], dts[0], dts[-1], rollup=3600) == {}

    def test_get_distinct_counts_totals_users(self):
        """Unique-user totals for a generic group over the whole window and a
        single bucket; empty key list yields {}."""
        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_generic_group,
            [self.proj1group1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) == {
            self.proj1group1.id: 5  # 5 unique users overall
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_generic_group,
            [self.proj1group1.id],
            self.now,
            self.now,
            rollup=3600,
        ) == {
            self.proj1group1.id: 1  # Only 1 unique user in the first hour
        }

        assert (
            self.db.get_distinct_counts_totals(
                TSDBModel.users_affected_by_generic_group,
                [],
                self.now,
                self.now + timedelta(hours=4),
                rollup=3600,
            )
            == {}
        )

    def test_get_sums(self):
        """Total event count per group over the whole 4-hour window."""
        assert self.db.get_sums(
            model=TSDBModel.group_generic,
            keys=[self.proj1group1.id, self.proj1group2.id],
            start=self.now,
            end=self.now + timedelta(hours=4),
        ) == {self.proj1group1.id: 12, self.proj1group2.id: 12}
  859. def test_get_data_or_conditions_parsed(self):
  860. """
  861. Verify parsing the legacy format with nested OR conditions works
  862. """
  863. conditions = [
  864. # or conditions in the legacy format needs open and close brackets for precedence
  865. # there's some special casing when parsing conditions that specifically handles this
  866. [
  867. [["isNull", ["environment"]], "=", 1],
  868. ["environment", "IN", [self.env1.name]],
  869. ]
  870. ]
  871. data1 = self.db.get_data(
  872. model=TSDBModel.group_generic,
  873. keys=[self.proj1group1.id, self.proj1group2.id],
  874. conditions=conditions,
  875. start=self.now,
  876. end=self.now + timedelta(hours=4),
  877. )
  878. data2 = self.db.get_data(
  879. model=TSDBModel.group_generic,
  880. keys=[self.proj1group1.id, self.proj1group2.id],
  881. start=self.now,
  882. end=self.now + timedelta(hours=4),
  883. )
  884. # the above queries should return the same data since all groups either have:
  885. # environment=None or environment=test
  886. # so the condition really shouldn't be filtering anything
  887. assert data1 == data2
  888. class AddJitterToSeriesTest(TestCase):
  889. def setUp(self):
  890. self.db = SnubaTSDB()
  891. def run_test(self, end, interval, jitter, expected_start, expected_end):
  892. end = end.replace(tzinfo=pytz.UTC)
  893. start = end - interval
  894. rollup, rollup_series = self.db.get_optimal_rollup_series(start, end)
  895. series = self.db._add_jitter_to_series(rollup_series, start, rollup, jitter)
  896. assert to_datetime(series[0]) == expected_start.replace(tzinfo=pytz.UTC)
  897. assert to_datetime(series[-1]) == expected_end.replace(tzinfo=pytz.UTC)
  898. def test(self):
  899. self.run_test(
  900. end=datetime(2022, 5, 18, 10, 23, 4),
  901. interval=timedelta(hours=1),
  902. jitter=5,
  903. expected_start=datetime(2022, 5, 18, 9, 22, 55),
  904. expected_end=datetime(2022, 5, 18, 10, 22, 55),
  905. )
  906. self.run_test(
  907. end=datetime(2022, 5, 18, 10, 23, 8),
  908. interval=timedelta(hours=1),
  909. jitter=5,
  910. expected_start=datetime(2022, 5, 18, 9, 23, 5),
  911. expected_end=datetime(2022, 5, 18, 10, 23, 5),
  912. )
  913. # Jitter should be the same
  914. self.run_test(
  915. end=datetime(2022, 5, 18, 10, 23, 8),
  916. interval=timedelta(hours=1),
  917. jitter=55,
  918. expected_start=datetime(2022, 5, 18, 9, 23, 5),
  919. expected_end=datetime(2022, 5, 18, 10, 23, 5),
  920. )
  921. self.run_test(
  922. end=datetime(2022, 5, 18, 22, 33, 2),
  923. interval=timedelta(minutes=1),
  924. jitter=3,
  925. expected_start=datetime(2022, 5, 18, 22, 31, 53),
  926. expected_end=datetime(2022, 5, 18, 22, 32, 53),
  927. )
  928. def test_empty_series(self):
  929. assert self.db._add_jitter_to_series([], datetime(2022, 5, 18, 10, 23, 4), 60, 127) == []
  930. assert self.db._add_jitter_to_series([], datetime(2022, 5, 18, 10, 23, 4), 60, None) == []