# test_tsdb_backend.py

from __future__ import absolute_import

import calendar
from datetime import datetime, timedelta
import json

import pytz
import requests
import six
from django.conf import settings
from mock import patch

from sentry.models import GroupHash, GroupRelease, Release
from sentry.tsdb.base import TSDBModel
from sentry.tsdb.snuba import SnubaTSDB
from sentry.testutils import TestCase, SnubaTestCase
from sentry.utils.dates import to_timestamp


def timestamp(d):
    t = int(to_timestamp(d))
    return t - (t % 3600)
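
# Illustrative example (arbitrary date, not a fixture): timestamp() floors the
# epoch value to the hour, e.g.
#   timestamp(datetime(2016, 8, 1, 12, 34, 56, tzinfo=pytz.UTC))
#   == timestamp(datetime(2016, 8, 1, 12, 0, 0, tzinfo=pytz.UTC))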


def has_shape(data, shape, allow_empty=False):
    """
    Determine if a data object has the provided shape.

    At any level, the object in `data` and in `shape` must have the same type.
    A dict is the same shape if all its keys and values have the same shape as the
    key/value in `shape`. The number of keys/values is not relevant.
    A list is the same shape if all its items have the same shape as the value
    in `shape`.
    A tuple is the same shape if it has the same length as `shape` and all the
    values have the same shape as the corresponding value in `shape`.
    Any other object simply has to have the same type.
    If `allow_empty` is set, lists and dicts in `data` will pass even if they are empty.

    See the illustrative examples below this function.
    """
    if not isinstance(data, type(shape)):
        return False
    if isinstance(data, dict):
        return (
            (allow_empty or len(data) > 0)
            and all(has_shape(k, list(shape.keys())[0]) for k in data.keys())
            and all(has_shape(v, list(shape.values())[0]) for v in data.values())
        )
    elif isinstance(data, list):
        return (allow_empty or len(data) > 0) and all(has_shape(v, shape[0]) for v in data)
    elif isinstance(data, tuple):
        return len(data) == len(shape) and all(
            has_shape(data[i], shape[i]) for i in range(len(data))
        )
    else:
        return True
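
# A minimal illustration of has_shape (arbitrary values, not test fixtures):
#   has_shape({"a": [1, 2]}, {"x": [0]})       -> True   (same nested types)
#   has_shape({"a": [1, 2]}, {"x": (0,)})      -> False  (list where a tuple is expected)
#   has_shape([], [0])                         -> False  (empty containers fail by default)
#   has_shape([], [0], allow_empty=True)       -> True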


class SnubaTSDBTest(TestCase, SnubaTestCase):
    def setUp(self):
        super(SnubaTSDBTest, self).setUp()
        self.db = SnubaTSDB()
        self.now = datetime.utcnow().replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )

        self.proj1 = self.create_project()
        self.proj1env1 = self.create_environment(project=self.proj1, name="test")
        self.proj1env2 = self.create_environment(project=self.proj1, name="dev")
        self.proj1env3 = self.create_environment(project=self.proj1, name="staging")
        self.proj1defaultenv = self.create_environment(project=self.proj1, name="")

        self.proj1group1 = self.create_group(self.proj1)
        self.proj1group2 = self.create_group(self.proj1)

        hash1 = "1" * 32
        hash2 = "2" * 32
        GroupHash.objects.create(project=self.proj1, group=self.proj1group1, hash=hash1)
        GroupHash.objects.create(project=self.proj1, group=self.proj1group2, hash=hash2)

        self.release1 = Release.objects.create(
            organization_id=self.organization.id, version="1" * 10, date_added=self.now
        )
        self.release1.add_project(self.proj1)
        self.release2 = Release.objects.create(
            organization_id=self.organization.id, version="2" * 10, date_added=self.now
        )
        self.release2.add_project(self.proj1)

        self.group1release1 = GroupRelease.objects.create(
            project_id=self.proj1.id, group_id=self.proj1group1.id, release_id=self.release1.id
        )
        self.group1release2 = GroupRelease.objects.create(
            project_id=self.proj1.id, group_id=self.proj1group1.id, release_id=self.release2.id
        )
        self.group2release1 = GroupRelease.objects.create(
            project_id=self.proj1.id, group_id=self.proj1group2.id, release_id=self.release1.id
        )

        data = json.dumps(
            [
                {
                    "event_id": (six.text_type(r) * 32)[:32],
                    "primary_hash": [hash1, hash2][(r // 600) % 2],  # Switch every 10 mins
                    "group_id": [self.proj1group1.id, self.proj1group2.id][(r // 600) % 2],
                    "project_id": self.proj1.id,
                    "message": "message 1",
                    "platform": "python",
                    "datetime": (self.now + timedelta(seconds=r)).strftime(
                        "%Y-%m-%dT%H:%M:%S.%fZ"
                    ),
                    "data": {
                        "received": calendar.timegm(self.now.timetuple()) + r,
                        "tags": {
                            "foo": "bar",
                            "baz": "quux",
                            # Switch every 2 hours
                            "environment": [self.proj1env1.name, None][(r // 7200) % 3],
                            "sentry:user": u"id:user{}".format(r // 3300),
                            "sentry:release": six.text_type(r // 3600) * 10,  # 1 per hour
                        },
                        "user": {
                            # Change every 55 min so some hours have 1 user, some have 2
                            "id": u"user{}".format(r // 3300),
                            "email": u"user{}@sentry.io".format(r),
                        },
                    },
                }
                for r in range(0, 14400, 600)  # Every 10 min for 4 hours
            ]
        )
        assert (
            requests.post(settings.SENTRY_SNUBA + "/tests/events/insert", data=data).status_code
            == 200
        )

        # Snuba trims query windows based on first_seen/last_seen, so these need to be correct-ish
        self.proj1group1.first_seen = self.now
        self.proj1group1.last_seen = self.now + timedelta(seconds=14400)
        self.proj1group1.save()
        self.proj1group2.first_seen = self.now
        self.proj1group2.last_seen = self.now + timedelta(seconds=14400)
        self.proj1group2.save()
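
        # For reference when reading the expected values in the tests below, the
        # inserted data works out as follows (derived from the loop above): 24
        # events, one every 10 minutes for 4 hours starting at self.now; events
        # alternate between group1/hash1 and group2/hash2, so each group gets 3
        # events per hour and the project gets 6; the "environment" tag is "test"
        # for the first 2 hours and unset afterwards; the "sentry:release" tag
        # changes every hour ("1111111111" during hour 1 matches release1,
        # "2222222222" during hour 2 matches release2); and the user id changes
        # every 55 minutes, so some hour buckets contain 1 distinct user and
        # others 2.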

    def test_range_groups(self):
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.group, [self.proj1group1.id], dts[0], dts[-1], rollup=3600
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ]
        }

        # Multiple groups
        assert self.db.get_range(
            TSDBModel.group,
            [self.proj1group1.id, self.proj1group2.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }

        assert self.db.get_range(TSDBModel.group, [], dts[0], dts[-1], rollup=3600) == {}

    def test_range_releases(self):
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.release, [self.release1.id], dts[0], dts[-1], rollup=3600
        ) == {
            self.release1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_project(self):
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.project, [self.proj1.id], dts[0], dts[-1], rollup=3600
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 6),
                (timestamp(dts[3]), 6),
            ]
        }

    def test_range_environment_filter(self):
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.proj1env1.id],
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

        # No events submitted for env2
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.proj1env2.id],
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

        # Events submitted with no environment should match default environment
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.proj1defaultenv.id],
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 6),
                (timestamp(dts[3]), 6),
            ]
        }

    def test_range_rollups(self):
        # Daily
        daystart = self.now.replace(hour=0)  # day buckets start on day boundaries
        dts = [daystart + timedelta(days=i) for i in range(2)]
        assert self.db.get_range(
            TSDBModel.project, [self.proj1.id], dts[0], dts[-1], rollup=86400
        ) == {self.proj1.id: [(timestamp(dts[0]), 24), (timestamp(dts[1]), 0)]}

        # Minutely
        dts = [self.now + timedelta(minutes=i) for i in range(120)]
        # Expect every 10th minute to have a 1, else 0
        expected = [(to_timestamp(d), int(i % 10 == 0)) for i, d in enumerate(dts)]
        assert self.db.get_range(
            TSDBModel.project, [self.proj1.id], dts[0], dts[-1], rollup=60
        ) == {self.proj1.id: expected}

    def test_distinct_counts_series_users(self):
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_group, [self.proj1group1.id], dts[0], dts[-1], rollup=3600
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 1),
                (timestamp(dts[2]), 1),
                (timestamp(dts[3]), 2),
            ]
        }

        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project, [self.proj1.id], dts[0], dts[-1], rollup=3600
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 2),
                (timestamp(dts[2]), 2),
                (timestamp(dts[3]), 2),
            ]
        }

        assert (
            self.db.get_distinct_counts_series(
                TSDBModel.users_affected_by_group, [], dts[0], dts[-1], rollup=3600
            )
            == {}
        )
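
    # NOTE: unlike the other methods here, the following one is not collected by
    # the test runner because its name does not start with "test_".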

    def get_distinct_counts_totals_users(self):
        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) == {
            self.proj1group1.id: 2  # 2 unique users overall
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            self.now,
            rollup=3600,
        ) == {
            self.proj1group1.id: 1  # Only 1 unique user in the first hour
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project,
            [self.proj1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) == {self.proj1.id: 2}

        assert (
            self.db.get_distinct_counts_totals(
                TSDBModel.users_affected_by_group,
                [],
                self.now,
                self.now + timedelta(hours=4),
                rollup=3600,
            )
            == {}
        )

    def test_most_frequent(self):
        assert self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [self.proj1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) == {self.proj1.id: [(self.proj1group1.id, 2.0), (self.proj1group2.id, 1.0)]}

        assert (
            self.db.get_most_frequent(
                TSDBModel.frequent_issues_by_project,
                [],
                self.now,
                self.now + timedelta(hours=4),
                rollup=3600,
            )
            == {}
        )

    def test_frequency_series(self):
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_frequency_series(
            TSDBModel.frequent_releases_by_group,
            {
                self.proj1group1.id: (self.group1release1.id, self.group1release2.id),
                self.proj1group2.id: (self.group2release1.id,),
            },
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), {self.group1release1.id: 0, self.group1release2.id: 0}),
                (timestamp(dts[1]), {self.group1release1.id: 3, self.group1release2.id: 0}),
                (timestamp(dts[2]), {self.group1release1.id: 0, self.group1release2.id: 3}),
                (timestamp(dts[3]), {self.group1release1.id: 0, self.group1release2.id: 0}),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), {self.group2release1.id: 0}),
                (timestamp(dts[1]), {self.group2release1.id: 3}),
                (timestamp(dts[2]), {self.group2release1.id: 0}),
                (timestamp(dts[3]), {self.group2release1.id: 0}),
            ],
        }

        assert (
            self.db.get_frequency_series(
                TSDBModel.frequent_releases_by_group, {}, dts[0], dts[-1], rollup=3600
            )
            == {}
        )

    def test_result_shape(self):
        """
        Tests that the results from the different TSDB methods have the
        expected format.
        """
        project_id = self.proj1.id
        dts = [self.now + timedelta(hours=i) for i in range(4)]

        results = self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project, [project_id], dts[0], dts[0]
        )
        assert has_shape(results, {1: [(1, 1.0)]})

        results = self.db.get_most_frequent_series(
            TSDBModel.frequent_issues_by_project, [project_id], dts[0], dts[0]
        )
        assert has_shape(results, {1: [(1, {1: 1.0})]})

        items = {
            # {project_id: (issue_id, issue_id, ...)}
            project_id: (self.proj1group1.id, self.proj1group2.id)
        }
        results = self.db.get_frequency_series(
            TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1]
        )
        assert has_shape(results, {1: [(1, {1: 1})]})

        results = self.db.get_frequency_totals(
            TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1]
        )
        assert has_shape(results, {1: {1: 1}})

        results = self.db.get_range(TSDBModel.project, [project_id], dts[0], dts[-1])
        assert has_shape(results, {1: [(1, 1)]})

        results = self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1]
        )
        assert has_shape(results, {1: [(1, 1)]})

        results = self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1]
        )
        assert has_shape(results, {1: 1})

        results = self.db.get_distinct_counts_union(
            TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1]
        )
        assert has_shape(results, 1)

    def test_calculated_limit(self):
        with patch("sentry.tsdb.snuba.snuba") as snuba:
            # 24h test
            rollup = 3600
            end = self.now
            start = end + timedelta(days=-1, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.query.call_args[1]["limit"] == 120

            # 14 day test
            rollup = 86400
            start = end + timedelta(days=-14, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.query.call_args[1]["limit"] == 70

            # 1h test
            rollup = 3600
            end = self.now
            start = end + timedelta(hours=-1, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.query.call_args[1]["limit"] == 5
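
            # The expected limits above appear to follow
            # (number of rollup buckets between start and end, inclusive) * (number of keys):
            # 24 hourly buckets * 5 groups == 120, 14 daily buckets * 5 == 70,
            # and 1 hourly bucket * 5 == 5.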