Browse Source

test(snuba): Tests for SnubaTSDB that actually hit snuba (#8105)

Adds tests that hit a real Snuba backend. Also includes a small logic change that computes the start and end bounds for TSDB queries so that they align with the buckets used in RedisTSDB.
Alex Hofsteede 6 years ago
parent
commit
67f59cdb8e

+ 10 - 2
src/sentry/tsdb/snuba.py

@@ -4,6 +4,7 @@ import six
 
 from sentry.tsdb.base import BaseTSDB, TSDBModel
 from sentry.utils import snuba
+from sentry.utils.dates import to_datetime
 
 
 class SnubaTSDB(BaseTSDB):
@@ -27,7 +28,6 @@ class SnubaTSDB(BaseTSDB):
         TSDBModel.release: ('release', None),
         TSDBModel.users_affected_by_group: ('issue', 'user_id'),
         TSDBModel.users_affected_by_project: ('project_id', 'user_id'),
-        TSDBModel.users_affected_by_project: ('project_id', 'user_id'),
         TSDBModel.frequent_environments_by_group: ('issue', 'environment'),
         TSDBModel.frequent_releases_by_group: ('issue', 'release'),
         TSDBModel.frequent_issues_by_project: ('project_id', 'issue'),
@@ -68,6 +68,14 @@ class SnubaTSDB(BaseTSDB):
             keys_map['environment'] = [environment_id]
 
         aggregations = [[aggregation, model_aggregate, 'aggregate']]
+
+        # For historical compatibility with bucket-counted TSDB implementations
+        # we grab the original bucketed series and add the rollup time to the
+        # timestamp of the last bucket to get the end time.
+        rollup, series = self.get_optimal_rollup_series(start, end, rollup)
+        start = to_datetime(series[0])
+        end = to_datetime(series[-1] + rollup)
+
         return snuba.query(start, end, groupby, None, keys_map,
                            aggregations, rollup)
 
@@ -146,7 +154,7 @@ class SnubaTSDB(BaseTSDB):
         return self.get_data(model, items, start, end, rollup, environment_id,
                              aggregation='count()')
 
-    def get_optimal_rollup(self, start):
+    def get_optimal_rollup(self, start, end=None):
         """
         Always return the smallest rollup as we can bucket on any granularity.
         """

+ 11 - 4
tests/sentry/tsdb/test_snuba.py

@@ -44,7 +44,14 @@ def has_shape(data, shape, allow_empty=False):
         return True
 
 
-class SnubaTSDBTest(TestCase):
+class SnubaTSDBRequestsTest(TestCase):
+    """
+    Tests that the Snuba TSDB backend makes correctly formatted requests
+    to the Snuba service, and formats the results correctly.
+
+    Mocks the Snuba service request/response.
+    """
+
     def setUp(self):
         self.db = SnubaTSDB()
 
@@ -108,7 +115,7 @@ class SnubaTSDBTest(TestCase):
             assert has_shape(results, 1)
 
     @responses.activate
-    def test_groups(self):
+    def test_groups_request(self):
         now = parse_datetime('2018-03-09T01:00:00Z')
         dts = [now + timedelta(hours=i) for i in range(4)]
         project = self.create_project()
@@ -138,7 +145,7 @@ class SnubaTSDBTest(TestCase):
             assert results is not None
 
     @responses.activate
-    def test_releases(self):
+    def test_releases_request(self):
         now = parse_datetime('2018-03-09T01:00:00Z')
         project = self.create_project()
         release = Release.objects.create(
@@ -166,7 +173,7 @@ class SnubaTSDBTest(TestCase):
             assert results == {release.id: [(to_timestamp(now), 100)]}
 
     @responses.activate
-    def test_environment(self):
+    def test_environment_request(self):
         now = parse_datetime('2018-03-09T01:00:00Z')
         project = self.create_project()
         env = self.create_environment(project=project, name="prod")

+ 0 - 0
tests/snuba/tagstore/test_backend.py → tests/snuba/tagstore/test_tagstore_backend.py


+ 271 - 0
tests/snuba/tsdb/test_tsdb_backend.py

@@ -0,0 +1,271 @@
+from __future__ import absolute_import
+
+import calendar
+from datetime import datetime, timedelta
+import json
+import pytz
+import requests
+import six
+
+from sentry.models import GroupHash, Release
+from sentry.tsdb.base import TSDBModel
+from sentry.tsdb.snuba import SnubaTSDB
+from sentry.testutils import TestCase
+from sentry.utils import snuba
+from sentry.utils.dates import to_timestamp
+
+
+def timestamp(d):
+    """Return the integer epoch timestamp of ``d`` floored to a multiple of 3600 seconds."""
+    return int(to_timestamp(d)) // 3600 * 3600
+
+
+class SnubaTSDBTest(TestCase):
+    """Integration tests that run SnubaTSDB queries against a live Snuba service."""
+
+    def setUp(self):
+        """Call Snuba's /tests/drop, then insert one event every 10 minutes for 4 hours."""
+        assert requests.post(snuba.SNUBA + '/tests/drop').status_code == 200
+
+        self.db = SnubaTSDB()
+        self.now = datetime.utcnow().replace(
+            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
+        )
+
+        self.proj1 = self.create_project()
+        self.proj1env1 = self.create_environment(project=self.proj1, name='test')
+        self.proj1env2 = self.create_environment(project=self.proj1, name='dev')
+        self.proj1group1 = self.create_group(self.proj1)
+        self.proj1group2 = self.create_group(self.proj1)
+
+        hash1, hash2 = '1' * 32, '2' * 32
+        for group, group_hash in ((self.proj1group1, hash1), (self.proj1group2, hash2)):
+            GroupHash.objects.create(project=self.proj1, group=group, hash=group_hash)
+
+        self.release = Release.objects.create(
+            organization_id=self.organization.id, version=1, date_added=self.now,
+        )
+        self.release.add_project(self.proj1)
+
+        data = json.dumps([{
+            'event_id': (six.text_type(r) * 32)[:32],
+            'primary_hash': [hash1, hash2][(r // 600) % 2],
+            'project_id': self.proj1.id,
+            'message': 'message 1',
+            'platform': 'python',
+            'datetime': (self.now + timedelta(seconds=r)).strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
+            'data': {
+                'received': calendar.timegm(self.now.timetuple()) + r,
+                'tags': {
+                    'foo': 'bar',
+                    'baz': 'quux',
+                    'environment': self.proj1env1.name,
+                    'sentry:release': r // 3600,  # 1 per hour
+                },
+                'sentry.interfaces.User': {
+                    # change every 55 min so some hours have 1 user, some have 2
+                    'id': "user{}".format(r // 3300),
+                    'email': "user{}@sentry.io".format(r)
+                }
+            },
+        } for r in range(0, 14400, 600)])  # Every 10 min for 4 hours
+
+        assert requests.post(snuba.SNUBA + '/tests/insert', data=data).status_code == 200
+
+    def test_range_groups(self):
+        """get_range returns hourly event counts for one group and for several groups."""
+        dts = [self.now + timedelta(hours=i) for i in range(4)]
+        assert self.db.get_range(
+            TSDBModel.group,
+            [self.proj1group1.id],
+            dts[0], dts[-1],
+            rollup=3600
+        ) == {
+            self.proj1group1.id: [
+                (timestamp(dts[0]), 3),
+                (timestamp(dts[1]), 3),
+                (timestamp(dts[2]), 3),
+                (timestamp(dts[3]), 3),
+            ],
+        }
+
+        # Multiple groups
+        assert self.db.get_range(
+            TSDBModel.group,
+            [self.proj1group1.id, self.proj1group2.id],
+            dts[0], dts[-1],
+            rollup=3600
+        ) == {
+            self.proj1group1.id: [
+                (timestamp(dts[0]), 3),
+                (timestamp(dts[1]), 3),
+                (timestamp(dts[2]), 3),
+                (timestamp(dts[3]), 3),
+            ],
+            self.proj1group2.id: [
+                (timestamp(dts[0]), 3),
+                (timestamp(dts[1]), 3),
+                (timestamp(dts[2]), 3),
+                (timestamp(dts[3]), 3),
+            ],
+        }
+
+    def test_range_releases(self):
+        """get_range returns the hourly event counts recorded for self.release."""
+        dts = [self.now + timedelta(hours=i) for i in range(4)]
+        assert self.db.get_range(
+            TSDBModel.release,
+            [self.release.id],
+            dts[0], dts[-1],
+            rollup=3600
+        ) == {
+            self.release.id: [
+                (timestamp(dts[1]), 6),
+            ]
+        }
+
+    def test_range_project(self):
+        """get_range counts events per project, with and without an environment filter."""
+        dts = [self.now + timedelta(hours=i) for i in range(4)]
+        assert self.db.get_range(
+            TSDBModel.project,
+            [self.proj1.id],
+            dts[0], dts[-1],
+            rollup=3600
+        ) == {
+            self.proj1.id: [
+                (timestamp(dts[0]), 6),
+                (timestamp(dts[1]), 6),
+                (timestamp(dts[2]), 6),
+                (timestamp(dts[3]), 6),
+            ]
+        }
+
+        assert self.db.get_range(
+            TSDBModel.project,
+            [self.proj1.id],
+            dts[0], dts[-1],
+            rollup=3600,
+            environment_id=self.proj1env1.id
+        ) == {
+            self.proj1.id: [
+                (timestamp(dts[0]), 6),
+                (timestamp(dts[1]), 6),
+                (timestamp(dts[2]), 6),
+                (timestamp(dts[3]), 6),
+            ]
+        }
+
+        # No events submitted for env2
+        assert self.db.get_range(
+            TSDBModel.project,
+            [self.proj1.id],
+            dts[0], dts[-1],
+            rollup=3600,
+            environment_id=self.proj1env2.id
+        ) == {}
+
+    def test_range_rollups(self):
+        """get_range buckets the project's event counts by day and by minute."""
+        # Daily: self.now is midnight UTC (see setUp), so it already sits on a day boundary
+        dts = [self.now + timedelta(days=i) for i in range(2)]
+        assert self.db.get_range(
+            TSDBModel.project,
+            [self.proj1.id],
+            dts[0], dts[-1],
+            rollup=86400
+        ) == {
+            self.proj1.id: [
+                (timestamp(dts[0]), 24),
+            ]
+        }
+
+        # Minutely
+        dts = [self.now + timedelta(minutes=i) for i in range(120)]
+        expected = [(to_timestamp(d), 1) for i, d in enumerate(dts) if i % 10 == 0]
+        assert self.db.get_range(
+            TSDBModel.project,
+            [self.proj1.id],
+            dts[0], dts[-1],
+            rollup=60
+        ) == {
+            self.proj1.id: expected
+        }
+
+    def test_distinct_counts_series_users(self):
+        """get_distinct_counts_series returns hourly distinct-user counts per group/project."""
+        dts = [self.now + timedelta(hours=i) for i in range(4)]
+        assert self.db.get_distinct_counts_series(
+            TSDBModel.users_affected_by_group,
+            [self.proj1group1.id],
+            dts[0], dts[-1],
+            rollup=3600
+        ) == {
+            self.proj1group1.id: [
+                (timestamp(dts[0]), 1),
+                (timestamp(dts[1]), 1),
+                (timestamp(dts[2]), 1),
+                (timestamp(dts[3]), 2),
+            ],
+        }
+
+        assert self.db.get_distinct_counts_series(
+            TSDBModel.users_affected_by_project,
+            [self.proj1.id],
+            dts[0], dts[-1],
+            rollup=3600
+        ) == {
+            self.proj1.id: [
+                (timestamp(dts[0]), 1),
+                (timestamp(dts[1]), 2),
+                (timestamp(dts[2]), 2),
+                (timestamp(dts[3]), 2),
+            ],
+        }
+
+    def test_distinct_counts_totals_users(self):
+        """get_distinct_counts_totals returns distinct-user totals for the queried range."""
+        assert self.db.get_distinct_counts_totals(
+            TSDBModel.users_affected_by_group,
+            [self.proj1group1.id],
+            self.now,
+            self.now + timedelta(hours=4),
+            rollup=3600
+        ) == {
+            self.proj1group1.id: 5,  # user ids are r // 3300, i.e. user0..user4 overall
+        }
+
+        assert self.db.get_distinct_counts_totals(
+            TSDBModel.users_affected_by_group,
+            [self.proj1group1.id],
+            self.now,
+            self.now,
+            rollup=3600
+        ) == {
+            self.proj1group1.id: 1,  # Only 1 unique user in the first hour
+        }
+
+        assert self.db.get_distinct_counts_totals(
+            TSDBModel.users_affected_by_project,
+            [self.proj1.id],
+            self.now,
+            self.now + timedelta(hours=4),
+            rollup=3600
+        ) == {
+            self.proj1.id: 5,
+        }
+
+    def test_frequency_issues(self):
+        """get_most_frequent lists the project's groups together with their scores."""
+        assert self.db.get_most_frequent(
+            TSDBModel.frequent_issues_by_project,
+            [self.proj1.id],
+            self.now,
+            self.now + timedelta(hours=4),
+            rollup=3600,
+        ) == {
+            self.proj1.id: [
+                (self.proj1group1.id, 2.0),
+                (self.proj1group2.id, 1.0),
+            ],
+        }