Browse Source

Change translation of filters and result rows (#8679)

Hopefully make it easier to add translators that convert between
Django object IDs and snuba column values, among other things like
parsing datetimes in the result.

+ Tests
Alex Hofsteede 6 years ago
parent
commit
b7e896ab3f

+ 3 - 2
src/sentry/tsdb/snuba.py

@@ -77,7 +77,8 @@ class SnubaTSDB(BaseTSDB):
         end = to_datetime(series[-1] + rollup)
 
         result = snuba.query(start, end, groupby, None, keys_map,
-                             aggregations, rollup, referrer='tsdb')
+                             aggregations, rollup, referrer='tsdb',
+                             is_grouprelease=(model == TSDBModel.frequent_releases_by_group))
 
         if group_on_time:
             keys_map['time'] = series
@@ -204,8 +205,8 @@ class SnubaTSDB(BaseTSDB):
         Returns a normalized set of keys based on the various formats accepted
         by TSDB methods. The input is either just a plain list of keys for the
         top level or a `{level1_key: [level2_key, ...]}` dictionary->list map.
+        The output is a 2-tuple of ([level_1_keys], [all_level_2_keys])
         """
-        # Flatten keys
         if isinstance(items, (list, tuple)):
             return (items, None)
         elif isinstance(items, dict):

+ 108 - 35
src/sentry/utils/snuba.py

@@ -13,7 +13,7 @@ from django.conf import settings
 from django.db.models import Q
 
 from sentry.event_manager import HASH_RE
-from sentry.models import Group, GroupHash, GroupHashTombstone, Environment, Release, ReleaseProject
+from sentry.models import Environment, Group, GroupHash, GroupHashTombstone, GroupRelease, Release, ReleaseProject
 from sentry.utils import metrics
 from sentry.utils.dates import to_timestamp
 from functools import reduce
@@ -41,7 +41,7 @@ _snuba_pool = urllib3.connectionpool.connection_from_url(
 
 def query(start, end, groupby, conditions=None, filter_keys=None,
           aggregations=None, rollup=None, arrayjoin=None, limit=None, orderby=None,
-          having=None, referrer=None):
+          having=None, referrer=None, is_grouprelease=False):
     """
     Sends a query to snuba.
 
@@ -68,20 +68,7 @@ def query(start, end, groupby, conditions=None, filter_keys=None,
 
     # Forward and reverse translation maps from model ids to snuba keys, per column
     with timer('get_snuba_map'):
-        snuba_map = {col: get_snuba_map(col, keys) for col, keys in six.iteritems(filter_keys)}
-        snuba_map = {k: v for k, v in six.iteritems(snuba_map) if k is not None and v is not None}
-        rev_snuba_map = {col: dict(reversed(i) for i in keys.items())
-                         for col, keys in six.iteritems(snuba_map)}
-
-    for col, keys in six.iteritems(filter_keys):
-        keys = [k for k in keys if k is not None]
-        if col in snuba_map:
-            keys = [snuba_map[col][k] for k in keys if k in snuba_map[col]]
-        if keys:
-            if len(keys) == 1 and keys[0] is None:
-                conditions.append((col, 'IS NULL', None))
-            else:
-                conditions.append((col, 'IN', keys))
+        forward, reverse = get_snuba_translators(filter_keys, is_grouprelease=is_grouprelease)
 
     if 'project_id' in filter_keys:
         # If we are given a set of project ids, use those directly.
@@ -94,6 +81,13 @@ def query(start, end, groupby, conditions=None, filter_keys=None,
     else:
         project_ids = []
 
+    for col, keys in six.iteritems(forward(filter_keys.copy())):
+        if keys:
+            if len(keys) == 1 and keys[0] is None:
+                conditions.append((col, 'IS NULL', None))
+            else:
+                conditions.append((col, 'IN', keys))
+
     if not project_ids:
         raise SnubaError("No project_id filter, or none could be inferred from other filters.")
 
@@ -151,12 +145,7 @@ def query(start, end, groupby, conditions=None, filter_keys=None,
     assert expected_cols == got_cols
 
     with timer('process_result'):
-        for d in body['data']:
-            if 'time' in d:
-                d['time'] = int(to_timestamp(parse_datetime(d['time'])))
-            for col in rev_snuba_map:
-                if col in d:
-                    d[col] = rev_snuba_map[col][d[col]]
+        body['data'] = [reverse(d) for d in body['data']]
         return nest_groups(body['data'], groupby, aggregate_cols)
 
 
@@ -177,7 +166,9 @@ def nest_groups(data, groups, aggregate_cols):
         inter = {}
         for d in data:
             inter.setdefault(d[g], []).append(d)
-        return {k: nest_groups(v, rest, aggregate_cols) for k, v in six.iteritems(inter)}
+        return {
+            k: nest_groups(v, rest, aggregate_cols) for k, v in six.iteritems(inter)
+        }
 
 
 def is_condition(cond_or_list):
@@ -187,6 +178,7 @@ def is_condition(cond_or_list):
 def flat_conditions(conditions):
     return list(chain(*[[c] if is_condition(c) else c for c in conditions]))
 
+
 # The following are functions for resolving information from sentry models
 # about projects, environments, and issues (groups). Having this snuba
 # implementation have to know about these relationships is not ideal, and
@@ -195,22 +187,103 @@ def flat_conditions(conditions):
 # is implemented here for simplicity.
 
 
-def get_snuba_map(column, ids):
+def get_snuba_translators(filter_keys, is_grouprelease=False):
     """
     Some models are stored differently in snuba, eg. as the environment
-    name instead of the the environment ID. Here we look up a set of keys
-    for a given model and return a lookup dictionary from those keys to the
-    equivalent ones in snuba.
+    name instead of the environment ID. Here we create and return forward()
+    and reverse() translation functions that perform all the required changes.
+
+    forward() is designed to work on the filter_keys and so should be called
+    with a map of {column: [key1, key2], ...} and should return an updated map
+    with the filter keys replaced with the ones that Snuba expects.
+
+    reverse() is designed to work on result rows, so should be called with a row
+    in the form {column: value, ...} and will return a translated result row.
+
+    Because translation can potentially rely on combinations of different parts
+    of the result row, I decided to implement them as composable functions over the
+    row to be translated. This should make it simpler to add any other needed
+    translations as long as you can express them as forward(filters) and reverse(row)
+    functions.
     """
-    mappings = {
+
+    # Helper lambdas to compose translator functions
+    identity = (lambda x: x)
+    compose = (lambda f, g: lambda x: f(g(x)))
+    replace = (lambda d, key, val: d.update({key: val}) or d)
+
+    forward = identity
+    reverse = identity
+
+    map_columns = {
         'environment': (Environment, 'name', lambda name: None if name == '' else name),
-        'tags[sentry:release]': (Release, 'version', lambda name: name),
+        'tags[sentry:release]': (Release, 'version', identity),
     }
-    if column in mappings and ids:
-        model, field, transform = mappings[column]
-        objects = model.objects.filter(id__in=ids).values_list('id', field)
-        return {k: transform(v) for k, v in objects}
-    return None
+
+    for col, (model, field, fmt) in six.iteritems(map_columns):
+        fwd, rev = None, None
+        ids = filter_keys.get(col)
+        if not ids:
+            continue
+        if is_grouprelease and col == "tags[sentry:release]":
+            # GroupRelease -> Release translation is a special case because the
+            # translation relies on both the Group and Release value in the result row.
+            #
+            # We create a map of {grouprelease_id: (group_id, version), ...} and the corresponding
+            # reverse map of {(group_id, version): grouprelease_id, ...}
+            # NB this does depend on `issue` being defined in the query result, and the correct
+            # set of issues being resolved, which is outside the control of this function.
+            gr_map = GroupRelease.objects.filter(id__in=ids).values_list(
+                "id", "group_id", "release_id"
+            )
+            ver = dict(Release.objects.filter(id__in=[x[2] for x in gr_map]).values_list(
+                "id", "version"
+            ))
+            fwd_map = {gr: (group, ver[release]) for (gr, group, release) in gr_map}
+            rev_map = dict(reversed(t) for t in six.iteritems(fwd_map))
+            fwd = (
+                lambda col, trans: lambda filters: replace(
+                    filters, col, [trans[k][1] for k in filters[col]]
+                )
+            )(col, fwd_map)
+            rev = (
+                lambda col, trans: lambda row: replace(
+                    # The translate map may not have every combination of issue/release
+                    # returned by the query.
+                    row, col, trans.get((row["issue"], row[col]))
+                )
+            )(col, rev_map)
+
+        else:
+            fwd_map = {
+                k: fmt(v)
+                for k, v in model.objects.filter(id__in=ids).values_list("id", field)
+            }
+            rev_map = dict(reversed(t) for t in six.iteritems(fwd_map))
+            fwd = (
+                lambda col, trans: lambda filters: replace(
+                    filters, col, [trans[k] for k in filters[col] if k]
+                )
+            )(col, fwd_map)
+            rev = (
+                lambda col, trans: lambda row: replace(
+                    row, col, trans[row[col]]) if col in row else row
+            )(col, rev_map)
+
+        if fwd:
+            forward = compose(forward, fwd)
+        if rev:
+            reverse = compose(reverse, rev)
+
+    # Extra reverse translator for time column.
+    reverse = compose(
+        reverse,
+        lambda row: replace(row, "time", int(to_timestamp(parse_datetime(row["time"]))))
+        if "time" in row
+        else row,
+    )
+
+    return (forward, reverse)
 
 
 def get_project_issues(project_ids, issue_ids=None):
@@ -237,7 +310,7 @@ def get_project_issues(project_ids, issue_ids=None):
 
     tombstones = GroupHashTombstone.objects.filter(
         reduce(or_, (Q(project_id=pid, hash__in=hshes)
-                    for pid, hshes in six.iteritems(hashes_by_project)))
+                     for pid, hshes in six.iteritems(hashes_by_project)))
     )
 
     tombstones_by_project = {}

+ 167 - 0
tests/sentry/utils/test_snuba.py

@@ -0,0 +1,167 @@
+from __future__ import absolute_import
+
+from datetime import datetime
+import pytz
+
+from sentry.models import GroupRelease, Release
+from sentry.testutils import TestCase
+from sentry.utils.snuba import get_snuba_translators
+
+
+class SnubaUtilsTest(TestCase):
+
+    def setUp(self):
+        self.now = datetime.utcnow().replace(
+            hour=0,
+            minute=0,
+            second=0,
+            microsecond=0,
+            tzinfo=pytz.UTC
+        )
+        self.proj1 = self.create_project()
+        self.proj1env1 = self.create_environment(project=self.proj1, name='prod')
+        self.proj1group1 = self.create_group(self.proj1)
+        self.proj1group2 = self.create_group(self.proj1)
+
+        self.release1 = Release.objects.create(
+            organization_id=self.organization.id,
+            version='1' * 10,
+            date_added=self.now,
+        )
+        self.release1.add_project(self.proj1)
+        self.release2 = Release.objects.create(
+            organization_id=self.organization.id,
+            version='2' * 10,
+            date_added=self.now,
+        )
+        self.release2.add_project(self.proj1)
+
+        self.group1release1 = GroupRelease.objects.create(
+            project_id=self.proj1.id,
+            group_id=self.proj1group1.id,
+            release_id=self.release1.id
+        )
+        self.group1release2 = GroupRelease.objects.create(
+            project_id=self.proj1.id,
+            group_id=self.proj1group1.id,
+            release_id=self.release2.id
+        )
+        self.group2release1 = GroupRelease.objects.create(
+            project_id=self.proj1.id,
+            group_id=self.proj1group2.id,
+            release_id=self.release1.id
+        )
+
+    def test_translation(self):
+        # Case 1: No translation
+        filter_keys = {
+            'sdk': ['python', 'js']
+        }
+        forward, reverse = get_snuba_translators(filter_keys)
+        assert forward(filter_keys) == filter_keys
+        result = [
+            {'sdk': 'python', 'count': 123},
+            {'sdk': 'js', 'count': 234}
+        ]
+        assert all(reverse(row) == row for row in result)
+
+        # Case 2: Environment ID -> Name and back
+        filter_keys = {
+            'environment': [self.proj1env1.id]
+        }
+        forward, reverse = get_snuba_translators(filter_keys)
+        assert forward(filter_keys) == {
+            'environment': [self.proj1env1.name]
+        }
+        row = {
+            'environment': self.proj1env1.name,
+            'count': 123
+        }
+        assert reverse(row) == {
+            'environment': self.proj1env1.id,
+            'count': 123
+        }
+
+        # Case 3, both Environment and Release
+        filter_keys = {
+            'environment': [self.proj1env1.id],
+            'tags[sentry:release]': [self.release1.id]
+        }
+        forward, reverse = get_snuba_translators(filter_keys)
+        assert forward(filter_keys) == {
+            'environment': [self.proj1env1.name],
+            'tags[sentry:release]': [self.release1.version],
+        }
+        row = {
+            'environment': self.proj1env1.name,
+            'tags[sentry:release]': self.release1.version,
+            'count': 123
+        }
+        assert reverse(row) == {
+            'environment': self.proj1env1.id,
+            'tags[sentry:release]': self.release1.id,
+            'count': 123
+        }
+
+        # Case 4: 2 Groups, many-to-many mapping of Groups
+        # to Releases. Reverse translation depends on multiple
+        # fields.
+        filter_keys = {
+            'issue': [
+                self.proj1group1.id,
+                self.proj1group2.id
+            ],
+            'tags[sentry:release]': [
+                self.group1release1.id,
+                self.group1release2.id,
+                self.group2release1.id,
+            ]
+        }
+        forward, reverse = get_snuba_translators(filter_keys, is_grouprelease=True)
+        assert forward(filter_keys) == {
+            'issue': [
+                self.proj1group1.id,
+                self.proj1group2.id
+            ],
+            'tags[sentry:release]': [
+                self.release1.version,
+                self.release2.version,
+                self.release1.version,  # Duplicated because 2 GroupReleases refer to it
+            ]
+        }
+        result = [
+            {
+                'issue': self.proj1group1.id,
+                'tags[sentry:release]': self.release1.version,
+                'count': 1
+            },
+            {
+                'issue': self.proj1group1.id,
+                'tags[sentry:release]': self.release2.version,
+                'count': 2
+            },
+            {
+                'issue': self.proj1group2.id,
+                'tags[sentry:release]': self.release1.version,
+                'count': 3
+            },
+        ]
+
+        result = [reverse(r) for r in result]
+        assert result == [
+            {
+                'issue': self.proj1group1.id,
+                'tags[sentry:release]': self.group1release1.id,
+                'count': 1
+            },
+            {
+                'issue': self.proj1group1.id,
+                'tags[sentry:release]': self.group1release2.id,
+                'count': 2
+            },
+            {
+                'issue': self.proj1group2.id,
+                'tags[sentry:release]': self.group2release1.id,
+                'count': 3
+            },
+        ]

+ 6 - 5
tests/snuba/tagstore/test_tagstore_backend.py

@@ -60,15 +60,15 @@ class TagStorageTest(SnubaTestCase):
                     'email': "user{}@sentry.io".format(r)
                 }
             },
-        } for r in range(1, 3)] + [{
+        } for r in [1, 2]] + [{
             'event_id': '3' * 32,
             'primary_hash': hash2,
             'project_id': self.proj1.id,
             'message': 'message 2',
             'platform': 'python',
-            'datetime': (self.now - timedelta(seconds=r)).strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
+            'datetime': (self.now - timedelta(seconds=2)).strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
             'data': {
-                'received': calendar.timegm(self.now.timetuple()) - r,
+                'received': calendar.timegm(self.now.timetuple()) - 2,
                 'tags': {
                     'browser': 'chrome',
                     'environment': self.proj1env1.name,
@@ -260,8 +260,9 @@ class TagStorageTest(SnubaTestCase):
         ])
         assert set(v.last_seen for v in result) == \
             set([self.now - timedelta(seconds=1), self.now - timedelta(seconds=2)])
-        assert result[0].last_seen == self.now - timedelta(seconds=1)
-        assert result[1].last_seen == self.now - timedelta(seconds=2)
+        result.sort(key=lambda x: x.last_seen)
+        assert result[0].last_seen == self.now - timedelta(seconds=2)
+        assert result[1].last_seen == self.now - timedelta(seconds=1)
         for v in result:
             assert v.value == 'user1'
 

+ 31 - 15
tests/snuba/tsdb/test_tsdb_backend.py

@@ -9,7 +9,7 @@ import six
 
 from django.conf import settings
 
-from sentry.models import GroupHash, Release
+from sentry.models import GroupHash, GroupRelease, Release
 from sentry.tsdb.base import TSDBModel
 from sentry.tsdb.snuba import SnubaTSDB
 from sentry.testutils import TestCase
@@ -90,6 +90,22 @@ class SnubaTSDBTest(TestCase):
         )
         self.release2.add_project(self.proj1)
 
+        self.group1release1 = GroupRelease.objects.create(
+            project_id=self.proj1.id,
+            group_id=self.proj1group1.id,
+            release_id=self.release1.id
+        )
+        self.group1release2 = GroupRelease.objects.create(
+            project_id=self.proj1.id,
+            group_id=self.proj1group1.id,
+            release_id=self.release2.id
+        )
+        self.group2release1 = GroupRelease.objects.create(
+            project_id=self.proj1.id,
+            group_id=self.proj1group2.id,
+            release_id=self.release1.id
+        )
+
         data = json.dumps([{
             'event_id': (six.text_type(r) * 32)[:32],
             'primary_hash': [hash1, hash2][(r // 600) % 2],  # Switch every 10 mins
@@ -345,42 +361,42 @@ class SnubaTSDBTest(TestCase):
         assert self.db.get_frequency_series(
             TSDBModel.frequent_releases_by_group,
             {
-                self.proj1group1.id: (self.release1.id, self.release2.id, ),
-                self.proj1group2.id: (self.release1.id, )
+                self.proj1group1.id: (self.group1release1.id, self.group1release2.id, ),
+                self.proj1group2.id: (self.group2release1.id, )
             },
             dts[0], dts[-1],
             rollup=3600,
         ) == {
             self.proj1group1.id: [
                 (timestamp(dts[0]), {
-                    self.release1.id: 0,
-                    self.release2.id: 0,
+                    self.group1release1.id: 0,
+                    self.group1release2.id: 0,
                 }),
                 (timestamp(dts[1]), {
-                    self.release1.id: 3,
-                    self.release2.id: 0,
+                    self.group1release1.id: 3,
+                    self.group1release2.id: 0,
                 }),
                 (timestamp(dts[2]), {
-                    self.release1.id: 0,
-                    self.release2.id: 3,
+                    self.group1release1.id: 0,
+                    self.group1release2.id: 3,
                 }),
                 (timestamp(dts[3]), {
-                    self.release1.id: 0,
-                    self.release2.id: 0,
+                    self.group1release1.id: 0,
+                    self.group1release2.id: 0,
                 }),
             ],
             self.proj1group2.id: [
                 (timestamp(dts[0]), {
-                    self.release1.id: 0,
+                    self.group2release1.id: 0,
                 }),
                 (timestamp(dts[1]), {
-                    self.release1.id: 3,
+                    self.group2release1.id: 3,
                 }),
                 (timestamp(dts[2]), {
-                    self.release1.id: 0,
+                    self.group2release1.id: 0,
                 }),
                 (timestamp(dts[3]), {
-                    self.release1.id: 0,
+                    self.group2release1.id: 0,
                 }),
             ],
         }