Просмотр исходного кода

fix: Zero-fill TSDB results for missing keys (#8136)

Alex Hofsteede 6 лет назад
Родитель
Коммит
8712dcafca
3 изменённых файла: 53 строки добавлено и 14 удалено
  1. 22 5
      src/sentry/tsdb/snuba.py
  2. 17 7
      tests/sentry/tsdb/test_snuba.py
  3. 14 2
      tests/snuba/tsdb/test_tsdb_backend.py

+ 22 - 5
src/sentry/tsdb/snuba.py

@@ -76,8 +76,25 @@ class SnubaTSDB(BaseTSDB):
         start = to_datetime(series[0])
         end = to_datetime(series[-1] + rollup)
 
-        return snuba.query(start, end, groupby, None, keys_map,
-                           aggregations, rollup)
+        result = snuba.query(start, end, groupby, None, keys_map,
+                             aggregations, rollup)
+
+        if group_on_time:
+            keys_map['time'] = series
+        self.zerofill(result, groupby, keys_map)
+
+        return result
+
+    def zerofill(self, result, groups, group_keys):
+        if len(groups) > 0:
+            for k in group_keys[groups[0]]:
+                if k not in result:
+                    result[k] = 0 if len(groups) == 1 else {}
+
+            if len(groups) > 1:
+                subgroups = groups[1:]
+                for v in result.values():
+                    self.zerofill(v, subgroups, group_keys)
 
     def get_range(self, model, keys, start, end, rollup=None, environment_id=None):
         result = self.get_data(model, keys, start, end, rollup, environment_id,
@@ -117,8 +134,8 @@ class SnubaTSDB(BaseTSDB):
         #    {group:[top1, ...]}
         # into
         #    {group: [(top1, score), ...]}
-        for k in result:
-            item_scores = [(v, float(i + 1)) for i, v in enumerate(reversed(result[k]))]
+        for k, top in six.iteritems(result):
+            item_scores = [(v, float(i + 1)) for i, v in enumerate(reversed(top or []))]
             result[k] = list(reversed(item_scores))
 
         return result
@@ -134,7 +151,7 @@ class SnubaTSDB(BaseTSDB):
         #    {group: [(timestamp, {top1: score, ...}), ...]}
         for k in result:
             result[k] = sorted([
-                (timestamp, {v: float(i + 1) for i, v in enumerate(reversed(topk))})
+                (timestamp, {v: float(i + 1) for i, v in enumerate(reversed(topk or []))})
                 for (timestamp, topk) in result[k].items()
             ])
 

+ 17 - 7
tests/sentry/tsdb/test_snuba.py

@@ -71,21 +71,22 @@ class SnubaTSDBRequestsTest(TestCase):
                 aggs = body.get('aggregations', [])
                 meta = [{'name': col} for col in body['groupby'] + [a[2] for a in aggs]]
                 datum = {col['name']: 1 for col in meta}
+                datum['project_id'] = project_id
                 if 'time' in datum:
                     datum['time'] = '2018-03-09T01:00:00Z'
                 for agg in aggs:
                     if agg[0].startswith('topK'):
-                        datum[agg[2]] = [1]
+                        datum[agg[2]] = [99]
                 return (200, {}, json.dumps({'data': [datum], 'meta': meta}))
 
             rsps.add_callback(responses.POST, snuba.SNUBA + '/query', callback=snuba_response)
 
             results = self.db.get_most_frequent(TSDBModel.frequent_issues_by_project,
-                                                [project_id], dts[0], dts[-1])
+                                                [project_id], dts[0], dts[0])
             assert has_shape(results, {1: [(1, 1.0)]})
 
             results = self.db.get_most_frequent_series(TSDBModel.frequent_issues_by_project,
-                                                       [project_id], dts[0], dts[-1])
+                                                       [project_id], dts[0], dts[0])
             assert has_shape(results, {1: [(1, {1: 1.0})]})
 
             items = {
@@ -169,8 +170,13 @@ class SnubaTSDBRequestsTest(TestCase):
                 }))
 
             rsps.add_callback(responses.POST, snuba.SNUBA + '/query', callback=snuba_response)
-            results = self.db.get_range(TSDBModel.release, [release.id], dts[0], dts[-1])
-            assert results == {release.id: [(to_timestamp(now), 100)]}
+            results = self.db.get_range(
+                TSDBModel.release, [release.id], dts[0], dts[-1], rollup=3600)
+            assert results == {
+                release.id: [
+                    (int(to_timestamp(d)), 100 if d == now else 0)
+                    for d in dts]
+            }
 
     @responses.activate
     def test_environment_request(self):
@@ -193,8 +199,12 @@ class SnubaTSDBRequestsTest(TestCase):
 
             rsps.add_callback(responses.POST, snuba.SNUBA + '/query', callback=snuba_response)
             results = self.db.get_range(TSDBModel.project, [project.id],
-                                        dts[0], dts[-1], environment_id=env.id)
-            assert results == {project.id: [(to_timestamp(now), 100)]}
+                                        dts[0], dts[-1], environment_id=env.id, rollup=3600)
+            assert results == {
+                project.id: [
+                    (int(to_timestamp(d)), 100 if d == now else 0)
+                    for d in dts]
+            }
 
     def test_invalid_model(self):
         with pytest.raises(Exception) as ex:

+ 14 - 2
tests/snuba/tsdb/test_tsdb_backend.py

@@ -123,7 +123,10 @@ class SnubaTSDBTest(TestCase):
             rollup=3600
         ) == {
             self.release.id: [
+                (timestamp(dts[0]), 0),
                 (timestamp(dts[1]), 6),
+                (timestamp(dts[2]), 0),
+                (timestamp(dts[3]), 0),
             ]
         }
 
@@ -165,7 +168,14 @@ class SnubaTSDBTest(TestCase):
             dts[0], dts[-1],
             rollup=3600,
             environment_id=self.proj1env2.id
-        ) == {}
+        ) == {
+            self.proj1.id: [
+                (timestamp(dts[0]), 0),
+                (timestamp(dts[1]), 0),
+                (timestamp(dts[2]), 0),
+                (timestamp(dts[3]), 0),
+            ]
+        }
 
     def test_range_rollups(self):
         # Daily
@@ -179,12 +189,14 @@ class SnubaTSDBTest(TestCase):
         ) == {
             self.proj1.id: [
                 (timestamp(dts[0]), 24),
+                (timestamp(dts[1]), 0)
             ]
         }
 
         # Minutely
         dts = [self.now + timedelta(minutes=i) for i in range(120)]
-        expected = [(to_timestamp(d), 1) for i, d in enumerate(dts) if i % 10 == 0]
+        # Expect every 10th minute to have a 1, else 0
+        expected = [(to_timestamp(d), int(i % 10 == 0)) for i, d in enumerate(dts)]
         assert self.db.get_range(
             TSDBModel.project,
             [self.proj1.id],