Browse Source

fix(tagstore): Disallow NULL environment_id and use 0 instead of NULL for 'aggregates' (#7530)

Brett Hoerner 7 years ago
parent
commit
352e4d9d06

+ 78 - 0
src/sentry/tagstore/south_migrations/0008_auto__chg_field_tagkey_environment_id.py

@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+from south.utils import datetime_utils as datetime
+from south.db import db
+from south.v2 import SchemaMigration
+from django.db import models
+
+
+class Migration(SchemaMigration):
+
+    # Flag to indicate if this migration is too risky
+    # to run online and needs to be coordinated for offline
+    is_dangerous = True
+
+    def forwards(self, orm):
+
+        # Changing field 'TagKey.environment_id'
+        db.alter_column(u'tagstore_tagkey', 'environment_id', self.gf(
+            'sentry.db.models.fields.bounded.BoundedBigIntegerField')(default=0))
+
+    def backwards(self, orm):
+
+        # Changing field 'TagKey.environment_id'
+        db.alter_column(u'tagstore_tagkey', 'environment_id', self.gf(
+            'sentry.db.models.fields.bounded.BoundedBigIntegerField')(null=True))
+
+    models = {
+        'tagstore.eventtag': {
+            'Meta': {'unique_together': "(('project_id', 'event_id', 'key', 'value'),)", 'object_name': 'EventTag', 'index_together': "(('project_id', 'key', 'value'), ('group_id', 'key', 'value'))"},
+            'date_added': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now', 'db_index': 'True'}),
+            'event_id': ('sentry.db.models.fields.bounded.BoundedBigIntegerField', [], {}),
+            'group_id': ('sentry.db.models.fields.bounded.BoundedBigIntegerField', [], {}),
+            'id': ('sentry.db.models.fields.bounded.BoundedBigAutoField', [], {'primary_key': 'True'}),
+            'key': ('sentry.db.models.fields.foreignkey.FlexibleForeignKey', [], {'to': "orm['tagstore.TagKey']", 'db_column': "'key_id'"}),
+            'project_id': ('sentry.db.models.fields.bounded.BoundedBigIntegerField', [], {}),
+            'value': ('sentry.db.models.fields.foreignkey.FlexibleForeignKey', [], {'to': "orm['tagstore.TagValue']", 'db_column': "'value_id'"})
+        },
+        'tagstore.grouptagkey': {
+            'Meta': {'unique_together': "(('project_id', 'group_id', '_key'),)", 'object_name': 'GroupTagKey'},
+            '_key': ('sentry.db.models.fields.foreignkey.FlexibleForeignKey', [], {'to': "orm['tagstore.TagKey']", 'db_column': "'key_id'"}),
+            'group_id': ('sentry.db.models.fields.bounded.BoundedBigIntegerField', [], {'db_index': 'True'}),
+            'id': ('sentry.db.models.fields.bounded.BoundedBigAutoField', [], {'primary_key': 'True'}),
+            'project_id': ('sentry.db.models.fields.bounded.BoundedBigIntegerField', [], {'db_index': 'True'}),
+            'values_seen': ('sentry.db.models.fields.bounded.BoundedPositiveIntegerField', [], {'default': '0'})
+        },
+        'tagstore.grouptagvalue': {
+            'Meta': {'unique_together': "(('project_id', 'group_id', '_key', '_value'),)", 'object_name': 'GroupTagValue', 'index_together': "(('project_id', '_key', '_value', 'last_seen'),)"},
+            '_key': ('sentry.db.models.fields.foreignkey.FlexibleForeignKey', [], {'to': "orm['tagstore.TagKey']", 'db_column': "'key_id'"}),
+            '_value': ('sentry.db.models.fields.foreignkey.FlexibleForeignKey', [], {'to': "orm['tagstore.TagValue']", 'db_column': "'value_id'"}),
+            'first_seen': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now', 'null': 'True', 'db_index': 'True'}),
+            'group_id': ('sentry.db.models.fields.bounded.BoundedBigIntegerField', [], {'db_index': 'True'}),
+            'id': ('sentry.db.models.fields.bounded.BoundedBigAutoField', [], {'primary_key': 'True'}),
+            'last_seen': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now', 'null': 'True', 'db_index': 'True'}),
+            'project_id': ('sentry.db.models.fields.bounded.BoundedBigIntegerField', [], {'db_index': 'True'}),
+            'times_seen': ('sentry.db.models.fields.bounded.BoundedPositiveIntegerField', [], {'default': '0'})
+        },
+        'tagstore.tagkey': {
+            'Meta': {'unique_together': "(('project_id', 'environment_id', 'key'),)", 'object_name': 'TagKey'},
+            'environment_id': ('sentry.db.models.fields.bounded.BoundedBigIntegerField', [], {}),
+            'id': ('sentry.db.models.fields.bounded.BoundedBigAutoField', [], {'primary_key': 'True'}),
+            'key': ('django.db.models.fields.CharField', [], {'max_length': '32'}),
+            'project_id': ('sentry.db.models.fields.bounded.BoundedBigIntegerField', [], {'db_index': 'True'}),
+            'status': ('sentry.db.models.fields.bounded.BoundedPositiveIntegerField', [], {'default': '0'}),
+            'values_seen': ('sentry.db.models.fields.bounded.BoundedPositiveIntegerField', [], {'default': '0'})
+        },
+        'tagstore.tagvalue': {
+            'Meta': {'unique_together': "(('project_id', '_key', 'value'),)", 'object_name': 'TagValue', 'index_together': "(('project_id', '_key', 'last_seen'),)"},
+            '_key': ('sentry.db.models.fields.foreignkey.FlexibleForeignKey', [], {'to': "orm['tagstore.TagKey']", 'db_column': "'key_id'"}),
+            'data': ('sentry.db.models.fields.gzippeddict.GzippedDictField', [], {'null': 'True', 'blank': 'True'}),
+            'first_seen': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now', 'null': 'True', 'db_index': 'True'}),
+            'id': ('sentry.db.models.fields.bounded.BoundedBigAutoField', [], {'primary_key': 'True'}),
+            'last_seen': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now', 'null': 'True', 'db_index': 'True'}),
+            'project_id': ('sentry.db.models.fields.bounded.BoundedBigIntegerField', [], {'db_index': 'True'}),
+            'times_seen': ('sentry.db.models.fields.bounded.BoundedPositiveIntegerField', [], {'default': '0'}),
+            'value': ('django.db.models.fields.CharField', [], {'max_length': '200'})
+        }
+    }
+
+    complete_apps = ['tagstore']

+ 47 - 21
src/sentry/tagstore/v2/backend.py

@@ -30,6 +30,9 @@ from .models import EventTag, GroupTagKey, GroupTagValue, TagKey, TagValue
 logger = logging.getLogger('sentry.tagstore.v2')
 logger = logging.getLogger('sentry.tagstore.v2')
 
 
 
 
+AGGREGATE_ENVIRONMENT_ID = 0
+
+
 class V2TagStorage(TagStorage):
 class V2TagStorage(TagStorage):
     """\
     """\
     The v2 tagstore backend stores and respects ``environment_id``.
     The v2 tagstore backend stores and respects ``environment_id``.
@@ -136,6 +139,8 @@ class V2TagStorage(TagStorage):
                         })
                         })
 
 
     def create_tag_key(self, project_id, environment_id, key, **kwargs):
     def create_tag_key(self, project_id, environment_id, key, **kwargs):
+        environment_id = AGGREGATE_ENVIRONMENT_ID if environment_id is None else environment_id
+
         return TagKey.objects.create(
         return TagKey.objects.create(
             project_id=project_id,
             project_id=project_id,
             environment_id=environment_id,
             environment_id=environment_id,
@@ -144,6 +149,8 @@ class V2TagStorage(TagStorage):
         )
         )
 
 
     def get_or_create_tag_key(self, project_id, environment_id, key, **kwargs):
     def get_or_create_tag_key(self, project_id, environment_id, key, **kwargs):
+        assert environment_id is not None
+
         return TagKey.objects.get_or_create(
         return TagKey.objects.get_or_create(
             project_id=project_id,
             project_id=project_id,
             environment_id=environment_id,
             environment_id=environment_id,
@@ -152,6 +159,8 @@ class V2TagStorage(TagStorage):
         )
         )
 
 
     def create_tag_value(self, project_id, environment_id, key, value, **kwargs):
     def create_tag_value(self, project_id, environment_id, key, value, **kwargs):
+        environment_id = AGGREGATE_ENVIRONMENT_ID if environment_id is None else environment_id
+
         tag_key_kwargs = kwargs.copy()
         tag_key_kwargs = kwargs.copy()
         for k in ['times_seen', 'first_seen', 'last_seen']:
         for k in ['times_seen', 'first_seen', 'last_seen']:
             tag_key_kwargs.pop(k, None)
             tag_key_kwargs.pop(k, None)
@@ -168,6 +177,8 @@ class V2TagStorage(TagStorage):
 
 
     def get_or_create_tag_value(self, project_id, environment_id,
     def get_or_create_tag_value(self, project_id, environment_id,
                                 key, value, key_id=None, **kwargs):
                                 key, value, key_id=None, **kwargs):
+        assert environment_id is not None
+
         if key_id is None:
         if key_id is None:
             tag_key, _ = self.get_or_create_tag_key(
             tag_key, _ = self.get_or_create_tag_key(
                 project_id, environment_id, key, **kwargs)
                 project_id, environment_id, key, **kwargs)
@@ -181,6 +192,8 @@ class V2TagStorage(TagStorage):
         )
         )
 
 
     def create_group_tag_key(self, project_id, group_id, environment_id, key, **kwargs):
     def create_group_tag_key(self, project_id, group_id, environment_id, key, **kwargs):
+        environment_id = AGGREGATE_ENVIRONMENT_ID if environment_id is None else environment_id
+
         tag_key_kwargs = kwargs.copy()
         tag_key_kwargs = kwargs.copy()
         tag_key_kwargs.pop('values_seen', None)
         tag_key_kwargs.pop('values_seen', None)
 
 
@@ -195,6 +208,8 @@ class V2TagStorage(TagStorage):
         )
         )
 
 
     def get_or_create_group_tag_key(self, project_id, group_id, environment_id, key, **kwargs):
     def get_or_create_group_tag_key(self, project_id, group_id, environment_id, key, **kwargs):
+        assert environment_id is not None
+
         tag_key, _ = self.get_or_create_tag_key(
         tag_key, _ = self.get_or_create_tag_key(
             project_id, environment_id, key, **kwargs)
             project_id, environment_id, key, **kwargs)
 
 
@@ -207,6 +222,8 @@ class V2TagStorage(TagStorage):
 
 
     def create_group_tag_value(self, project_id, group_id, environment_id,
     def create_group_tag_value(self, project_id, group_id, environment_id,
                                key, value, **kwargs):
                                key, value, **kwargs):
+        environment_id = AGGREGATE_ENVIRONMENT_ID if environment_id is None else environment_id
+
         other_kwargs = kwargs.copy()
         other_kwargs = kwargs.copy()
         for k in ['times_seen', 'first_seen', 'last_seen']:
         for k in ['times_seen', 'first_seen', 'last_seen']:
             other_kwargs.pop(k, None)
             other_kwargs.pop(k, None)
@@ -227,6 +244,8 @@ class V2TagStorage(TagStorage):
 
 
     def get_or_create_group_tag_value(self, project_id, group_id,
     def get_or_create_group_tag_value(self, project_id, group_id,
                                       environment_id, key, value, **kwargs):
                                       environment_id, key, value, **kwargs):
+        assert environment_id is not None
+
         tag_key, _ = self.get_or_create_tag_key(
         tag_key, _ = self.get_or_create_tag_key(
             project_id, environment_id, key, **kwargs)
             project_id, environment_id, key, **kwargs)
 
 
@@ -436,7 +455,7 @@ class V2TagStorage(TagStorage):
 
 
     def incr_tag_value_times_seen(self, project_id, environment_id,
     def incr_tag_value_times_seen(self, project_id, environment_id,
                                   key, value, extra=None, count=1):
                                   key, value, extra=None, count=1):
-        for env in [environment_id, None]:
+        for env in [environment_id, AGGREGATE_ENVIRONMENT_ID]:
             tagkey, _ = self.get_or_create_tag_key(project_id, env, key)
             tagkey, _ = self.get_or_create_tag_key(project_id, env, key)
 
 
             buffer.incr(TagValue,
             buffer.incr(TagValue,
@@ -452,7 +471,7 @@ class V2TagStorage(TagStorage):
 
 
     def incr_group_tag_value_times_seen(self, project_id, group_id, environment_id,
     def incr_group_tag_value_times_seen(self, project_id, group_id, environment_id,
                                         key, value, extra=None, count=1):
                                         key, value, extra=None, count=1):
-        for env in [environment_id, None]:
+        for env in [environment_id, AGGREGATE_ENVIRONMENT_ID]:
             tagkey, _ = self.get_or_create_tag_key(project_id, env, key)
             tagkey, _ = self.get_or_create_tag_key(project_id, env, key)
             tagvalue, _ = self.get_or_create_tag_value(project_id, env, key, value)
             tagvalue, _ = self.get_or_create_tag_value(project_id, env, key, value)
 
 
@@ -472,12 +491,14 @@ class V2TagStorage(TagStorage):
         # NOTE: `environment_id=None` needs to be filtered differently in this method.
         # NOTE: `environment_id=None` needs to be filtered differently in this method.
         # EventTag never has NULL `environment_id` fields (individual Events always have an environment),
         # EventTag never has NULL `environment_id` fields (individual Events always have an environment),
         # and so `environment_id=None` needs to query EventTag for *all* environments (except, ironically
         # and so `environment_id=None` needs to query EventTag for *all* environments (except, ironically
-        # the aggregate environment, which is NULL).
+        # the aggregate environment).
 
 
         if environment_id is None:
         if environment_id is None:
             # filter for all 'real' environments
             # filter for all 'real' environments
-            env_filter = {'_key__environment_id__isnull': False}
+            exclude = {'_key__environment_id': AGGREGATE_ENVIRONMENT_ID}
+            env_filter = {}
         else:
         else:
+            exclude = {}
             env_filter = {'_key__environment_id': environment_id}
             env_filter = {'_key__environment_id': environment_id}
 
 
         tagvalue_qs = TagValue.objects.filter(
         tagvalue_qs = TagValue.objects.filter(
@@ -485,7 +506,12 @@ class V2TagStorage(TagStorage):
                          for k, v in six.iteritems(tags))),
                          for k, v in six.iteritems(tags))),
             project_id=project_id,
             project_id=project_id,
             **env_filter
             **env_filter
-        ).values_list('_key_id', 'id', '_key__key', 'value')
+        )
+
+        if exclude:
+            tagvalue_qs = tagvalue_qs.exclude(**exclude)
+
+        tagvalue_qs = tagvalue_qs.values_list('_key_id', 'id', '_key__key', 'value')
 
 
         tagvalues = defaultdict(list)
         tagvalues = defaultdict(list)
         for key_id, value_id, key, value in tagvalue_qs:
         for key_id, value_id, key, value in tagvalue_qs:
@@ -545,6 +571,8 @@ class V2TagStorage(TagStorage):
 
 
     def get_group_tag_value_count(self, project_id, group_id, environment_id, key):
     def get_group_tag_value_count(self, project_id, group_id, environment_id, key):
         if db.is_postgres():
         if db.is_postgres():
+            environment_id = AGGREGATE_ENVIRONMENT_ID if environment_id is None else environment_id
+
             # This doesnt guarantee percentage is accurate, but it does ensure
             # This doesnt guarantee percentage is accurate, but it does ensure
             # that the query has a maximum cost
             # that the query has a maximum cost
             using = router.db_for_read(GroupTagValue)
             using = router.db_for_read(GroupTagValue)
@@ -557,13 +585,13 @@ class V2TagStorage(TagStorage):
                     FROM tagstore_grouptagvalue
                     FROM tagstore_grouptagvalue
                     INNER JOIN tagstore_tagkey
                     INNER JOIN tagstore_tagkey
                     ON (tagstore_grouptagvalue.key_id = tagstore_tagkey.id)
                     ON (tagstore_grouptagvalue.key_id = tagstore_tagkey.id)
-                    WHERE tagstore_grouptagvalue.group_id = %%s
-                    AND tagstore_tagkey.environment_id %s %%s
-                    AND tagstore_tagkey.key = %%s
+                    WHERE tagstore_grouptagvalue.group_id = %s
+                    AND tagstore_tagkey.environment_id = %s
+                    AND tagstore_tagkey.key = %s
                     ORDER BY last_seen DESC
                     ORDER BY last_seen DESC
                     LIMIT 10000
                     LIMIT 10000
                 ) as a
                 ) as a
-            """ % ('IS' if environment_id is None else '='), [group_id, environment_id, key]
+            """, [group_id, environment_id, key]
             )
             )
             return cursor.fetchone()[0] or 0
             return cursor.fetchone()[0] or 0
 
 
@@ -578,6 +606,8 @@ class V2TagStorage(TagStorage):
 
 
     def get_top_group_tag_values(self, project_id, group_id, environment_id, key, limit=3):
     def get_top_group_tag_values(self, project_id, group_id, environment_id, key, limit=3):
         if db.is_postgres():
         if db.is_postgres():
+            environment_id = AGGREGATE_ENVIRONMENT_ID if environment_id is None else environment_id
+
             # This doesnt guarantee percentage is accurate, but it does ensure
             # This doesnt guarantee percentage is accurate, but it does ensure
             # that the query has a maximum cost
             # that the query has a maximum cost
             return list(
             return list(
@@ -597,14 +627,14 @@ class V2TagStorage(TagStorage):
                     INNER JOIN tagstore_tagkey
                     INNER JOIN tagstore_tagkey
                     ON (tagstore_grouptagvalue.key_id = tagstore_tagkey.id)
                     ON (tagstore_grouptagvalue.key_id = tagstore_tagkey.id)
                     WHERE tagstore_grouptagvalue.group_id = %%s
                     WHERE tagstore_grouptagvalue.group_id = %%s
-                    AND tagstore_tagkey.environment_id %s %%s
+                    AND tagstore_tagkey.environment_id = %%s
                     AND tagstore_tagkey.key = %%s
                     AND tagstore_tagkey.key = %%s
                     ORDER BY last_seen DESC
                     ORDER BY last_seen DESC
                     LIMIT 10000
                     LIMIT 10000
                 ) as a
                 ) as a
                 ORDER BY times_seen DESC
                 ORDER BY times_seen DESC
                 LIMIT %d
                 LIMIT %d
-            """ % ('IS' if environment_id is None else '=', limit), [group_id, environment_id, key]
+            """ % limit, [group_id, environment_id, key]
                 )
                 )
             )
             )
 
 
@@ -655,7 +685,7 @@ class V2TagStorage(TagStorage):
     def get_group_ids_for_users(self, project_ids, event_users, limit=100):
     def get_group_ids_for_users(self, project_ids, event_users, limit=100):
         return list(GroupTagValue.objects.filter(
         return list(GroupTagValue.objects.filter(
             project_id__in=project_ids,
             project_id__in=project_ids,
-            _key__environment_id__isnull=True,
+            _key__environment_id=AGGREGATE_ENVIRONMENT_ID,
             _key__key='sentry:user',
             _key__key='sentry:user',
             _value__value__in=[eu.tag_value for eu in event_users],
             _value__value__in=[eu.tag_value for eu in event_users],
         ).order_by('-last_seen').values_list('group_id', flat=True)[:limit])
         ).order_by('-last_seen').values_list('group_id', flat=True)[:limit])
@@ -668,7 +698,7 @@ class V2TagStorage(TagStorage):
 
 
         return list(GroupTagValue.objects.filter(
         return list(GroupTagValue.objects.filter(
             reduce(or_, tag_filters),
             reduce(or_, tag_filters),
-            _key__environment_id__isnull=True,
+            _key__environment_id=AGGREGATE_ENVIRONMENT_ID,
             _key__key='sentry:user',
             _key__key='sentry:user',
         ).order_by('-last_seen')[:limit])
         ).order_by('-last_seen')[:limit])
 
 
@@ -766,15 +796,11 @@ class V2TagStorage(TagStorage):
         Filter a queryset by the provided `environment_id`, handling
         Filter a queryset by the provided `environment_id`, handling
         whether a JOIN is required or not depending on the model.
         whether a JOIN is required or not depending on the model.
         """
         """
+        if environment_id is None:
+            environment_id = AGGREGATE_ENVIRONMENT_ID
         if queryset.model == TagKey:
         if queryset.model == TagKey:
-            if environment_id is None:
-                return queryset.filter(environment_id__isnull=True)
-            else:
-                return queryset.filter(environment_id=environment_id)
+            return queryset.filter(environment_id=environment_id)
         elif queryset.model in (TagValue, GroupTagKey, GroupTagValue):
         elif queryset.model in (TagValue, GroupTagKey, GroupTagValue):
-            if environment_id is None:
-                return queryset.filter(_key__environment_id__isnull=True)
-            else:
-                return queryset.filter(_key__environment_id=environment_id)
+            return queryset.filter(_key__environment_id=environment_id)
         else:
         else:
             raise ValueError("queryset of unsupported model '%s' provided" % queryset.model)
             raise ValueError("queryset of unsupported model '%s' provided" % queryset.model)

+ 1 - 1
src/sentry/tagstore/v2/models/tagkey.py

@@ -26,7 +26,7 @@ class TagKey(Model):
     __core__ = False
     __core__ = False
 
 
     project_id = BoundedBigIntegerField(db_index=True)
     project_id = BoundedBigIntegerField(db_index=True)
-    environment_id = BoundedBigIntegerField(null=True)
+    environment_id = BoundedBigIntegerField()
     key = models.CharField(max_length=MAX_TAG_KEY_LENGTH)
     key = models.CharField(max_length=MAX_TAG_KEY_LENGTH)
     values_seen = BoundedPositiveIntegerField(default=0)
     values_seen = BoundedPositiveIntegerField(default=0)
     status = BoundedPositiveIntegerField(
     status = BoundedPositiveIntegerField(

+ 1 - 0
tests/fixtures/cleanup-tagstore-v2.json

@@ -3,6 +3,7 @@
       "fields": {
       "fields": {
         "id": 1,
         "id": 1,
         "project_id": 1,
         "project_id": 1,
+        "environment_id": 0,
         "key": "key",
         "key": "key",
         "values_seen": 1,
         "values_seen": 1,
         "status": 0
         "status": 0

+ 2 - 2
tests/sentry/tagstore/v2/test_backend.py

@@ -589,7 +589,7 @@ class TagStorage(TestCase):
         v1, _ = self.ts.get_or_create_group_tag_value(
         v1, _ = self.ts.get_or_create_group_tag_value(
             self.proj1.id,
             self.proj1.id,
             self.proj1group1.id,
             self.proj1group1.id,
-            None,
+            0,
             'sentry:user',
             'sentry:user',
             'email:user@sentry.io')
             'email:user@sentry.io')
 
 
@@ -605,7 +605,7 @@ class TagStorage(TestCase):
         v1, _ = self.ts.get_or_create_group_tag_value(
         v1, _ = self.ts.get_or_create_group_tag_value(
             self.proj1.id,
             self.proj1.id,
             self.proj1group1.id,
             self.proj1group1.id,
-            None,
+            0,
             'sentry:user',
             'sentry:user',
             'email:user@sentry.io')
             'email:user@sentry.io')