Browse Source

ref: Remove legacy message use internally (#10621)

* ref: Kill the last remains of the legacy message use internally

* ref: Remove unused group message fallback in ui

* ref: Change the test only fallback code in get_event_metadata

* ref: Refactor legacy code path for metadata

* ref: Stop using deprecated attributes

* chore: Remove unused print

* ref: Always run normalization in tests

* ref: Add metadata compat to tombstone

* fix: Fix some potentially bad snuba tests

* feat: Support user to create_event

* ref: Change for_store to ctor

* ref: Fixed an incorrect user set in create_event

* fix: fixed a typo

* fix: Pass an empty env in snuba test
Armin Ronacher 6 years ago
parent
commit
be3bb3c69f

+ 3 - 0
src/sentry/api/serializers/snuba.py

@@ -57,6 +57,9 @@ def geo_by_addr(ip):
 
 
 def serialize_eventusers(organization, item_list, user, lookup):
+    if not item_list:
+        return {}
+
     # We have no reliable way to map the tag value format
     # back into real EventUser rows. EventUser is only unique
     # per-project, and this is an organization aggregate.

+ 1 - 1
src/sentry/constants.py

@@ -167,7 +167,7 @@ CLIENT_RESERVED_ATTRS = (
     'project', 'errors', 'event_id', 'message', 'checksum', 'culprit', 'fingerprint', 'level',
     'time_spent', 'logger', 'server_name', 'site', 'received', 'timestamp', 'extra', 'modules',
     'tags', 'platform', 'release', 'dist', 'environment', 'transaction', 'key_id', '_meta',
-    'applecrashreport', 'device', 'repos', 'query',
+    'applecrashreport', 'device', 'repos', 'query', 'type',
 )
 
 # XXX: Must be all lowercase

+ 137 - 103
src/sentry/event_manager.py

@@ -84,6 +84,20 @@ SECURITY_REPORT_INTERFACES = (
 ENABLE_RUST = os.environ.get("SENTRY_USE_RUST_NORMALIZER", "false").lower() in ("1", "true")
 
 
+def get_event_metadata_compat(data, fallback_message):
+    """This is a fallback path to getting the event metadata.  This is used
+    by some code paths that could potentially deal with old sentry events that
+    do not have metadata yet.  This does not happen in practice any more but
+    the testsuite was never adapted so the tests hit this code path constantly.
+    """
+    etype = data.get('type') or 'default'
+    if 'metadata' not in data:
+        data = dict(data)
+        data['logentry'] = {'formatted': fallback_message}
+        return eventtypes.get(etype)(data).get_metadata()
+    return data['metadata']
+
+
 def count_limit(count):
     # TODO: could we do something like num_to_store = max(math.sqrt(100*count)+59, 200) ?
     # ~ 150 * ((log(n) - 1.5) ^ 2 - 0.25)
@@ -386,6 +400,7 @@ class EventManager(object):
         auth=None,
         key=None,
         content_encoding=None,
+        for_store=True,
     ):
         self._data = _decode_event(data, content_encoding=content_encoding)
         self.version = version
@@ -394,6 +409,7 @@ class EventManager(object):
         self._user_agent = user_agent
         self._auth = auth
         self._key = key
+        self._for_store = for_store
 
     def process_csp_report(self):
         """Only called from the CSP report endpoint."""
@@ -466,14 +482,19 @@ class EventManager(object):
             return
 
         data = self._data
-        if self._project is not None:
-            data['project'] = self._project.id
-        if self._key is not None:
-            data['key_id'] = self._key.id
-        if self._auth is not None:
-            data['sdk'] = data.get('sdk') or parse_client_as_sdk(self._auth.client)
 
-        errors = data['errors'] = []
+        if self._for_store:
+            if self._project is not None:
+                data['project'] = self._project.id
+            if self._key is not None:
+                data['key_id'] = self._key.id
+            if self._auth is not None:
+                data['sdk'] = data.get('sdk') or parse_client_as_sdk(self._auth.client)
+
+        # permit the client to transmit errors as well.
+        errors = data.get('errors')
+        if not errors:
+            errors = data['errors'] = []
 
         # Before validating with a schema, attempt to cast values to their desired types
         # so that the schema doesn't have to take every type variation into account.
@@ -516,10 +537,17 @@ class EventManager(object):
                     errors.append({'type': EventError.INVALID_DATA, 'name': c, 'value': data[c]})
                     del data[c]
 
-        # raw 'message' is coerced to the Message interface, as its used for pure index of
-        # searchable strings. If both a raw 'message' and a Message interface exist, try and
-        # add the former as the 'formatted' attribute of the latter.
-        # See GH-3248
+        # raw 'message' is coerced to the Message interface.  Longer term
+        # we want to treat 'message' as a pure alias for 'logentry' but
+        # for now that won't be the case.
+        #
+        # TODO(mitsuhiko): the logic we want to apply here long term is
+        # to
+        #
+        # 1. make logentry.message optional
+        # 2. make logentry.formatted the primary value
+        # 3. always treat a string as an alias for `logentry.formatted`
+        # 4. remove the custom coercion logic here
         msg_str = data.pop('message', None)
         if msg_str:
             msg_if = data.get('logentry')
@@ -587,63 +615,64 @@ class EventManager(object):
                 log('Discarded invalid value for interface: %s (%r)', k, value, exc_info=True)
                 errors.append({'type': EventError.INVALID_DATA, 'name': k, 'value': value})
 
-        # Additional data coercion and defaulting
-        level = data.get('level') or DEFAULT_LOG_LEVEL
-        if isinstance(level, int) or (isinstance(level, six.string_types) and level.isdigit()):
-            level = LOG_LEVELS.get(int(level), DEFAULT_LOG_LEVEL)
-        data['level'] = LOG_LEVELS_MAP.get(level, LOG_LEVELS_MAP[DEFAULT_LOG_LEVEL])
-
-        if data.get('dist') and not data.get('release'):
-            data['dist'] = None
-
-        timestamp = data.get('timestamp')
-        if not timestamp:
-            timestamp = timezone.now()
-
-        # TODO (alex) can this all be replaced by utcnow?
-        # it looks like the only time that this would even be hit is when timestamp
-        # is not defined, as the earlier process_timestamp already converts existing
-        # timestamps to floats.
-        if isinstance(timestamp, datetime):
-            # We must convert date to local time so Django doesn't mess it up
-            # based on TIME_ZONE
-            if settings.TIME_ZONE:
-                if not timezone.is_aware(timestamp):
-                    timestamp = timestamp.replace(tzinfo=timezone.utc)
-            elif timezone.is_aware(timestamp):
-                timestamp = timestamp.replace(tzinfo=None)
-            timestamp = float(timestamp.strftime('%s'))
-
-        data['timestamp'] = timestamp
-        data['received'] = float(timezone.now().strftime('%s'))
-
-        data.setdefault('checksum', None)
-        data.setdefault('culprit', None)
-        data.setdefault('dist', None)
-        data.setdefault('environment', None)
-        data.setdefault('extra', {})
-        data.setdefault('fingerprint', None)
-        data.setdefault('logger', DEFAULT_LOGGER_NAME)
-        data.setdefault('platform', None)
-        data.setdefault('server_name', None)
-        data.setdefault('site', None)
-        data.setdefault('tags', [])
-        data.setdefault('transaction', None)
-
-        # Fix case where legacy apps pass 'environment' as a tag
-        # instead of a top level key.
-        # TODO (alex) save() just reinserts the environment into the tags
-        if not data.get('environment'):
-            tagsdict = dict(data['tags'])
-            if 'environment' in tagsdict:
-                data['environment'] = tagsdict['environment']
-                del tagsdict['environment']
-                data['tags'] = tagsdict.items()
-
-        # the SDKs currently do not describe event types, and we must infer
-        # them from available attributes
-        data['type'] = eventtypes.infer(data).key
-        data['version'] = self.version
+        # Additional data coercion and defaulting we only do for store.
+        if self._for_store:
+            level = data.get('level') or DEFAULT_LOG_LEVEL
+            if isinstance(level, int) or (isinstance(level, six.string_types) and level.isdigit()):
+                level = LOG_LEVELS.get(int(level), DEFAULT_LOG_LEVEL)
+            data['level'] = LOG_LEVELS_MAP.get(level, LOG_LEVELS_MAP[DEFAULT_LOG_LEVEL])
+
+            if data.get('dist') and not data.get('release'):
+                data['dist'] = None
+
+            timestamp = data.get('timestamp')
+            if not timestamp:
+                timestamp = timezone.now()
+
+            # TODO (alex) can this all be replaced by utcnow?
+            # it looks like the only time that this would even be hit is when timestamp
+            # is not defined, as the earlier process_timestamp already converts existing
+            # timestamps to floats.
+            if isinstance(timestamp, datetime):
+                # We must convert date to local time so Django doesn't mess it up
+                # based on TIME_ZONE
+                if settings.TIME_ZONE:
+                    if not timezone.is_aware(timestamp):
+                        timestamp = timestamp.replace(tzinfo=timezone.utc)
+                elif timezone.is_aware(timestamp):
+                    timestamp = timestamp.replace(tzinfo=None)
+                timestamp = float(timestamp.strftime('%s'))
+
+            data['timestamp'] = timestamp
+            data['received'] = float(timezone.now().strftime('%s'))
+
+            data.setdefault('checksum', None)
+            data.setdefault('culprit', None)
+            data.setdefault('dist', None)
+            data.setdefault('environment', None)
+            data.setdefault('extra', {})
+            data.setdefault('fingerprint', None)
+            data.setdefault('logger', DEFAULT_LOGGER_NAME)
+            data.setdefault('platform', None)
+            data.setdefault('server_name', None)
+            data.setdefault('site', None)
+            data.setdefault('tags', [])
+            data.setdefault('transaction', None)
+
+            # Fix case where legacy apps pass 'environment' as a tag
+            # instead of a top level key.
+            # TODO (alex) save() just reinserts the environment into the tags
+            if not data.get('environment'):
+                tagsdict = dict(data['tags'])
+                if 'environment' in tagsdict:
+                    data['environment'] = tagsdict['environment']
+                    del tagsdict['environment']
+                    data['tags'] = tagsdict.items()
+
+            # the SDKs currently do not describe event types, and we must infer
+            # them from available attributes
+            data['type'] = eventtypes.infer(data).key
+            data['version'] = self.version
 
         exception = data.get('exception')
         stacktrace = data.get('stacktrace')
@@ -674,15 +703,22 @@ class EventManager(object):
             data.setdefault('user', {}).setdefault('ip_address', self._client_ip)
 
         # Trim values
-        data['logger'] = trim(data['logger'].strip(), 64)
-        trim_dict(data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)
+        if data.get('logger'):
+            data['logger'] = trim(data['logger'].strip(), 64)
+
+        if data.get('extra'):
+            trim_dict(data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)
 
-        if data['culprit']:
+        if data.get('culprit'):
             data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)
 
-        if data['transaction']:
+        if data.get('transaction'):
             data['transaction'] = trim(data['transaction'], MAX_CULPRIT_LENGTH)
 
+        # Do not add errors unless there are for non store mode
+        if not self._for_store and not data.get('errors'):
+            self._data.pop('errors')
+
         self._data = data
 
     def should_filter(self):
@@ -754,6 +790,29 @@ class EventManager(object):
             platform=platform
         )
 
+    def get_search_message(self, data, event_metadata=None, culprit=None):
+        """This generates the internal event.message attribute which is used
+        for search purposes.  It adds a bunch of data from the metadata and
+        the culprit.
+        """
+        message = ''
+
+        if 'logentry' in data:
+            message += (data['logentry'].get('formatted') or
+                        data['logentry'].get('message') or '')
+
+        if event_metadata:
+            for value in six.itervalues(event_metadata):
+                value_u = force_text(value, errors='replace')
+                if value_u not in message:
+                    message = u'{} {}'.format(message, value_u)
+
+        if culprit and culprit not in message:
+            culprit_u = force_text(culprit, errors='replace')
+            message = u'{} {}'.format(message, culprit_u)
+
+        return trim(message.strip(), settings.SENTRY_MAX_MESSAGE_LENGTH)
+
     def save(self, project_id, raw=False):
         from sentry.tasks.post_process import index_event_tags
 
@@ -799,8 +858,10 @@ class EventManager(object):
         environment = data.pop('environment', None)
         recorded_timestamp = data.get("timestamp")
 
-        # unused
-        message = data.pop('message', '')
+        # old events had a small chance of having a legacy message
+        # attribute show up here.  In all reality this is being coerced
+        # into logentry for more than two years at this point (2018).
+        data.pop('message', None)
 
         event = self._get_event_instance(project_id=project_id)
         event._project_cache = project
@@ -897,46 +958,19 @@ class EventManager(object):
         else:
             hashes = [md5_from_hash(h) for h in get_hashes_for_event(event)]
 
-        # TODO(dcramer): temp workaround for complexity
-        data['message'] = message
         event_type = eventtypes.get(data.get('type', 'default'))(data)
         event_metadata = event_type.get_metadata()
-        # TODO(dcramer): temp workaround for complexity
-        del data['message']
 
         data['type'] = event_type.key
         data['metadata'] = event_metadata
 
         # index components into ``Event.message``
         # See GH-3248
-        if event_type.key != 'default':
-            if 'logentry' in data and \
-                    data['logentry']['message'] != message:
-                message = u'{} {}'.format(
-                    message,
-                    data['logentry']['message'],
-                )
-
-        if not message:
-            message = ''
-        elif not isinstance(message, six.string_types):
-            message = force_text(message)
-
-        for value in six.itervalues(event_metadata):
-            value_u = force_text(value, errors='replace')
-            if value_u not in message:
-                message = u'{} {}'.format(message, value_u)
-
-        if culprit and culprit not in message:
-            culprit_u = force_text(culprit, errors='replace')
-            message = u'{} {}'.format(message, culprit_u)
-
-        message = trim(message.strip(), settings.SENTRY_MAX_MESSAGE_LENGTH)
+        event.message = self.get_search_message(data, event_metadata, culprit)
 
-        event.message = message
         kwargs = {
             'platform': platform,
-            'message': message
+            'message': event.message
         }
 
         received_timestamp = event.data.get('received') or float(event.datetime.strftime('%s'))

+ 3 - 7
src/sentry/eventtypes/base.py

@@ -27,13 +27,9 @@ class DefaultEvent(BaseEvent):
         return True
 
     def get_metadata(self):
-        # See GH-3248
-        message_interface = self.data.get(
-            'logentry', {
-                'message': self.data.get('message', ''),
-            }
-        )
-        message = strip(message_interface.get('formatted', message_interface['message']))
+        message_interface = self.data.get('logentry') or {}
+        message = strip(message_interface.get('formatted') or
+                        message_interface.get('message'))
         if not message:
             title = '<unlabeled event>'
         else:

+ 2 - 8
src/sentry/models/event.py

@@ -116,14 +116,8 @@ class Event(Model):
 
         See ``sentry.eventtypes``.
         """
-        etype = self.data.get('type', 'default')
-        if 'metadata' not in self.data:
-            # TODO(dcramer): remove after Dec 1 2016
-            data = dict(self.data or {})
-            data['message'] = self.message
-            data = CanonicalKeyView(data)
-            return eventtypes.get(etype)(data).get_metadata()
-        return self.data['metadata']
+        from sentry.event_manager import get_event_metadata_compat
+        return get_event_metadata_compat(self.data, self.message)
 
     @property
     def title(self):

+ 2 - 8
src/sentry/models/group.py

@@ -392,14 +392,8 @@ class Group(Model):
 
         See ``sentry.eventtypes``.
         """
-        etype = self.data.get('type')
-        if etype is None:
-            etype = 'default'
-        if 'metadata' not in self.data:
-            data = self.data.copy() if self.data else {}
-            data['message'] = self.message
-            return eventtypes.get(etype)(data).get_metadata()
-        return self.data['metadata']
+        from sentry.event_manager import get_event_metadata_compat
+        return get_event_metadata_compat(self.data, self.message)
 
     @property
     def title(self):

+ 2 - 9
src/sentry/models/grouptombstone.py

@@ -4,7 +4,6 @@ import logging
 
 from django.db import models
 
-from sentry import eventtypes
 from sentry.constants import LOG_LEVELS, MAX_CULPRIT_LENGTH
 from sentry.db.models import (
     BoundedPositiveIntegerField, FlexibleForeignKey, GzippedDictField, Model
@@ -48,11 +47,5 @@ class GroupTombstone(Model):
 
         See ``sentry.eventtypes``.
         """
-        etype = self.data.get('type')
-        if etype is None:
-            etype = 'default'
-        if 'metadata' not in self.data:
-            data = self.data.copy() if self.data else {}
-            data['message'] = self.message
-            return eventtypes.get(etype)(data).get_metadata()
-        return self.data['metadata']
+        from sentry.event_manager import get_event_metadata_compat
+        return get_event_metadata_compat(self.data, self.message)

+ 1 - 1
src/sentry/static/sentry/app/views/groupDetails.jsx

@@ -188,7 +188,7 @@ const GroupDetails = createReactClass({
       case 'default':
         return group.metadata.title;
       default:
-        return group.message.split('\n')[0];
+        return '';
     }
   },
 

+ 1 - 1
src/sentry/templates/sentry/emails/activity/generic.txt

@@ -7,7 +7,7 @@
 
 ## Issue Details
 
-{{ group.message_short }}
+{{ group.title }}
 
 {{ link }}
 {% if unsubscribe_link %}

+ 1 - 1
src/sentry/templates/sentry/emails/activity/new-user-feedback.txt

@@ -9,7 +9,7 @@
 
 ## Details
 
-{{ group.message_short }}
+{{ group.title }}
 
 {{ link }}
 

Some files were not shown because too many files changed in this diff