Browse Source

ref: Update the Logentry interface (#11078)

This removes some deprecated behavior from the logentry interface and makes it 
more deterministic. It is a preparation of our move to Rust normalization:

 - `message` is now an alias of `logentry` and `sentry.interfaces.Message`
 - `logentry.formatted` the canonical key, remove `logentry.message` if they are
   the same
 - Trim everything at the end, after interpolating
Jan Michael Auer 6 years ago
parent
commit
5ef5e51f67

+ 3 - 1
src/sentry/data/samples/invalid-interfaces.json

@@ -87,7 +87,9 @@
         ]
     },
     "logentry": {
-        "formatted": "without message attribute"
+        "params": [
+            "missing message or formatted"
+        ]
     },
     "request": {
         "method": "unknown"

+ 1 - 24
src/sentry/event_manager.py

@@ -490,7 +490,7 @@ class EventManager(object):
             'time_spent': lambda v: int(v) if v is not None else v,
             'tags': lambda v: [(text(v_k).replace(' ', '-').strip(), text(v_v).strip()) for (v_k, v_v) in dict(v).items()],
             'platform': lambda v: v if v in VALID_PLATFORMS else 'other',
-            'logentry': lambda v: v if isinstance(v, dict) else {'message': v},
+            'logentry': lambda v: {'message': v} if (v and not isinstance(v, dict)) else (v or None),
 
             # These can be sent as lists and need to be converted to {'values': [...]}
             'exception': to_values,
@@ -513,29 +513,6 @@ class EventManager(object):
         data['timestamp'] = process_timestamp(data.get('timestamp'),
                                               meta.enter('timestamp'))
 
-        # raw 'message' is coerced to the Message interface.  Longer term
-        # we want to treat 'message' as a pure alias for 'logentry' but
-        # for now that won't be the case.
-        #
-        # TODO(mitsuhiko): the logic we want to apply here long term is
-        # to
-        #
-        # 1. make logentry.message optional
-        # 2. make logentry.formatted the primary value
-        # 3. always treat a string as an alias for `logentry.formatted`
-        # 4. remove the custom coercion logic here
-        msg_str = data.pop('message', None)
-        if msg_str:
-            msg_if = data.get('logentry')
-
-            if not msg_if:
-                msg_if = data['logentry'] = {'message': msg_str}
-                meta.enter('logentry', 'message').merge(meta.enter('message'))
-
-            if msg_if.get('message') != msg_str and not msg_if.get('formatted'):
-                msg_if['formatted'] = msg_str
-                meta.enter('logentry', 'formatted').merge(meta.enter('message'))
-
         # Fill in ip addresses marked as {{auto}}
         if self._client_ip:
             if get_path(data, 'request', 'env', 'REMOTE_ADDR') == '{{auto}}':

+ 39 - 49
src/sentry/interfaces/message.py

@@ -19,16 +19,22 @@ from sentry.utils import json
 from sentry.utils.safe import trim
 
 
+def stringify(value):
+    if isinstance(value, six.string_types):
+        return value
+
+    if isinstance(value, (int, float, bool)):
+        return json.dumps(value)
+
+    return None
+
+
 class Message(Interface):
     """
-    A standard message consisting of a ``message`` arg, an an optional
-    ``params`` arg for formatting, and an optional ``formatted`` message which
-    is the result of ``message`` combined with ``params``.
-
-    If your message cannot be parameterized, then the message interface
-    will serve no benefit.
+    A message consisting of either a ``formatted`` arg, or an optional
+    ``message`` with a list of ``params``.
 
-    - ``message`` must be no more than 1000 characters in length.
+    - ``message`` and ``formatted`` are limited to 1000 characters.
 
     >>> {
     >>>     "message": "My raw message with interpreted strings like %s",
@@ -43,54 +49,38 @@ class Message(Interface):
 
     @classmethod
     def to_python(cls, data):
-        if not data.get('message'):
-            raise InterfaceValidationError("No 'message' present")
-
-        # TODO(dcramer): some day we should stop people from sending arbitrary
-        # crap to the server
-        if not isinstance(data['message'], six.string_types):
-            data['message'] = json.dumps(data['message'])
-
-        kwargs = {
-            'message': trim(data['message'], settings.SENTRY_MAX_MESSAGE_LENGTH),
-            'formatted': data.get('formatted'),
-        }
-
-        if data.get('params'):
-            kwargs['params'] = trim(data['params'], 1024)
+        formatted = stringify(data.get('formatted'))
+        message = stringify(data.get('message'))
+        if formatted is None and message is None:
+            raise InterfaceValidationError("No message present")
+
+        params = data.get('params')
+        if isinstance(params, (list, tuple)):
+            params = tuple(p for p in params)
+        elif isinstance(params, dict):
+            params = {k: v for k, v in six.iteritems(params)}
         else:
-            kwargs['params'] = ()
-
-        if kwargs['formatted']:
-            if not isinstance(kwargs['formatted'], six.string_types):
-                kwargs['formatted'] = json.dumps(data['formatted'])
-        # support python-esque formatting (e.g. %s)
-        elif '%' in kwargs['message'] and kwargs['params']:
-            if isinstance(kwargs['params'], list):
-                kwargs['params'] = tuple(kwargs['params'])
+            params = ()
 
+        if formatted is None and params:
             try:
-                kwargs['formatted'] = trim(
-                    kwargs['message'] % kwargs['params'],
-                    settings.SENTRY_MAX_MESSAGE_LENGTH,
-                )
-            except Exception:
-                pass
-        # support very basic placeholder formatters (non-typed)
-        elif '{}' in kwargs['message'] and kwargs['params']:
-            try:
-                kwargs['formatted'] = trim(
-                    kwargs['message'].format(kwargs['params']),
-                    settings.SENTRY_MAX_MESSAGE_LENGTH,
-                )
+                if '%' in message:
+                    formatted = message % params
+                elif '{}' in message and isinstance(params, tuple):
+                    formatted = message.format(*params)
+                # NB: Named newstyle arguments were never supported
             except Exception:
                 pass
 
-        # don't wastefully store formatted message twice
-        if kwargs['formatted'] == kwargs['message']:
-            kwargs['formatted'] = None
+        if formatted is None or message == formatted:
+            formatted = message
+            message = None
 
-        return cls(**kwargs)
+        return cls(
+            formatted=trim(formatted, settings.SENTRY_MAX_MESSAGE_LENGTH),
+            message=trim(message, settings.SENTRY_MAX_MESSAGE_LENGTH),
+            params=trim(params, 1024),
+        )
 
     def to_json(self):
         return prune_empty_keys({
@@ -100,7 +90,7 @@ class Message(Interface):
         })
 
     def get_hash(self):
-        return [self.message]
+        return [self.message or self.formatted]
 
     def to_string(self, event, is_public=False, **kwargs):
         return self.formatted or self.message

+ 38 - 24
src/sentry/utils/canonical.py

@@ -16,29 +16,44 @@ import six
 
 __all__ = ('CanonicalKeyDict', 'CanonicalKeyView', 'get_canonical_name')
 
-CANONICAL_KEY_MAPPING = {
-    'sentry.interfaces.Exception': 'exception',
-    'sentry.interfaces.Message': 'logentry',
-    'sentry.interfaces.Stacktrace': 'stacktrace',
-    'sentry.interfaces.Template': 'template',
-    'sentry.interfaces.Http': 'request',
-    'sentry.interfaces.User': 'user',
-    'sentry.interfaces.Csp': 'csp',
-    'sentry.interfaces.Breadcrumbs': 'breadcrumbs',
-    'sentry.interfaces.Contexts': 'contexts',
-    'sentry.interfaces.Threads': 'threads',
-    'sentry.interfaces.DebugMeta': 'debug_meta',
+
+LEGACY_KEY_MAPPING = {
+    'exception': ('sentry.interfaces.Exception',),
+    'logentry': ('sentry.interfaces.Message', 'message',),
+    'stacktrace': ('sentry.interfaces.Stacktrace',),
+    'template': ('sentry.interfaces.Template',),
+    'request': ('sentry.interfaces.Http',),
+    'user': ('sentry.interfaces.User',),
+    'csp': ('sentry.interfaces.Csp',),
+    'breadcrumbs': ('sentry.interfaces.Breadcrumbs',),
+    'contexts': ('sentry.interfaces.Contexts',),
+    'threads': ('sentry.interfaces.Threads',),
+    'debug_meta': ('sentry.interfaces.DebugMeta',),
 }
 
-LEGACY_KEY_MAPPING = {CANONICAL_KEY_MAPPING[k]: k for k in CANONICAL_KEY_MAPPING}
+
+CANONICAL_KEY_MAPPING = {
+    'message': ('logentry', 'sentry.interfaces.Message',),
+    'sentry.interfaces.Exception': ('exception',),
+    'sentry.interfaces.Message': ('logentry',),
+    'sentry.interfaces.Stacktrace': ('stacktrace',),
+    'sentry.interfaces.Template': ('template',),
+    'sentry.interfaces.Http': ('request',),
+    'sentry.interfaces.User': ('user',),
+    'sentry.interfaces.Csp': ('csp',),
+    'sentry.interfaces.Breadcrumbs': ('breadcrumbs',),
+    'sentry.interfaces.Contexts': ('contexts',),
+    'sentry.interfaces.Threads': ('threads',),
+    'sentry.interfaces.DebugMeta': ('debug_meta',),
+}
 
 
 def get_canonical_name(key):
-    return CANONICAL_KEY_MAPPING.get(key, key)
+    return CANONICAL_KEY_MAPPING.get(key, (key,))[0]
 
 
 def get_legacy_name(key):
-    return LEGACY_KEY_MAPPING.get(key, key)
+    return LEGACY_KEY_MAPPING.get(key, (key,))[0]
 
 
 class CanonicalKeyView(collections.Mapping):
@@ -58,18 +73,17 @@ class CanonicalKeyView(collections.Mapping):
         # Preserve the order of iteration while prioritizing canonical keys
         keys = list(self.data)
         for key in keys:
-            canonical = get_canonical_name(key)
-            if canonical == key or canonical not in keys:
-                yield canonical
+            canonicals = CANONICAL_KEY_MAPPING.get(key, ())
+            if not canonicals:
+                yield key
+            elif all(k not in keys for k in canonicals):
+                yield canonicals[0]
 
     def __getitem__(self, key):
         canonical = get_canonical_name(key)
-        if canonical in self.data:
-            return self.data[canonical]
-
-        legacy = get_legacy_name(key)
-        if legacy in self.data:
-            return self.data[legacy]
+        for k in (canonical,) + LEGACY_KEY_MAPPING.get(canonical, ()):
+            if k in self.data:
+                return self.data[k]
 
         raise KeyError(key)
 

+ 3 - 1
src/sentry/utils/samples.py

@@ -143,7 +143,9 @@ def load_data(platform, default=None, sample_name=None):
         return data
 
     data['platform'] = platform
-    data['message'] = 'This is an example %s exception' % (sample_name or platform, )
+    # XXX: Message is a legacy alias for logentry. Do not overwrite if set.
+    if 'message' not in data:
+        data['message'] = 'This is an example %s exception' % (sample_name or platform, )
     data['user'] = generate_user(
         ip_address='127.0.0.1',
         username='sentry',

+ 2 - 2
tests/integration/tests.py

@@ -176,7 +176,7 @@ class RavenIntegrationTest(TransactionTestCase):
         group = Group.objects.get()
         assert group.event_set.count() == 1
         instance = group.event_set.get()
-        assert instance.data['logentry']['message'] == 'foo'
+        assert instance.data['logentry']['formatted'] == 'foo'
 
 
 class SentryRemoteTest(TestCase):
@@ -503,7 +503,7 @@ class CspReportTest(TestCase):
         assert Event.objects.count() == 1
         e = Event.objects.all()[0]
         Event.objects.bind_nodes([e], 'data')
-        assert output['message'] == e.data['logentry']['message']
+        assert output['message'] == e.data['logentry']['formatted']
         for key, value in six.iteritems(output['tags']):
             assert e.get_tag(key) == value
         for key, value in six.iteritems(output['data']):

+ 2 - 2
tests/sentry/api/serializers/test_event.py

@@ -56,10 +56,10 @@ class EventSerializerTest(TestCase):
     def test_message_interface(self):
         event = self.create_event(
             data={
-                'logentry': {'message': 'bar'},
+                'logentry': {'formatted': 'bar'},
                 '_meta': {
                     'logentry': {
-                        'message': {'': {'err': ['some error']}},
+                        'formatted': {'': {'err': ['some error']}},
                     },
                 },
             }

+ 18 - 13
tests/sentry/event_manager/test_event_manager.py

@@ -1111,19 +1111,28 @@ class EventManagerTest(TransactionTestCase):
 
         assert event.message == 'hello world'
 
-    def test_bad_message(self):
-        # test that the message is handled gracefully
+    def test_stringified_message(self):
         manager = EventManager(make_event(**{
             'message': 1234,
         }))
         manager.normalize()
         event = manager.save(self.project.id)
 
-        assert event.message == '1234'
         assert event.data['logentry'] == {
-            'message': '1234',
+            'formatted': '1234',
         }
 
+    def test_bad_message(self):
+        # test that invalid messages are rejected
+        manager = EventManager(make_event(**{
+            'message': ['asdf'],
+        }))
+        manager.normalize()
+        event = manager.save(self.project.id)
+
+        assert event.message == '<unlabeled event>'
+        assert 'logentry' not in event.data
+
     def test_message_attribute_goes_to_interface(self):
         manager = EventManager(make_event(**{
             'message': 'hello world',
@@ -1131,13 +1140,11 @@ class EventManagerTest(TransactionTestCase):
         manager.normalize()
         event = manager.save(self.project.id)
         assert event.data['logentry'] == {
-            'message': 'hello world',
+            'formatted': 'hello world',
         }
 
-    def test_message_attribute_goes_to_formatted(self):
-        # The combining of 'message' and 'logentry' is a bit
-        # of a compatibility hack, and ideally we would just enforce a stricter
-        # schema instead of combining them like this.
+    def test_message_attribute_shadowing(self):
+        # Logentry shadows the legacy message attribute.
         manager = EventManager(
             make_event(
                 **{
@@ -1151,8 +1158,7 @@ class EventManagerTest(TransactionTestCase):
         manager.normalize()
         event = manager.save(self.project.id)
         assert event.data['logentry'] == {
-            'message': 'hello world',
-            'formatted': 'world hello',
+            'formatted': 'hello world',
         }
 
     def test_message_attribute_interface_both_strings(self):
@@ -1167,8 +1173,7 @@ class EventManagerTest(TransactionTestCase):
         manager.normalize()
         event = manager.save(self.project.id)
         assert event.data['logentry'] == {
-            'message': 'a plain string',
-            'formatted': 'another string',
+            'formatted': 'a plain string',
         }
 
     def test_throws_when_matches_discarded_hash(self):

+ 8 - 1
tests/sentry/event_manager/test_normalization.py

@@ -176,10 +176,17 @@ def test_long_message():
     )
     manager.normalize()
     data = manager.get_data()
-    assert len(data['logentry']['message']) == \
+    assert len(data['logentry']['formatted']) == \
         settings.SENTRY_MAX_MESSAGE_LENGTH
 
 
+def test_empty_message():
+    manager = EventManager(make_event(message=''))
+    manager.normalize()
+    data = manager.get_data()
+    assert 'logentry' not in data
+
+
 def test_default_version():
     manager = EventManager(make_event())
     manager.normalize()

+ 2 - 6
tests/sentry/event_manager/test_validate_data.py

@@ -366,8 +366,7 @@ def test_fingerprints():
 def test_messages():
     # Just 'message': wrap it in interface
     data = validate_and_normalize({"message": "foo is bar"})
-    assert "message" not in data
-    assert data["logentry"] == {"message": "foo is bar"}
+    assert data["logentry"] == {"formatted": "foo is bar"}
 
     # both 'message' and interface with no 'formatted' value, put 'message'
     # into 'formatted'.
@@ -377,10 +376,8 @@ def test_messages():
             "logentry": {"message": "something else"},
         }
     )
-    assert "message" not in data
     assert data["logentry"] == {
-        "message": "something else",
-        "formatted": "foo is bar",
+        "formatted": "something else",
     }
 
     # both 'message' and complete interface, 'message' is discarded
@@ -393,7 +390,6 @@ def test_messages():
             },
         }
     )
-    assert "message" not in data
     assert "errors" not in data
     assert data["logentry"] == {
         "message": "something else",

Some files were not shown because too many files changed in this diff