Просмотр исходного кода

ref: Event normalization updates (#11036)

* Cleans up tests around null value handling
* Return CanonicalKeyDict from event manager
* Ability to disable trimming
* Breadcrumbs behave more like Rust code path
* Ignore deprecated top-level attributes
* Removes dead breadcrumb normalization code
* Validate the frame column number
* Change frame context normalization behavior to match Rust's
* ref(event_manager): Remove certain attributes during ingestion
* Un-break javascript sample event
* Upgrade libsemaphore
Markus Unterwaditzer 6 лет назад
Родитель
Commit
f72ce52f03

+ 1 - 1
requirements-base.txt

@@ -54,7 +54,7 @@ redis>=2.10.3,<2.10.6
 requests-oauthlib==0.3.3
 requests[security]>=2.20.0,<2.21.0
 selenium==3.11.0
-semaphore>=0.2.0,<0.3.0
+semaphore>=0.3.0,<0.4.0
 sentry-sdk>=0.6.0,!=0.6.5
 setproctitle>=1.1.7,<1.2.0
 simplejson>=3.2.0,<3.9.0

+ 9 - 2
src/sentry/constants.py

@@ -166,8 +166,15 @@ HTTP_METHODS = ('GET', 'POST', 'PUT', 'OPTIONS', 'HEAD',
 CLIENT_RESERVED_ATTRS = (
     'project', 'errors', 'event_id', 'message', 'checksum', 'culprit', 'fingerprint', 'level',
     'time_spent', 'logger', 'server_name', 'site', 'received', 'timestamp', 'extra', 'modules',
-    'tags', 'platform', 'release', 'dist', 'environment', 'transaction', 'key_id', '_meta',
-    'applecrashreport', 'device', 'repos', 'query', 'type', 'hashes',
+    'tags', 'platform', 'release', 'dist', 'environment', 'transaction', '_meta',
+)
+
+# Deprecated or internal attributes that should be dropped silently
+CLIENT_IGNORED_ATTRS = (
+    # Internal attributes
+    'hashes', 'metadata', 'type', 'key_id', 'project', 'received',
+    # Deprecated attributes
+    'applecrashreport', 'device', 'repos', 'query',
 )
 
 # XXX: Must be all lowercase

+ 19 - 15
src/sentry/event_manager.py

@@ -20,7 +20,8 @@ from django.utils.encoding import force_text
 from sentry import buffer, eventtypes, eventstream, features, tsdb, filters
 from sentry.constants import (
     CLIENT_RESERVED_ATTRS, LOG_LEVELS, LOG_LEVELS_MAP, DEFAULT_LOG_LEVEL,
-    DEFAULT_LOGGER_NAME, MAX_CULPRIT_LENGTH, VALID_PLATFORMS, MAX_TAG_VALUE_LENGTH
+    DEFAULT_LOGGER_NAME, MAX_CULPRIT_LENGTH, VALID_PLATFORMS, MAX_TAG_VALUE_LENGTH,
+    CLIENT_IGNORED_ATTRS,
 )
 from sentry.coreapi import (
     APIError,
@@ -56,7 +57,7 @@ from sentry.utils.data_filters import (
 from sentry.utils.dates import to_timestamp
 from sentry.utils.db import is_postgres, is_mysql
 from sentry.utils.meta import Meta
-from sentry.utils.safe import safe_execute, trim, trim_dict, get_path, set_path, setdefault_path
+from sentry.utils.safe import ENABLE_TRIMMING, safe_execute, trim, trim_dict, get_path, set_path, setdefault_path
 from sentry.utils.strings import truncatechars
 from sentry.utils.geo import rust_geoip
 from sentry.utils.validators import is_float
@@ -458,22 +459,18 @@ class EventManager(object):
                 stacktrace_frames_hard_limit=settings.SENTRY_STACKTRACE_FRAMES_HARD_LIMIT,
                 valid_platforms=list(VALID_PLATFORMS),
                 max_secs_in_future=MAX_SECS_IN_FUTURE,
-                max_secs_in_past=MAX_SECS_IN_PAST
+                max_secs_in_past=MAX_SECS_IN_PAST,
+                enable_trimming=ENABLE_TRIMMING,
             )
 
             self._data = CanonicalKeyDict(rust_normalizer.normalize_event(dict(self._data)))
+
+            normalize_user_agent(self._data)
+
             return
 
         data = self._data
 
-        if self._for_store:
-            if self._project is not None:
-                data['project'] = self._project.id
-            if self._key is not None:
-                data['key_id'] = self._key.id
-            if self._auth is not None:
-                data['sdk'] = data.get('sdk') or parse_client_as_sdk(self._auth.client)
-
         # Before validating with a schema, attempt to cast values to their desired types
         # so that the schema doesn't have to take every type variation into account.
         text = six.text_type
@@ -538,8 +535,8 @@ class EventManager(object):
 
             # Ignore all top-level None and empty values, regardless whether
             # they are interfaces or not. For all other unrecognized attributes,
-            # we emit an explicit error.
-            if not value:
+            # we emit an explicit error, unless they are explicitly ignored.
+            if not value or k in CLIENT_IGNORED_ATTRS:
                 continue
 
             try:
@@ -555,6 +552,13 @@ class EventManager(object):
 
         # Additional data coercion and defaulting we only do for store.
         if self._for_store:
+            if self._project is not None:
+                data['project'] = self._project.id
+            if self._key is not None:
+                data['key_id'] = self._key.id
+            if self._auth is not None:
+                data['sdk'] = data.get('sdk') or parse_client_as_sdk(self._auth.client)
+
             level = data.get('level') or DEFAULT_LOG_LEVEL
             if isinstance(level, int) or (isinstance(level, six.string_types) and level.isdigit()):
                 level = LOG_LEVELS.get(int(level), DEFAULT_LOG_LEVEL)
@@ -663,7 +667,7 @@ class EventManager(object):
         if server_name is not None:
             set_tag(data, 'server_name', server_name)
 
-        for key in ('fingerprint', 'modules', 'tags', 'extra'):
+        for key in ('fingerprint', 'modules', 'tags', 'extra', 'contexts'):
             if not data.get(key):
                 data.pop(key, None)
 
@@ -683,7 +687,7 @@ class EventManager(object):
         elif '_meta' in data:
             del data['_meta']
 
-        self._data = prune_empty_keys(data)
+        self._data = CanonicalKeyDict(prune_empty_keys(data))
 
     def should_filter(self):
         '''

+ 16 - 31
src/sentry/interfaces/breadcrumbs.py

@@ -12,25 +12,12 @@ __all__ = ('Breadcrumbs', )
 
 import six
 
+from sentry.constants import LOG_LEVELS_MAP
 from sentry.interfaces.base import Interface, InterfaceValidationError, prune_empty_keys
-from sentry.utils import json
 from sentry.utils.safe import get_path, trim
 from sentry.utils.dates import to_timestamp, to_datetime, parse_timestamp
 
 
-def _get_implied_category(category, type):
-    if category is not None:
-        return category
-    if type in ('critical', 'error', 'warning', 'info', 'debug'):
-        return type
-    # Common aliases
-    if type == 'warn':
-        return 'warning'
-    elif type == 'fatal':
-        return 'critical'
-    return 'info'
-
-
 class Breadcrumbs(Interface):
     """
     This interface stores information that leads up to an error.
@@ -83,7 +70,10 @@ class Breadcrumbs(Interface):
     @classmethod
     def normalize_crumb(cls, crumb):
         ty = crumb.get('type') or 'default'
-        level = crumb.get('level') or 'info'
+        level = crumb.get('level')
+        if level not in LOG_LEVELS_MAP and level != 'critical':
+            level = 'info'
+
         ts = parse_timestamp(crumb.get('timestamp'))
         if ts is None:
             raise InterfaceValidationError('Unable to determine timestamp for crumb')
@@ -100,22 +90,17 @@ class Breadcrumbs(Interface):
         event_id = crumb.get('event_id')
 
         data = crumb.get('data')
-        if data:
-            try:
-                for key, value in six.iteritems(data):
-                    if not isinstance(value, six.string_types):
-                        data[key] = json.dumps(value)
-            except AttributeError:
-                # TODO(dcramer): we dont want to discard the the rest of the
-                # crumb, but it'd be nice if we could record an error
-                # raise InterfaceValidationError(
-                #     'The ``data`` on breadcrumbs must be a mapping (received {})'.format(
-                #         type(crumb['data']),
-                #     )
-                # )
-                data = None
-            else:
-                data = trim(data, 4096)
+        if not isinstance(data, dict):
+            # TODO(dcramer): we don't want to discard the rest of the
+            # crumb, but it'd be nice if we could record an error
+            # raise InterfaceValidationError(
+            #     'The ``data`` on breadcrumbs must be a mapping (received {})'.format(
+            #         type(crumb['data']),
+            #     )
+            # )
+            data = None
+        else:
+            data = trim(data, 4096)
 
         return {
             'type': ty,

+ 2 - 2
src/sentry/interfaces/contexts.py

@@ -13,7 +13,7 @@ import string
 
 from django.utils.encoding import force_text
 
-from sentry.interfaces.base import Interface
+from sentry.interfaces.base import Interface, prune_empty_keys
 from sentry.utils.contexts_normalization import normalize_os, normalize_runtime
 from sentry.utils.safe import get_path, trim
 
@@ -55,7 +55,7 @@ class ContextType(object):
     def to_json(self):
         rv = dict(self.data)
         rv['type'] = self.type
-        return rv
+        return prune_empty_keys(rv)
 
     @classmethod
     def values_for_data(cls, data):

+ 2 - 2
src/sentry/interfaces/exception.py

@@ -882,7 +882,7 @@ class SingleException(Interface):
         else:
             raw_stacktrace = None
 
-        return {
+        return prune_empty_keys({
             'type': self.type,
             'value': self.value,
             'mechanism': mechanism,
@@ -890,7 +890,7 @@ class SingleException(Interface):
             'stacktrace': stacktrace,
             'thread_id': self.thread_id,
             'raw_stacktrace': raw_stacktrace,
-        }
+        })
 
     def get_api_context(self, is_public=False):
         mechanism = isinstance(self.mechanism, Mechanism) and \

+ 18 - 11
src/sentry/interfaces/stacktrace.py

@@ -353,16 +353,20 @@ class Frame(Interface):
 
         # XXX: handle lines which were sent as 'null'
         context_line = trim(data.get('context_line'), 256)
-        if context_line is not None:
-            pre_context = data.get('pre_context', None)
-            if pre_context:
-                pre_context = [c or '' for c in pre_context]
-
-            post_context = data.get('post_context', None)
-            if post_context:
-                post_context = [c or '' for c in post_context]
+        pre_context = data.get('pre_context', None)
+        if isinstance(pre_context, list) and pre_context:
+            pre_context = [c or '' for c in pre_context]
         else:
-            pre_context, post_context = None, None
+            pre_context = None
+
+        post_context = data.get('post_context', None)
+        if isinstance(post_context, list) and post_context:
+            post_context = [c or '' for c in post_context]
+        else:
+            post_context = None
+
+        if not context_line and (pre_context or post_context):
+            context_line = ''
 
         in_app = validate_bool(data.get('in_app'), False)
 
@@ -397,7 +401,10 @@ class Frame(Interface):
             kwargs['lineno'] = None
 
         if data.get('colno') is not None:
-            kwargs['colno'] = int(data['colno'])
+            colno = int(data['colno'])
+            if colno < 0:
+                colno = None
+            kwargs['colno'] = colno
         else:
             kwargs['colno'] = None
 
@@ -417,7 +424,7 @@ class Frame(Interface):
             'instruction_addr': self.instruction_addr,
             'trust': self.trust,
             'in_app': self.in_app,
-            'context_line': self.context_line or None,
+            'context_line': self.context_line,
             'pre_context': self.pre_context or None,
             'post_context': self.post_context or None,
             'vars': self.vars or None,

+ 4 - 4
src/sentry/interfaces/threads.py

@@ -1,6 +1,6 @@
 from __future__ import absolute_import
 
-from sentry.interfaces.base import Interface
+from sentry.interfaces.base import Interface, prune_empty_keys
 from sentry.interfaces.stacktrace import Stacktrace
 from sentry.utils.safe import trim
 
@@ -54,11 +54,11 @@ class Threads(Interface):
                 rv['stacktrace'] = data['stacktrace'].to_json()
             if data['raw_stacktrace']:
                 rv['raw_stacktrace'] = data['raw_stacktrace'].to_json()
-            return rv
+            return prune_empty_keys(rv)
 
-        return {
+        return prune_empty_keys({
             'values': [export_thread(x) for x in self.values],
-        }
+        })
 
     def get_api_context(self, is_public=False):
         def export_thread(data):

+ 1 - 1
src/sentry/utils/canonical.py

@@ -144,7 +144,7 @@ class CanonicalKeyDict(collections.MutableMapping):
         del self.data[self._norm_func(key)]
 
     def __repr__(self):
-        return self.data.__repr__()
+        return 'CanonicalKeyDict(%s)' % (self.data.__repr__(),)
 
 
 CANONICAL_TYPES = (CanonicalKeyDict, CanonicalKeyView)

+ 10 - 6
src/sentry/utils/safe.py

@@ -7,6 +7,7 @@ sentry.utils.safe
 """
 from __future__ import absolute_import, print_function
 
+import os
 import collections
 import logging
 import six
@@ -19,6 +20,9 @@ from sentry.utils import json
 from sentry.utils.strings import truncatechars
 
 
+ENABLE_TRIMMING = os.environ.get("SENTRY_RUST_ENABLE_TRIMMING", "true") == "true"
+
+
 def safe_execute(func, *args, **kwargs):
     # TODO: we should make smart savepoints (only executing the savepoint server
     # side if we execute a query)
@@ -65,7 +69,7 @@ def trim(
         '_depth': _depth + 1,
     }
 
-    if _depth > max_depth:
+    if _depth > max_depth and ENABLE_TRIMMING:
         if not isinstance(value, six.string_types):
             value = json.dumps(value)
         return trim(value, _size=_size, max_size=max_size)
@@ -78,7 +82,7 @@ def trim(
             trim_v = trim(v, _size=_size, **options)
             result[k] = trim_v
             _size += len(force_text(trim_v)) + 1
-            if _size >= max_size:
+            if _size >= max_size and ENABLE_TRIMMING:
                 break
 
     elif isinstance(value, (list, tuple)):
@@ -88,12 +92,12 @@ def trim(
             trim_v = trim(v, _size=_size, **options)
             result.append(trim_v)
             _size += len(force_text(trim_v))
-            if _size >= max_size:
+            if _size >= max_size and ENABLE_TRIMMING:
                 break
         if isinstance(value, tuple):
             result = tuple(result)
 
-    elif isinstance(value, six.string_types):
+    elif isinstance(value, six.string_types) and ENABLE_TRIMMING:
         result = truncatechars(value, max_size - _size)
 
     else:
@@ -110,7 +114,7 @@ def trim_pairs(iterable, max_items=settings.SENTRY_MAX_DICTIONARY_ITEMS, **kwarg
     for idx, item in enumerate(iterable):
         key, value = item
         result.append((key, trim(value, **kwargs)))
-        if idx > max_items:
+        if idx > max_items and ENABLE_TRIMMING:
             return result
     return result
 
@@ -119,7 +123,7 @@ def trim_dict(value, max_items=settings.SENTRY_MAX_DICTIONARY_ITEMS, **kwargs):
     max_items -= 1
     for idx, key in enumerate(list(iter(value))):
         value[key] = trim(value[key], **kwargs)
-        if idx > max_items:
+        if idx > max_items and ENABLE_TRIMMING:
             del value[key]
     return value
 

Некоторые файлы не были показаны из-за большого количества измененных файлов