Просмотр исходного кода

feat: Implement minidump ingestion without symbolication (#6416)

* feat: Support chunked transfer encoding

* feat(minidump): Implement a most basic minidump event endpoint

* feat(minidump): Upload minidumps on event creation

* feat(minidump): Leave the minidump GUID in the extra dict

* ref(minidump): Move minidump upload to the coreapi helper

* feat(minidump): Parse minidumps into an event interface on upload

* feat(minidump): Display the same UI for minidump and cocoa

* feat(minidump): Display the minidump endpoint in the UI

* ref(minidump): Remove stacktraces from the crashed thread

* fix(minidump): Make allowed content_types a tuple in MinidumpView

* feat(minidump): Add feature flags to hide the minidump endpoint UI

* build(minidump): Require a more recent symbolic version

* feat(minidump): Add a platform constant for "native"
Jan Michael Auer 7 лет назад
Родитель
Сommit
8f9648df60

+ 1 - 1
requirements-base.txt

@@ -46,7 +46,7 @@ setproctitle>=1.1.7,<1.2.0
 statsd>=3.1.0,<3.2.0
 structlog==16.1.0
 sqlparse>=0.1.16,<0.2.0
-symbolic>=1.0.3,<2.0.0
+symbolic>=1.1.0,<2.0.0
 toronado>=0.0.11,<0.1.0
 ua-parser>=0.6.1,<0.8.0
 urllib3>=1.22,<1.23

+ 1 - 1
src/sentry/api/serializers/models/project.py

@@ -105,7 +105,7 @@ class ProjectSerializer(Serializer):
         feature_list = []
         for feature in (
             'global-events', 'data-forwarding', 'rate-limits', 'custom-filters', 'similarity-view',
-            'custom-inbound-filters',
+            'custom-inbound-filters', 'minidump',
         ):
             if features.has('projects:' + feature, obj, actor=user):
                 feature_list.append(feature)

+ 1 - 0
src/sentry/api/serializers/models/project_key.py

@@ -25,6 +25,7 @@ class ProjectKeySerializer(Serializer):
                 'secret': obj.dsn_private,
                 'public': obj.dsn_public,
                 'csp': obj.csp_endpoint,
+                'minidump': obj.minidump_endpoint,
             },
             'dateCreated': obj.date_added,
         }

+ 2 - 0
src/sentry/conf/server.py

@@ -203,6 +203,7 @@ TEMPLATE_LOADERS = (
 )
 
 MIDDLEWARE_CLASSES = (
+    'sentry.middleware.proxy.ChunkedMiddleware',
     'sentry.middleware.proxy.ContentLengthHeaderMiddleware',
     'sentry.middleware.security.SecurityHeadersMiddleware',
     'sentry.middleware.maintenance.ServicesUnavailableMiddleware',
@@ -733,6 +734,7 @@ SENTRY_FEATURES = {
     'projects:rate-limits': True,
     'projects:custom-filters': False,
     'projects:custom-inbound-filters': False,
+    'projects:minidump': False,
 }
 
 # Default time zone for localization in the UI.

+ 13 - 6
src/sentry/constants.py

@@ -39,7 +39,9 @@ DATA_ROOT = os.path.join(MODULE_ROOT, 'data')
 
 SORT_OPTIONS = OrderedDict(
     (
-        ('priority', _('Priority')), ('date', _('Last Seen')), ('new', _('First Seen')),
+        ('priority', _('Priority')),
+        ('date', _('Last Seen')),
+        ('new', _('First Seen')),
         ('freq', _('Frequency')),
     )
 )
@@ -106,7 +108,8 @@ DEFAULT_SORT_OPTION = 'date'
 
 # Setup languages for only available locales
 LANGUAGE_MAP = dict(settings.LANGUAGES)
-LANGUAGES = [(k, LANGUAGE_MAP[k]) for k in get_all_languages() if k in LANGUAGE_MAP]
+LANGUAGES = [(k, LANGUAGE_MAP[k])
+             for k in get_all_languages() if k in LANGUAGE_MAP]
 
 # TODO(dcramer): We eventually want to make this user-editable
 TAG_LABELS = {
@@ -134,7 +137,8 @@ SENTRY_RULES = (
 )
 
 # methods as defined by http://www.w3.org/Protocols/rfc2616/rfc2616-sec9.html + PATCH
-HTTP_METHODS = ('GET', 'POST', 'PUT', 'OPTIONS', 'HEAD', 'DELETE', 'TRACE', 'CONNECT', 'PATCH')
+HTTP_METHODS = ('GET', 'POST', 'PUT', 'OPTIONS', 'HEAD',
+                'DELETE', 'TRACE', 'CONNECT', 'PATCH')
 
 CLIENT_RESERVED_ATTRS = (
     'project', 'errors', 'event_id', 'message', 'checksum', 'culprit', 'fingerprint', 'level',
@@ -177,6 +181,7 @@ VALID_PLATFORMS = set(
         'elixir',
         'haskell',
         'groovy',
+        'native',
     ]
 )
 
@@ -304,7 +309,8 @@ def get_integration_id_for_event(platform, sdk_name, integrations):
                 return integration_id
 
     # try sdk name, for example "sentry-java" -> "java" or "raven-java:log4j" -> "java-log4j"
-    sdk_name = sdk_name.lower().replace("sentry-", "").replace("raven-", "").replace(":", "-")
+    sdk_name = sdk_name.lower().replace(
+        "sentry-", "").replace("raven-", "").replace(":", "-")
     if sdk_name in INTEGRATION_ID_TO_PLATFORM_DATA:
         return sdk_name
 
@@ -322,8 +328,9 @@ class ObjectStatus(object):
     @classmethod
     def as_choices(cls):
         return (
-            (cls.VISIBLE, 'visible'), (cls.HIDDEN,
-                                       'hidden'), (cls.PENDING_DELETION, 'pending_deletion'),
+            (cls.VISIBLE, 'visible'),
+            (cls.HIDDEN, 'hidden'),
+            (cls.PENDING_DELETION, 'pending_deletion'),
             (cls.DELETION_IN_PROGRESS, 'deletion_in_progress'),
         )
 

+ 78 - 1
src/sentry/coreapi.py

@@ -39,7 +39,7 @@ from sentry.db.models import BoundedIntegerField
 from sentry.interfaces.base import get_interface, InterfaceValidationError
 from sentry.interfaces.csp import Csp
 from sentry.event_manager import EventManager
-from sentry.models import EventError, ProjectKey
+from sentry.models import EventError, ProjectKey, upload_minidump, merge_minidump_event
 from sentry.tasks.store import preprocess_event, \
     preprocess_event_from_reprocessing
 from sentry.utils import json
@@ -850,6 +850,83 @@ class ClientApiHelper(object):
                    event_id=data['event_id'])
 
 
+class MinidumpApiHelper(ClientApiHelper):
+    """Client API helper for events submitted as minidump uploads.
+
+    Minidump request payloads do not have the same structure as usual
+    events from other SDKs. Most importantly, all parameters passed in
+    the POST body are only "extra" information. The actual information
+    is in the "upload_file_minidump" field.
+    """
+
+    def origin_from_request(self, request):
+        """Return ``None``; we don't use an origin here."""
+        return None
+
+    def auth_from_request(self, request):
+        """Build a public ``Auth`` from the ``sentry_key`` query parameter.
+
+        Raises ``APIUnauthorized`` when the parameter is missing or empty.
+        """
+        sentry_key = request.GET.get('sentry_key')
+        if sentry_key:
+            auth = Auth({'sentry_key': sentry_key}, is_public=True)
+            auth.client = 'sentry-minidump'
+            return auth
+
+        raise APIUnauthorized('Unable to find authentication information')
+
+    def validate_data(self, project, data):
+        """Wrap the raw request parameters into a minimal event payload.
+
+        At this point, we only extract the bare minimum information
+        needed to continue processing. If all validations pass, the
+        event will be inserted into the database, at which point we
+        can process the minidump and extract a little more information.
+
+        ``data`` is mutated: ``release`` is removed from it and the
+        remainder is referenced as the event's ``extra`` dict.
+        """
+        release = data.pop('release', None)
+
+        errors = []
+        validated = {
+            'platform': 'native',
+            'project': project.id,
+            'extra': data,
+            'errors': errors,
+            'sentry.interfaces.User': {
+                'ip_address': self.context.ip_address,
+            },
+        }
+
+        # Copy/pasted from above in ClientApiHelper.validate_data
+        if release:
+            release = six.text_type(release)
+            if len(release) > 64:
+                errors.append({
+                    'type': EventError.VALUE_TOO_LONG,
+                    'name': 'release',
+                    'value': release,
+                })
+            else:
+                validated['release'] = release
+
+        return validated
+
+    def insert_data_to_database(self, data, from_reprocessing=False):
+        """Process and persist the uploaded minidump, then store the event.
+
+        Seems like the event is valid and we can do some more expensive
+        work on the minidump. That is, persisting the file itself for
+        later postprocessing and extracting some more information from
+        the minidump to populate the initial callstacks and exception
+        information.
+        """
+        event_id = data['event_id']
+        # The uploaded file object is removed from "extra" so it does not
+        # travel along with the event payload.
+        uploaded = data['extra'].pop('upload_file_minidump')
+        merge_minidump_event(data, uploaded.temporary_file_path())
+        upload_minidump(uploaded, event_id)
+
+        # All more advanced analysis, such as stack frame symbolication,
+        # requires a proper stacktrace, which requires call frame infos
+        # (CFI) for more accurate stackwalking. This task is executed
+        # even before starting the native language plugin, which will
+        # ultimately perform stack frame symbolication.
+
+        # Continue with persisting the event in the usual manner and
+        # schedule default preprocessing tasks
+        parent = super(MinidumpApiHelper, self)
+        parent.insert_data_to_database(data, from_reprocessing)
+
+
 class CspApiHelper(ClientApiHelper):
     def origin_from_request(self, request):
         # We don't use an origin here

+ 1 - 0
src/sentry/features/__init__.py

@@ -26,6 +26,7 @@ default_manager.add('projects:sample-events', ProjectFeature)  # NOQA
 default_manager.add('projects:similarity-indexing', ProjectFeature)  # NOQA
 default_manager.add('projects:custom-filters', ProjectFeature)  # NOQA
 default_manager.add('projects:custom-inbound-filters', ProjectFeature)  # NOQA
+default_manager.add('projects:minidump', ProjectFeature)  # NOQA
 
 # expose public api
 add = default_manager.add

+ 38 - 0
src/sentry/middleware/proxy.py

@@ -1,10 +1,36 @@
 from __future__ import absolute_import
 
+import io
 import six
+try:
+    import uwsgi
+    has_uwsgi = True
+except ImportError:
+    has_uwsgi = False
 
 from django.conf import settings
 
 
+if has_uwsgi:
+    class UWsgiChunkedInput(io.RawIOBase):
+        """Raw readable stream backed by ``uwsgi.chunked_read()``.
+
+        Data returned by ``uwsgi.chunked_read()`` is kept in an internal
+        buffer and handed out through ``readinto`` in pieces no larger
+        than the caller's buffer.
+        """
+
+        def __init__(self):
+            # Bytes already fetched from uwsgi but not yet handed out.
+            self._internal_buffer = b''
+
+        def readable(self):
+            """Report that this stream supports reading."""
+            return True
+
+        def readinto(self, buf):
+            """Copy buffered bytes into ``buf`` and return how many were copied.
+
+            More data is fetched with ``uwsgi.chunked_read()`` only when
+            the internal buffer is empty. Returns 0 when nothing could
+            be copied.
+            """
+            if not self._internal_buffer:
+                self._internal_buffer = uwsgi.chunked_read()
+
+            pending = self._internal_buffer
+            count = min(len(buf), len(pending))
+            if count <= 0:
+                return count
+
+            buf[:count] = pending[:count]
+            self._internal_buffer = pending[count:]
+            return count
+
+
 class SetRemoteAddrFromForwardedFor(object):
     def __init__(self):
         if not getattr(settings, 'SENTRY_USE_X_FORWARDED_FOR', True):
@@ -26,6 +52,18 @@ class SetRemoteAddrFromForwardedFor(object):
             request.META['REMOTE_ADDR'] = real_ip
 
 
+class ChunkedMiddleware(object):
+    """Let request bodies sent with chunked transfer encoding be read.
+
+    Has an effect only when the ``uwsgi`` module could be imported and
+    the request's ``Transfer-Encoding`` header is ``chunked``.
+    """
+
+    def process_request(self, request):
+        """Swap the request stream for a uwsgi-backed chunked reader."""
+        # If we are dealing with chunked data and we have uwsgi we assume
+        # that we can read to the end of the input stream so we can bypass
+        # the default limited stream.  We set the content length reasonably
+        # high so that the reads generally succeed.  This is ugly but with
+        # Django 1.6 it seems to be the best we can easily do.
+        if has_uwsgi and request.META.get('HTTP_TRANSFER_ENCODING', '').lower() == 'chunked':
+            request._stream = io.BufferedReader(UWsgiChunkedInput())
+            # str() rather than six.binary_type(): on Python 3 the latter
+            # is bytes(), and bytes(n) builds an n-byte zero-filled buffer
+            # (~4 GiB here) instead of the decimal representation.
+            request.META['CONTENT_LENGTH'] = str(0xffffffff)
+
+
 class ContentLengthHeaderMiddleware(object):
     """
     Ensure that we have a proper Content-Length/Transfer-Encoding header

+ 116 - 0
src/sentry/models/minidump.py

@@ -0,0 +1,116 @@
+"""
+sentry.models.minidump
+~~~~~~~~~~~~~~~~~~~~~~
+
+:copyright: (c) 2010-2016 by the Sentry Team, see AUTHORS for more details.
+:license: BSD, see LICENSE for more details.
+"""
+
+from __future__ import absolute_import
+
+from django.db import models, transaction
+from symbolic import ProcessState
+
+from sentry.constants import LOG_LEVELS_MAP
+from sentry.db.models import FlexibleForeignKey, Model, sane_repr
+from sentry.models.file import File
+
+
+class MinidumpFile(Model):
+    """Links a stored ``sentry.File`` to the event id it was uploaded for."""
+    __core__ = False
+
+    file = FlexibleForeignKey('sentry.File')
+    # Unique: at most one minidump file row per event id.
+    event_id = models.CharField(max_length=36, unique=True)
+
+    class Meta:
+        db_table = 'sentry_minidumpfile'
+        app_label = 'sentry'
+
+    __repr__ = sane_repr('event_id')
+
+    def delete(self, *args, **kwargs):
+        """Delete this row first, then the file object it referenced."""
+        super(MinidumpFile, self).delete(*args, **kwargs)
+        self.file.delete()
+
+
+def upload_minidump(fileobj, event_id):
+    """Store ``fileobj`` as a minidump file and link it to ``event_id``.
+
+    The ``File`` creation, the content upload and the ``MinidumpFile``
+    row creation all run inside a single ``transaction.atomic()`` block.
+
+    :param fileobj: file-like object handed to ``File.putfile``.
+    :param event_id: event id; also used as the name of the stored file.
+    :returns: the newly created ``MinidumpFile``.
+    """
+    file_attrs = {
+        'name': event_id,
+        'type': 'event.minidump',
+        'headers': {'Content-Type': 'application/x-minidump'},
+    }
+
+    with transaction.atomic():
+        stored_file = File.objects.create(**file_attrs)
+        stored_file.putfile(fileobj)
+        minidump_file = MinidumpFile.objects.create(
+            file=stored_file,
+            event_id=event_id,
+        )
+
+    return minidump_file
+
+
+def merge_minidump_event(data, minidump_path):
+    """Populate ``data`` in place with information parsed from a minidump.
+
+    Sets ``level`` and ``message`` (and ``timestamp`` when the minidump
+    carries one), fills the ``os`` and ``device`` contexts, replaces the
+    thread list and debug images, and appends one exception entry.
+
+    :param data: mutable event payload dict; modified in place.
+    :param minidump_path: path of the minidump, passed to
+        ``ProcessState.from_minidump``.
+    """
+    state = ProcessState.from_minidump(minidump_path)
+
+    data['level'] = LOG_LEVELS_MAP['fatal'] if state.crashed else LOG_LEVELS_MAP['info']
+    data['message'] = 'Assertion Error: %s' % state.assertion if state.assertion \
+        else 'Fatal Error: %s' % state.crash_reason
+
+    if state.timestamp:
+        data['timestamp'] = float(state.timestamp)
+
+    # Extract as much system information as we can. TODO: We should create
+    # a custom context and implement a specific minidump view in the event
+    # UI.
+    info = state.system_info
+    context = data.setdefault('contexts', {})
+    os = context.setdefault('os', {})
+    device = context.setdefault('device', {})
+    os['name'] = info.os_name
+    os['version'] = info.os_version
+    device['arch'] = info.cpu_family
+
+    # We can extract stack traces here already but since CFI is not
+    # available yet (without debug symbols), the stackwalker will
+    # resort to stack scanning which yields low-quality results. If
+    # the user provides us with debug symbols, we will reprocess this
+    # minidump and add improved stacktraces later.
+    threads = [{
+        'id': thread.thread_id,
+        'crashed': False,
+        'stacktrace': {
+            'frames': [{
+                'instruction_addr': frame.instruction,
+                'function': '<unknown>',  # Required by interface
+            } for frame in thread.frames()],
+        },
+    } for thread in state.threads()]
+    data.setdefault('threads', {})['values'] = threads
+
+    # Extract the crash reason and infos
+    exception = {
+        'value': data['message'],
+        'type': state.crash_reason,
+    }
+
+    # Mark the crashed thread and add its stacktrace to the exception.
+    # The index is validated first: a negative value would otherwise
+    # silently select a thread from the end of the list, and a too-large
+    # one would raise IndexError.
+    # NOTE(review): Breakpad documents -1 as "no requesting thread";
+    # confirm symbolic passes that value through unchanged.
+    requesting_thread = state.requesting_thread
+    if 0 <= requesting_thread < len(threads):
+        crashed_thread = threads[requesting_thread]
+        crashed_thread['crashed'] = True
+        exception['thread_id'] = crashed_thread['id']
+        # Move stacktrace here from crashed_thread (mutating!)
+        exception['stacktrace'] = crashed_thread.pop('stacktrace')
+
+    data.setdefault('exception', {}) \
+        .setdefault('values', []) \
+        .append(exception)
+
+    # Extract referenced (not all loaded) images
+    images = [{
+        'type': 'apple',  # Required by interface
+        'uuid': module.uuid,
+        'image_addr': module.addr,
+        'image_size': module.size,
+        'name': module.name,
+    } for module in state.modules()]
+    data.setdefault('debug_meta', {})['images'] = images

+ 19 - 3
src/sentry/models/projectkey.py

@@ -55,7 +55,8 @@ class ProjectKey(Model):
     status = BoundedPositiveIntegerField(
         default=0,
         choices=(
-            (ProjectKeyStatus.ACTIVE, _('Active')), (ProjectKeyStatus.INACTIVE, _('Inactive')),
+            (ProjectKeyStatus.ACTIVE, _('Active')),
+            (ProjectKeyStatus.INACTIVE, _('Inactive')),
         ),
         db_index=True
     )
@@ -105,7 +106,8 @@ class ProjectKey(Model):
             # ValueError would come from a non-integer project_id,
             # which is obviously a DoesNotExist. We catch and rethrow this
             # so anything downstream expecting DoesNotExist works fine
-            raise ProjectKey.DoesNotExist('ProjectKey matching query does not exist.')
+            raise ProjectKey.DoesNotExist(
+                'ProjectKey matching query does not exist.')
 
     @classmethod
     def get_default(cls, project):
@@ -167,7 +169,21 @@ class ProjectKey(Model):
             endpoint = options.get('system.url-prefix')
 
         return '%s%s?sentry_key=%s' % (
-            endpoint, reverse('sentry-api-csp-report', args=[self.project_id]), self.public_key,
+            endpoint,
+            reverse('sentry-api-csp-report', args=[self.project_id]),
+            self.public_key,
+        )
+
+    @property
+    def minidump_endpoint(self):
+        """URL of the minidump endpoint for this key's project.
+
+        The base URL is the first truthy value among
+        ``settings.SENTRY_PUBLIC_ENDPOINT``, ``settings.SENTRY_ENDPOINT``
+        and the ``system.url-prefix`` option; the public key is appended
+        as the ``sentry_key`` query parameter.
+        """
+        base_url = (
+            settings.SENTRY_PUBLIC_ENDPOINT
+            or settings.SENTRY_ENDPOINT
+            or options.get('system.url-prefix')
+        )
+        path = reverse('sentry-api-minidump', args=[self.project_id])
+
+        return '%s%s?sentry_key=%s' % (
+            base_url, path, self.public_key,
         )
 
     def get_allowed_origins(self):

Некоторые файлы не были показаны из-за большого количества измененных файлов