Browse Source

feat(grouping): Support for grouping enhancements and fingerprinting (#12679)

This PR adds support for grouping enhancements and server-side
fingerprinting. The former requires a newstyle grouping algorithm to be
selected; the latter works for all grouping strategies. This also adds
UI to configure grouping enhancements and fingerprinting, which is
currently feature flagged.

This UI is feature gated and will likely never be available to customers
in its current form, except for testing purposes. A subset might become
available.
Armin Ronacher 6 years ago
parent
commit
bb6a9acb50

+ 1 - 1
requirements-base.txt

@@ -56,7 +56,7 @@ redis>=2.10.3,<2.10.6
 requests-oauthlib==0.3.3
 requests[security]>=2.20.0,<2.21.0
 selenium==3.141.0
-semaphore>=0.4.21,<0.5.0
+semaphore>=0.4.24,<0.5.0
 sentry-sdk>=0.7.0
 setproctitle>=1.1.7,<1.2.0
 simplejson>=3.2.0,<3.9.0

+ 2 - 2
src/sentry/api/endpoints/grouping_configs.py

@@ -12,6 +12,6 @@ class GroupingConfigsEndpoint(Endpoint):
 
     def get(self, request):
         return Response(serialize([
-            config().as_dict() for config in sorted(CONFIGURATIONS.values(),
-                                                    key=lambda x: x.id)
+            config.as_dict() for config in sorted(CONFIGURATIONS.values(),
+                                                  key=lambda x: x.id)
         ]))

+ 17 - 0
src/sentry/api/endpoints/grouping_enhancements.py

@@ -0,0 +1,17 @@
+from __future__ import absolute_import
+
+from rest_framework.response import Response
+
+from sentry.api.base import Endpoint
+from sentry.api.serializers import serialize
+from sentry.grouping.enhancer import ENHANCEMENT_BASES
+
+
+class GroupingEnhancementsEndpoint(Endpoint):
+    """API endpoint that lists the available grouping enhancement bases."""
+
+    # Empty tuple: no permission classes are applied to this endpoint.
+    permission_classes = ()
+
+    def get(self, request):
+        """Respond with every entry of ``ENHANCEMENT_BASES`` as a dict,
+        ordered by ``id``."""
+        ordered_bases = sorted(ENHANCEMENT_BASES.values(),
+                               key=lambda base: base.id)
+        payload = [base.as_dict() for base in ordered_bases]
+        return Response(serialize(payload))

+ 43 - 0
src/sentry/api/endpoints/project_details.py

@@ -24,6 +24,8 @@ from sentry.models import (
     AuditLogEntryEvent, Group, GroupStatus, Project, ProjectBookmark, ProjectRedirect,
     ProjectStatus, ProjectTeam, UserOption,
 )
+from sentry.grouping.enhancer import Enhancements, InvalidEnhancerConfig
+from sentry.grouping.fingerprinting import FingerprintingRules, InvalidFingerprintingConfig
 from sentry.tasks.deletion import delete_project
 from sentry.utils.apidocs import scenario, attach_scenarios
 
@@ -98,6 +100,9 @@ class ProjectAdminSerializer(ProjectMemberSerializer):
     relayPiiConfig = serializers.CharField(required=False)
     scrubIPAddresses = serializers.BooleanField(required=False)
     groupingConfig = serializers.CharField(required=False)
+    groupingEnhancements = serializers.CharField(required=False)
+    groupingEnhancementsBase = serializers.CharField(required=False)
+    fingerprintingRules = serializers.CharField(required=False)
     scrapeJavaScript = serializers.BooleanField(required=False)
     allowedDomains = ListField(child=OriginField(), required=False)
     resolveAge = serializers.IntegerField(required=False)
@@ -168,6 +173,28 @@ class ProjectAdminSerializer(ProjectMemberSerializer):
             )
         return attrs
 
+    def validate_groupingEnhancements(self, attrs, source):
+        """Validate the ``groupingEnhancements`` field.
+
+        A falsy value is accepted without further checks.  Anything else
+        must parse with ``Enhancements.from_config_string``; a parse
+        failure is reported as a ``ValidationError`` carrying the parser's
+        message.  ``attrs`` is returned unmodified.
+        """
+        config_string = attrs[source]
+        if config_string:
+            try:
+                # Parsed only to check validity; the result is discarded.
+                Enhancements.from_config_string(config_string)
+            except InvalidEnhancerConfig as e:
+                raise serializers.ValidationError(e.message)
+
+        return attrs
+
+    def validate_fingerprintingRules(self, attrs, source):
+        """Validate the ``fingerprintingRules`` field.
+
+        A falsy value is accepted without further checks.  Anything else
+        must parse with ``FingerprintingRules.from_config_string``; a parse
+        failure is reported as a ``ValidationError`` carrying the parser's
+        message.  ``attrs`` is returned unmodified.
+        """
+        rules_string = attrs[source]
+        if rules_string:
+            try:
+                # Parsed only to check validity; the result is discarded.
+                FingerprintingRules.from_config_string(rules_string)
+            except InvalidFingerprintingConfig as e:
+                raise serializers.ValidationError(e.message)
+
+        return attrs
+
     def validate_copy_from_project(self, attrs, source):
         other_project_id = attrs[source]
 
@@ -380,6 +407,17 @@ class ProjectDetailsEndpoint(ProjectEndpoint):
         if result.get('groupingConfig') is not None:
             if project.update_option('sentry:grouping_config', result['groupingConfig']):
                 changed_proj_settings['sentry:grouping_config'] = result['groupingConfig']
+        if result.get('groupingEnhancements') is not None:
+            if project.update_option('sentry:grouping_enhancements',
+                                     result['groupingEnhancements']):
+                changed_proj_settings['sentry:grouping_enhancements'] = result['groupingEnhancements']
+        if result.get('groupingEnhancementsBase') is not None:
+            if project.update_option('sentry:grouping_enhancements_base',
+                                     result['groupingEnhancementsBase']):
+                changed_proj_settings['sentry:grouping_enhancements_base'] = result['groupingEnhancementsBase']
+        if result.get('fingerprintingRules') is not None:
+            if project.update_option('sentry:fingerprinting_rules', result['fingerprintingRules']):
+                changed_proj_settings['sentry:fingerprinting_rules'] = result['fingerprintingRules']
         if result.get('securityToken') is not None:
             if project.update_option('sentry:token', result['securityToken']):
                 changed_proj_settings['sentry:token'] = result['securityToken']
@@ -482,6 +520,11 @@ class ProjectDetailsEndpoint(ProjectEndpoint):
                     'sentry:grouping_config',
                     options['sentry:grouping_config'],
                 )
+            if 'sentry:fingerprinting_rules' in options:
+                project.update_option(
+                    'sentry:fingerprinting_rules',
+                    options['sentry:fingerprinting_rules'],
+                )
             if 'mail:subject_prefix' in options:
                 project.update_option(
                     'mail:subject_prefix',

+ 7 - 0
src/sentry/api/serializers/models/project.py

@@ -21,6 +21,7 @@ from sentry.models import (
     ProjectStatus, ProjectTeam, Release, ReleaseProjectEnvironment, Deploy, UserOption, DEFAULT_SUBJECT_TEMPLATE
 )
 from sentry.grouping.strategies.configurations import DEFAULT_CONFIG as DEFAULT_GROUPING_CONFIG
+from sentry.grouping.enhancer import DEFAULT_ENHANCEMENT_BASE
 from sentry.utils.data_filters import FilterTypes
 from sentry.utils.db import is_postgres
 
@@ -425,6 +426,9 @@ class DetailedProjectSerializer(ProjectWithTeamSerializer):
             'sentry:verify_ssl',
             'sentry:scrub_ip_address',
             'sentry:grouping_config',
+            'sentry:grouping_enhancements',
+            'sentry:grouping_enhancements_base',
+            'sentry:fingerprinting_rules',
             'sentry:relay_pii_config',
             'feedback:branding',
             'digests:mail:minimum_delay',
@@ -543,6 +547,9 @@ class DetailedProjectSerializer(ProjectWithTeamSerializer):
                 'scrubIPAddresses': bool(attrs['options'].get('sentry:scrub_ip_address', False)),
                 'scrapeJavaScript': bool(attrs['options'].get('sentry:scrape_javascript', True)),
                 'groupingConfig': attrs['options'].get('sentry:grouping_config') or DEFAULT_GROUPING_CONFIG,
+                'groupingEnhancements': attrs['options'].get('sentry:grouping_enhancements') or u'',
+                'groupingEnhancementsBase': attrs['options'].get('sentry:grouping_enhancements_base') or DEFAULT_ENHANCEMENT_BASE,
+                'fingerprintingRules': attrs['options'].get('sentry:fingerprinting_rules') or u'',
                 'organization':
                 attrs['org'],
                 'plugins':

+ 5 - 0
src/sentry/api/urls.py

@@ -217,6 +217,7 @@ from .endpoints.user_subscriptions import UserSubscriptionsEndpoint
 from .endpoints.event_file_committers import EventFileCommittersEndpoint
 from .endpoints.setup_wizard import SetupWizard
 from .endpoints.grouping_configs import GroupingConfigsEndpoint
+from .endpoints.grouping_enhancements import GroupingEnhancementsEndpoint
 
 
 urlpatterns = patterns(
@@ -1292,6 +1293,10 @@ urlpatterns = patterns(
         r'^grouping-configs/$', GroupingConfigsEndpoint.as_view(),
         name='sentry-api-0-grouping-configs'
     ),
+    url(
+        r'^grouping-enhancements/$', GroupingEnhancementsEndpoint.as_view(),
+        name='sentry-api-0-grouping-enhancements'
+    ),
 
     # Internal
     url(r'^internal/health/$', SystemHealthEndpoint.as_view(),

+ 8 - 3
src/sentry/event_manager.py

@@ -20,7 +20,9 @@ from sentry import buffer, eventtypes, eventstream, features, tagstore, tsdb, fi
 from sentry.constants import (
     LOG_LEVELS, LOG_LEVELS_MAP, VALID_PLATFORMS, MAX_TAG_VALUE_LENGTH,
 )
-from sentry.grouping.api import get_grouping_config_dict_for_project
+from sentry.grouping.api import get_grouping_config_dict_for_project, \
+    get_grouping_config_dict_for_event_data, load_grouping_config, \
+    apply_server_fingerprinting, get_fingerprinting_config_for_project
 from sentry.coreapi import (
     APIError,
     APIForbidden,
@@ -55,7 +57,7 @@ from sentry.utils.safe import safe_execute, trim, get_path, setdefault_path
 from sentry.utils.geo import rust_geoip
 from sentry.utils.validators import is_float
 from sentry.utils.contexts_normalization import normalize_user_agent
-from sentry.stacktraces import normalize_in_app
+from sentry.stacktraces import normalize_stacktraces_for_grouping
 from sentry.culprit import generate_culprit
 
 
@@ -690,7 +692,9 @@ class EventManager(object):
 
         # At this point we want to normalize the in_app values in case the
         # clients did not set this appropriately so far.
-        normalize_in_app(data)
+        grouping_config = load_grouping_config(
+            get_grouping_config_dict_for_event_data(data, project))
+        normalize_stacktraces_for_grouping(data, grouping_config)
 
         for plugin in plugins.for_project(project, version=None):
             added_tags = safe_execute(plugin.get_tags, event, _with_transaction=False)
@@ -713,6 +717,7 @@ class EventManager(object):
         # removed it from the payload.  The call to get_hashes will then
         # look at `grouping_config` to pick the right paramters.
         data['fingerprint'] = data.get('fingerprint') or ['{{ default }}']
+        apply_server_fingerprinting(data, get_fingerprinting_config_for_project(project))
         hashes = event.get_hashes()
         data['hashes'] = hashes
 

+ 1 - 1
src/sentry/eventtypes/error.py

@@ -15,7 +15,7 @@ def get_crash_location(exception, platform=None):
         if fn:
             func = frame.get('function')
             if func is not None:
-                from sentry.interfaces.stacktrace import trim_function_name
+                from sentry.grouping.strategies.utils import trim_function_name
                 func = trim_function_name(func, frame.get('platform') or platform)
             if frame.get('in_app'):
                 return fn, func

+ 85 - 5
src/sentry/grouping/api.py

@@ -7,6 +7,8 @@ from sentry.grouping.strategies.configurations import CONFIGURATIONS, DEFAULT_CO
 from sentry.grouping.component import GroupingComponent
 from sentry.grouping.variants import ChecksumVariant, FallbackVariant, \
     ComponentVariant, CustomFingerprintVariant, SaltedComponentVariant
+from sentry.grouping.enhancer import Enhancements, InvalidEnhancerConfig, \
+    DEFAULT_ENHANCEMENTS_CONFIG, DEFAULT_ENHANCEMENT_BASE, ENHANCEMENT_BASES
 from sentry.grouping.utils import DEFAULT_FINGERPRINT_VALUES, hash_from_values
 
 
@@ -29,9 +31,7 @@ def get_grouping_config_dict_for_project(project, silent=True):
     if config_id is None:
         config_id = DEFAULT_CONFIG
     else:
-        try:
-            CONFIGURATIONS[config_id]
-        except KeyError:
+        if config_id not in CONFIGURATIONS:
             if not silent:
                 raise ConfigNotFoundException(config_id)
             config_id = DEFAULT_CONFIG
@@ -40,13 +40,51 @@ def get_grouping_config_dict_for_project(project, silent=True):
     # such as frames that mark the end of a stacktrace and more.
     return {
         'id': config_id,
+        'enhancements': _get_project_enhancements_config(project),
     }
 
 
-def get_default_grouping_config_dict():
+def get_grouping_config_dict_for_event_data(data, project):
+    """Returns the grouping config for an event dictionary.
+
+    A truthy ``grouping_config`` entry already present in ``data`` wins;
+    otherwise the config is derived from the project.
+    """
+    stored_config = data.get('grouping_config')
+    if stored_config:
+        return stored_config
+    return get_grouping_config_dict_for_project(project)
+
+
+def _get_project_enhancements_config(project):
+    """Return the dumped enhancements config to use for ``project``.
+
+    ``DEFAULT_ENHANCEMENTS_CONFIG`` is returned when the project sets
+    neither custom enhancements nor an enhancements base, and also when the
+    stored rules fail to parse.  An unknown or missing base falls back to
+    ``DEFAULT_ENHANCEMENT_BASE``.  The dumped result is cached under a key
+    derived from the base id and the rule text.
+    """
+    custom_rules = project.get_option('sentry:grouping_enhancements')
+    base_id = project.get_option('sentry:grouping_enhancements_base')
+    if not (custom_rules or base_id):
+        return DEFAULT_ENHANCEMENTS_CONFIG
+
+    if base_id is None or base_id not in ENHANCEMENT_BASES:
+        base_id = DEFAULT_ENHANCEMENT_BASE
+
+    # Identical (base, rules) pairs always dump to the same string, so the
+    # dumped form is looked up by a digest of the inputs instead of being
+    # parsed and dumped on every call.
+    from sentry.utils.cache import cache
+    from sentry.utils.hashlib import md5_text
+    digest = md5_text('%s|%s' % (base_id, custom_rules)).hexdigest()
+    cache_key = 'grouping-enhancements:' + digest
+    cached = cache.get(cache_key)
+    if cached is not None:
+        return cached
+
+    try:
+        dumped = Enhancements.from_config_string(
+            custom_rules or '', bases=[base_id]).dumps()
+    except InvalidEnhancerConfig:
+        dumped = DEFAULT_ENHANCEMENTS_CONFIG
+    cache.set(cache_key, dumped)
+    return dumped
+
+
+def get_default_grouping_config_dict(id=None):
     """Returns the default grouping config."""
+    if id is None:
+        id = DEFAULT_CONFIG
     return {
-        'id': DEFAULT_CONFIG,
+        'id': id,
+        'enhancements': DEFAULT_ENHANCEMENTS_CONFIG,
     }
 
 
@@ -63,6 +101,48 @@ def load_grouping_config(config_dict=None):
     return CONFIGURATIONS[config_id](**config_dict)
 
 
+def get_fingerprinting_config_for_project(project):
+    """Return the server-side ``FingerprintingRules`` for ``project``.
+
+    The rules are read from the ``sentry:fingerprinting_rules`` project
+    option.  An unset or empty option, or rules that fail to parse, yield
+    an empty rule set.  Parsed rules are cached in their JSON form under a
+    key derived from the rule text.
+    """
+    from sentry.grouping.fingerprinting import FingerprintingRules, \
+        InvalidFingerprintingConfig
+    # This key must be the one the project details endpoint and the project
+    # serializer use ('sentry:fingerprinting_rules'); with any other key the
+    # rules saved through the API are never picked up here.
+    rules = project.get_option('sentry:fingerprinting_rules')
+    if not rules:
+        return FingerprintingRules([])
+
+    from sentry.utils.cache import cache
+    from sentry.utils.hashlib import md5_text
+    cache_key = 'fingerprinting-rules:' + md5_text(rules).hexdigest()
+    rv = cache.get(cache_key)
+    if rv is not None:
+        # The cache stores the JSON form, so rebuild the rules object.
+        return FingerprintingRules.from_json(rv)
+
+    try:
+        rv = FingerprintingRules.from_config_string(rules)
+    except InvalidFingerprintingConfig:
+        # Invalid stored rules behave like no rules at all.
+        rv = FingerprintingRules([])
+    cache.set(cache_key, rv.to_json())
+    return rv
+
+
+def apply_server_fingerprinting(event, config):
+    """Expand default fingerprint placeholders with server-side values.
+
+    Does nothing when ``event['fingerprint']`` contains none of the
+    ``DEFAULT_FINGERPRINT_VALUES`` or when ``config`` yields no values for
+    the event.  Otherwise ``event['fingerprint']`` is replaced by a new
+    list in which each default placeholder is substituted by the values
+    from ``config``; all other entries are kept in order.
+    """
+    client_fingerprint = event['fingerprint']
+    if all(x not in DEFAULT_FINGERPRINT_VALUES for x in client_fingerprint):
+        return
+
+    server_values = config.get_fingerprint_values_for_event(event)
+    if server_values is None:
+        return
+
+    merged = []
+    for value in client_fingerprint:
+        if value in DEFAULT_FINGERPRINT_VALUES:
+            merged.extend(server_values)
+        else:
+            merged.append(value)
+
+    event['fingerprint'] = merged
+
+
 def _get_calculated_grouping_variants_for_event(event, config):
     winning_strategy = None
     precedence_hint = None

+ 32 - 0
src/sentry/grouping/enhancement-configs/common:2019-03-23.txt

@@ -0,0 +1,32 @@
+## * The default configuration of stacktrace grouping enhancers
+
+# exclude common paths
+family:native path:/usr/lib/**                                    -app
+family:native path:/usr/local/lib/**                              -app
+family:native path:/usr/local/Cellar/**                           -app
+
+# rust common modules
+family:native function:std::*                                     -app
+family:native function:core::*                                    -app
+family:native function:alloc::*                                   -app
+family:native function:__rust_*                                   -app
+
+# rust borders
+family:native function:std::panicking::begin_panic                ^-group -group -app
+family:native function:core::panicking::begin_panic               ^-group -group -app
+family:native function:failure::backtrace::Backtrace::new         ^-group -group -app
+family:native function:error_chain::make_backtrace                ^-group -group -app
+
+# C++ borders
+family:native function:_CxxThrowException                         ^-group -group -app
+family:native function:__cxa_throw                                ^-group -group -app
+family:native function:__assert_rtn                               ^-group -group -app
+
+# Objective-C
+family:native function:_NSRaiseError                              ^-group -group -app
+family:native function:_mh_execute_header                         -group -app
+
+# Breakpad
+family:native function:google_breakpad::*                         -app -group
+family:native function:google_breakpad::ExceptionHandler::SignalHandler ^-group -group
+family:native function:google_breakpad::ExceptionHandler::WriteMinidumpWithException ^-group -group

Some files were not shown because too many files changed in this diff