Browse Source

ref(filters): Use relay config instead of direct DB access (#13770)

Refactored message filters to take relay_config rather than doing direct DB access.
Radu Woinaroski 5 years ago
parent
commit
5cdf1bbd49

+ 16 - 12
src/sentry/api/endpoints/project_filter_details.py

@@ -2,10 +2,10 @@ from __future__ import absolute_import
 
 from rest_framework.response import Response
 
-from sentry import filters
+from sentry import message_filters
 from sentry.api.bases.project import ProjectEndpoint
 from sentry.api.exceptions import ResourceDoesNotExist
-from sentry.models import AuditLogEntryEvent
+from sentry.models.auditlogentry import AuditLogEntryEvent
 import six
 
 
@@ -19,21 +19,25 @@ class ProjectFilterDetailsEndpoint(ProjectEndpoint):
             {method} {path}
 
         """
-        try:
-            filter = filters.get(filter_id)(project)
-        except filters.FilterNotRegistered:
-            raise ResourceDoesNotExist
+        current_filter = None
+        for flt in message_filters.get_all_filters():
+            if flt.spec.id == filter_id:
+                current_filter = flt
+                break
+        else:
+            raise ResourceDoesNotExist  # could not find filter with the requested id
 
-        serializer = filter.serializer_cls(data=request.DATA, partial=True)
+        serializer = current_filter.spec.serializer_cls(data=request.DATA, partial=True)
 
         if not serializer.is_valid():
             return Response(serializer.errors, status=400)
 
-        current_state = filter.is_enabled()
-        new_state = filter.enable(serializer.object)
+        current_state = message_filters.get_filter_state(filter_id, project)
+
+        new_state = message_filters.set_filter_state(filter_id, project, serializer.object)
         audit_log_state = AuditLogEntryEvent.PROJECT_ENABLE
 
-        if filter.id == 'legacy-browsers':
+        if filter_id == 'legacy-browsers':
             if isinstance(current_state, bool) or new_state == 0 or isinstance(
                     new_state, six.binary_type):
                 returned_state = new_state
@@ -52,8 +56,8 @@ class ProjectFilterDetailsEndpoint(ProjectEndpoint):
             elif new_state == current_state:
                 returned_state = new_state
 
-        if filter.id in ('browser-extensions', 'localhost', 'web-crawlers'):
-            returned_state = filter.id
+        if filter_id in ('browser-extensions', 'localhost', 'web-crawlers'):
+            returned_state = filter_id
             removed = current_state - new_state
 
             if removed == 1:

+ 8 - 7
src/sentry/api/endpoints/project_filters.py

@@ -2,7 +2,7 @@ from __future__ import absolute_import
 
 from rest_framework.response import Response
 
-from sentry import filters
+from sentry import message_filters
 from sentry.api.bases.project import ProjectEndpoint
 
 
@@ -17,16 +17,17 @@ class ProjectFiltersEndpoint(ProjectEndpoint):
 
         """
         results = []
-        for f_cls in filters.all():
-            filter = f_cls(project)
+        for flt in message_filters.get_all_filters():
+            filter_spec = flt.spec
             results.append(
                 {
-                    'id': filter.id,
+                    'id': filter_spec.id,
                     # 'active' will be either a boolean or list for the legacy browser filters
                     # all other filters will be boolean
-                    'active': filter.is_enabled(),
-                    'description': filter.description,
-                    'name': filter.name,
+                    'active': message_filters.get_filter_state(filter_spec.id, project),
+                    'description': filter_spec.description,
+                    'name': filter_spec.name,
+                    'hello': filter_spec.id + " - " + filter_spec.name
                 }
             )
         results.sort(key=lambda x: x['name'])

+ 4 - 7
src/sentry/event_manager.py

@@ -19,11 +19,12 @@ from django.db.models import Func
 from django.utils import timezone
 from django.utils.encoding import force_text
 
-from sentry import buffer, eventtypes, eventstream, features, tagstore, tsdb, filters
+from sentry import buffer, eventtypes, eventstream, features, tagstore, tsdb
 from sentry.constants import (
     DEFAULT_STORE_NORMALIZER_ARGS, LOG_LEVELS, LOG_LEVELS_MAP,
     MAX_TAG_VALUE_LENGTH, MAX_SECS_IN_FUTURE, MAX_SECS_IN_PAST,
 )
+from sentry.message_filters import should_filter_event
 from sentry.grouping.api import (
     get_grouping_config_dict_for_project,
     get_grouping_config_dict_for_event_data, load_grouping_config,
@@ -291,6 +292,7 @@ class EventManager(object):
         self._is_renormalize = is_renormalize
         self._remove_other = remove_other
         self._normalized = False
+        self.relay_config = relay_config
 
     def process_csp_report(self):
         """Only called from the CSP report endpoint."""
@@ -408,12 +410,7 @@ class EventManager(object):
             if message and not is_valid_error_message(self._project, message):
                 return (True, FilterStatKeys.ERROR_MESSAGE)
 
-        for filter_cls in filters.all():
-            filter_obj = filter_cls(self._project)
-            if filter_obj.is_enabled() and filter_obj.test(self._data):
-                return (True, six.text_type(filter_obj.id))
-
-        return (False, None)
+        return should_filter_event(self.relay_config, self._data)
 
     def get_data(self):
         return self._data

+ 0 - 25
src/sentry/filters/__init__.py

@@ -1,25 +0,0 @@
-from __future__ import absolute_import, print_function
-
-__all__ = [
-    'Filter', 'FilterManager', 'FilterNotRegistered', 'all', 'exists', 'get', 'register',
-    'unregister'
-]
-
-from .base import Filter  # NOQA
-from .manager import FilterManager  # NOQA
-
-from .localhost import LocalhostFilter
-from .browser_extensions import BrowserExtensionsFilter
-from .legacy_browsers import LegacyBrowsersFilter
-from .web_crawlers import WebCrawlersFilter
-
-default_manager = FilterManager([
-    LocalhostFilter,
-    BrowserExtensionsFilter,
-    LegacyBrowsersFilter,
-    WebCrawlersFilter,
-])
-
-all = default_manager.all
-exists = default_manager.exists
-get = default_manager.get

+ 0 - 50
src/sentry/filters/base.py

@@ -1,50 +0,0 @@
-from __future__ import absolute_import
-
-__all__ = ['Filter']
-
-from sentry.models import ProjectOption
-from sentry.signals import inbound_filter_toggled
-from rest_framework import serializers
-
-
-class FilterSerializer(serializers.Serializer):
-    active = serializers.BooleanField()
-
-
-class Filter(object):
-    id = None
-    description = None
-    name = None
-    default = False
-    serializer_cls = FilterSerializer
-
-    def __init__(self, project):
-        self.project = project
-
-    def is_enabled(self):
-        return ProjectOption.objects.get_value(
-            project=self.project,
-            key=u'filters:{}'.format(self.id),
-            default='1' if self.default else '0',
-        ) == '1'
-
-    def enable(self, value=None):
-        if value is None:
-            value = {'active': True}
-
-        ProjectOption.objects.set_value(
-            project=self.project,
-            key=u'filters:{}'.format(self.id),
-            value='1' if value.get('active', False) else '0',
-        )
-
-        if value:
-            inbound_filter_toggled.send(project=self.project, sender=self)
-
-        return value.get('active', False)
-
-    def disable(self):
-        return self.enable(False)
-
-    def test(self):
-        return False

+ 0 - 111
src/sentry/filters/browser_extensions.py

@@ -1,111 +0,0 @@
-from __future__ import absolute_import
-
-from .base import Filter
-
-import re
-
-from sentry.utils.data_filters import FilterStatKeys
-
-EXTENSION_EXC_VALUES = re.compile(
-    '|'.join(
-        (
-            re.escape(x)
-            for x in (
-                # Random plugins/extensions
-                'top.GLOBALS',
-                # See: http://blog.errorception.com/2012/03/tale-of-unfindable-js-error.html
-                'originalCreateNotification',
-                'canvas.contentDocument',
-                'MyApp_RemoveAllHighlights',
-                'http://tt.epicplay.com',
-                'Can\'t find variable: ZiteReader',
-                'jigsaw is not defined',
-                'ComboSearch is not defined',
-                'http://loading.retry.widdit.com/',
-                'atomicFindClose',
-                # Facebook borked
-                'fb_xd_fragment',
-                # ISP "optimizing" proxy - `Cache-Control: no-transform` seems to
-                # reduce this. (thanks @acdha)
-                # See http://stackoverflow.com/questions/4113268
-                'bmi_SafeAddOnload',
-                'EBCallBackMessageReceived',
-                # See
-                # https://groups.google.com/a/chromium.org/forum/#!topic/chromium-discuss/7VU0_VvC7mE
-                '_gCrWeb',
-                # See http://toolbar.conduit.com/Debveloper/HtmlAndGadget/Methods/JSInjection.aspx
-                'conduitPage',
-                # Google Search app (iOS)
-                # See: https://github.com/getsentry/raven-js/issues/756
-                'null is not an object (evaluating \'elt.parentNode\')',
-                # Dragon Web Extension from Nuance Communications
-                # See: https://forum.sentry.io/t/error-in-raven-js-plugin-setsuspendstate/481/
-                'plugin.setSuspendState is not a function',
-            )
-        )
-    ),
-    re.I
-)
-
-EXTENSION_EXC_SOURCES = re.compile(
-    '|'.join(
-        (
-            # Facebook flakiness
-            r'graph\.facebook\.com',
-            # Facebook blocked
-            r'connect\.facebook\.net',
-            # Woopra flakiness
-            r'eatdifferent\.com\.woopra-ns\.com',
-            r'static\.woopra\.com\/js\/woopra\.js',
-            # Chrome extensions
-            r'^chrome(?:-extension)?:\/\/',
-            # Cacaoweb
-            r'127\.0\.0\.1:4001\/isrunning',
-            # Other
-            r'webappstoolbarba\.texthelp\.com\/',
-            r'metrics\.itunes\.apple\.com\.edgesuite\.net\/',
-            # Kaspersky Protection browser extension
-            r'kaspersky-labs\.com',
-        )
-    ),
-    re.I
-)
-
-
-class BrowserExtensionsFilter(Filter):
-    id = FilterStatKeys.BROWSER_EXTENSION
-    name = 'Filter out errors known to be caused by browser extensions'
-    description = 'Certain browser extensions will inject inline scripts and are known to cause errors.'
-
-    def get_exception_value(self, data):
-        try:
-            return data['exception']['values'][0]['value']
-        except (LookupError, TypeError):
-            return ''
-
-    def get_exception_source(self, data):
-        try:
-            return data['exception']['values'][0]['stacktrace']['frames'
-                                                                ][-1]['abs_path']
-        except (LookupError, TypeError):
-            return ''
-
-    def test(self, data):
-        """
-        Test the exception value to determine if it looks like the error is
-        caused by a common browser extension.
-        """
-        if data.get('platform') != 'javascript':
-            return False
-
-        exc_value = self.get_exception_value(data)
-        if exc_value:
-            if EXTENSION_EXC_VALUES.search(exc_value):
-                return True
-
-        exc_source = self.get_exception_source(data)
-        if exc_source:
-            if EXTENSION_EXC_SOURCES.search(exc_source):
-                return True
-
-        return False

+ 0 - 230
src/sentry/filters/legacy_browsers.py

@@ -1,230 +0,0 @@
-from __future__ import absolute_import
-
-from .base import Filter
-
-from ua_parser.user_agent_parser import Parse
-from rest_framework import serializers
-from sentry.models import ProjectOption
-from sentry.api.fields import MultipleChoiceField
-from sentry.utils.data_filters import FilterStatKeys
-from sentry.utils.safe import get_path
-
-"""
-For default (legacy) filter
-"""
-MIN_VERSIONS = {
-    'Chrome': 0,
-    'IE': 10,
-    'Firefox': 0,
-    'Safari': 6,
-    'Edge': 0,
-    'Opera': 15,
-    'Android': 4,
-    'Opera Mini': 8
-}
-
-
-class LegacyBrowserFilterSerializer(serializers.Serializer):
-    active = serializers.BooleanField()
-    subfilters = MultipleChoiceField(
-        choices=[
-            'ie_pre_9',
-            'ie9',
-            'ie10',
-            'opera_pre_15',
-            'android_pre_4',
-            'safari_pre_6',
-            'opera_mini_pre_8']
-    )
-
-
-class LegacyBrowsersFilter(Filter):
-    id = FilterStatKeys.LEGACY_BROWSER
-    name = 'Filter out known errors from legacy browsers'
-    description = 'Older browsers often give less accurate information, and while they may report valid issues, the context to understand them is incorrect or missing.'
-    default = False
-    serializer_cls = LegacyBrowserFilterSerializer
-
-    def is_enabled(self):
-        # May be either a '1' or an iterable for new style
-        # The javascript code requires this to return either
-        # a boolean or a list of subfilters depending on if all, none, or some
-        # legacy browsers should be filtered
-        rv = ProjectOption.objects.get_value(
-            project=self.project,
-            key=u'filters:{}'.format(self.id),
-            default='1' if self.default else '0',
-        )
-
-        if rv == '1':
-            return True
-        if rv == '0':
-            return False
-
-        return rv
-
-    def enable(self, value=None):
-        if value is None:
-            value = {}
-
-        option_val = '0'
-        if 'active' in value:
-            if value['active']:
-                option_val = '1'
-        elif 'subfilters' in value and len(value['subfilters']) > 0:
-            option_val = set(value['subfilters'])
-
-        ProjectOption.objects.set_value(
-            project=self.project,
-            key=u'filters:{}'.format(self.id),
-            value=option_val,
-        )
-
-        return option_val
-
-    def get_user_agent(self, data):
-        try:
-            for key, value in get_path(data, 'request', 'headers', filter=True) or ():
-                if key.lower() == 'user-agent':
-                    return value
-        except LookupError:
-            return ''
-
-    def filter_default(self, browser):
-        """
-        Legacy filter - new users specify individual filters
-        """
-        try:
-            minimum_version = MIN_VERSIONS[browser['family']]
-        except KeyError:
-            return False
-
-        try:
-            major_browser_version = int(browser['major'])
-        except (TypeError, ValueError):
-            return False
-
-        if minimum_version > major_browser_version:
-            return True
-
-        return False
-
-    def filter_opera_pre_15(self, browser):
-        if not browser['family'] == "Opera":
-            return False
-
-        try:
-            major_browser_version = int(browser['major'])
-        except (TypeError, ValueError):
-            return False
-
-        if major_browser_version < 15:
-            return True
-
-        return False
-
-    def filter_safari_pre_6(self, browser):
-        if not browser['family'] == "Safari":
-            return False
-
-        try:
-            major_browser_version = int(browser['major'])
-        except (TypeError, ValueError):
-            return False
-
-        if major_browser_version < 6:
-            return True
-
-        return False
-
-    def filter_android_pre_4(self, browser):
-        if not browser['family'] == "Android":
-            return False
-
-        try:
-            major_browser_version = int(browser['major'])
-        except (TypeError, ValueError):
-            return False
-
-        if major_browser_version < 4:
-            return True
-
-        return False
-
-    def filter_opera_mini_pre_8(self, browser):
-        if not browser['family'] == "Opera Mini":
-            return False
-
-        try:
-            major_browser_version = int(browser['major'])
-        except (TypeError, ValueError):
-            return False
-
-        if major_browser_version < 8:
-            return True
-
-        return False
-
-    def _filter_ie(self, browser, compare_version):
-        if not browser['family'] == "IE":
-            return False
-
-        try:
-            major_browser_version = int(browser['major'])
-        except (TypeError, ValueError):
-            return False
-
-        return compare_version(major_browser_version)
-
-    def filter_ie10(self, browser):
-        return self._filter_ie(browser, lambda major_ver: major_ver == 10)
-
-    def filter_ie9(self, browser):
-        return self._filter_ie(browser, lambda major_ver: major_ver == 9)
-
-    def filter_ie_pre_9(self, browser):
-        return self._filter_ie(browser, lambda major_ver: major_ver <= 8)
-
-    def test(self, data):
-        if data.get('platform') != 'javascript':
-            return False
-
-        opts = ProjectOption.objects.get_value(
-            project=self.project,
-            key=u'filters:{}'.format(self.id),
-        )
-
-        value = self.get_user_agent(data)
-        if not value:
-            return False
-
-        ua = Parse(value)
-        if not ua:
-            return False
-
-        browser = ua['user_agent']
-
-        if not browser['family']:
-            return False
-
-        # IE Desktop and IE Mobile use the same engines, therefore we can treat them as one
-        if browser['family'] == "IE Mobile":
-            browser['family'] = "IE"
-
-        # handle old style config
-        if opts == '1':
-            return self.filter_default(browser)
-
-        # New style is not a simple boolean, but a list of
-        # specific filters to apply
-        if opts:
-            for key in opts:
-                try:
-                    fn = getattr(self, 'filter_' + key)
-                except AttributeError:
-                    pass
-                else:
-                    if fn(browser):
-                        return True
-
-        return False

+ 0 - 27
src/sentry/filters/localhost.py

@@ -1,27 +0,0 @@
-from __future__ import absolute_import
-
-from .base import Filter
-from six.moves.urllib.parse import urlparse
-from sentry.utils.data_filters import FilterStatKeys
-from sentry.utils.safe import get_path
-
-LOCAL_IPS = frozenset(['127.0.0.1', '::1'])
-LOCAL_DOMAINS = frozenset(['127.0.0.1', 'localhost'])
-
-
-class LocalhostFilter(Filter):
-    id = FilterStatKeys.LOCALHOST
-    name = 'Filter out events coming from localhost'
-    description = 'This applies to both IPv4 (``127.0.0.1``) and IPv6 (``::1``) addresses.'
-
-    def get_ip_address(self, data):
-        return get_path(data, 'user', 'ip_address') or ''
-
-    def get_url(self, data):
-        return get_path(data, 'request', 'url') or ''
-
-    def get_domain(self, data):
-        return urlparse(self.get_url(data)).hostname
-
-    def test(self, data):
-        return self.get_ip_address(data) in LOCAL_IPS or self.get_domain(data) in LOCAL_DOMAINS

+ 0 - 32
src/sentry/filters/manager.py

@@ -1,32 +0,0 @@
-from __future__ import absolute_import, print_function
-
-__all__ = ['FilterManager', 'FilterNotRegistered']
-
-import six
-
-
-class FilterNotRegistered(Exception):
-    pass
-
-
-# TODO(dcramer): a lot of these managers are very similar and should abstracted
-# into some kind of base class
-class FilterManager(object):
-    def __init__(self, values):
-        self.__values = {cls.id: cls for cls in values}
-
-    def __iter__(self):
-        return six.itervalues(self.__values)
-
-    def all(self):
-        return iter(self)
-
-    def get(self, id):
-        try:
-            cls = self.__values[id]
-        except KeyError:
-            raise FilterNotRegistered(id)
-        return cls
-
-    def exists(self, id):
-        return id in self.__values

+ 0 - 74
src/sentry/filters/web_crawlers.py

@@ -1,74 +0,0 @@
-from __future__ import absolute_import
-
-import re
-
-from .base import Filter
-from sentry.utils.data_filters import FilterStatKeys
-from sentry.utils.safe import get_path
-
-# not all of these agents are guaranteed to execute JavaScript, but to avoid
-# overhead of identifying which ones do, and which ones will over time we simply
-# target all of the major ones
-CRAWLERS = re.compile(
-    r'|'.join(
-        (
-            # Google spiders (Adsense and others)
-            # https://support.google.com/webmasters/answer/1061943?hl=en
-            r'Mediapartners\-Google',
-            r'AdsBot\-Google',
-            r'Googlebot',
-            r'FeedFetcher\-Google',
-            # Bing search
-            r'BingBot',
-            r'BingPreview',
-            # Baidu search
-            r'Baiduspider',
-            # Yahoo
-            r'Slurp',
-            # Sogou
-            r'Sogou',
-            # facebook
-            r'facebook',
-            # Alexa
-            r'ia_archiver',
-            # Generic bot
-            r'bots?[\/\s\)\;]',
-            # Generic spider
-            r'spider[\/\s\)\;]',
-            # Slack - see https://api.slack.com/robots
-            r'Slack',
-            # Google indexing bot
-            r'Calypso AppCrawler',
-            # Pingdom
-            r'pingdom',
-            # Lytics
-            r'lyticsbot'
-        )
-    ),
-    re.I
-)
-
-
-class WebCrawlersFilter(Filter):
-    id = FilterStatKeys.WEB_CRAWLER
-    name = 'Filter out known web crawlers'
-    description = 'Some crawlers may execute pages in incompatible ways which then cause errors that are unlikely to be seen by a normal user.'
-    default = True
-
-    def get_user_agent(self, data):
-        try:
-            for key, value in get_path(data, 'request', 'headers', filter=True) or ():
-                if key.lower() == 'user-agent':
-                    return value
-        except LookupError:
-            return ''
-
-    def test(self, data):
-        """Return True if event with given user agent should be filtered out, False otherwise"""
-
-        # TODO(dcramer): we could also look at UA parser and use the 'Spider'
-        # device type
-        user_agent = self.get_user_agent(data)
-        if not user_agent:
-            return False
-        return bool(CRAWLERS.search(user_agent))

Some files were not shown because too many files changed in this diff