Browse Source

fix(grouping): Better handle unescaping (#21046)

Armin Ronacher 4 years ago
parent
commit
0ec910e4fd

+ 2 - 1
src/sentry/grouping/enhancer.py

@@ -20,6 +20,7 @@ from sentry.utils.compat import implements_to_string
 from sentry.utils.glob import glob_match
 from sentry.utils.safe import get_path
 from sentry.utils.compat import zip
+from sentry.utils.strings import unescape_string
 
 
 # Grammar is defined in EBNF syntax.
@@ -603,7 +604,7 @@ class EnhancmentsVisitor(NodeVisitor):
         return int(node.text)
 
     def visit_quoted(self, node, children):
-        return node.text[1:-1].encode("ascii", "backslashreplace").decode("unicode-escape")
+        return unescape_string(node.text[1:-1])
 
     def visit_unquoted(self, node, children):
         return node.text

+ 2 - 1
src/sentry/grouping/fingerprinting.py

@@ -10,6 +10,7 @@ from sentry.stacktraces.platform import get_behavior_family_for_platform
 from sentry.grouping.utils import get_rule_bool
 from sentry.utils.safe import get_path
 from sentry.utils.glob import glob_match
+from sentry.utils.strings import unescape_string
 
 
 VERSION = 1
@@ -420,7 +421,7 @@ class FingerprintingVisitor(NodeVisitor):
         return argument
 
     def visit_quoted(self, node, children):
-        return node.text[1:-1].encode("ascii", "backslashreplace").decode("unicode-escape")
+        return unescape_string(node.text[1:-1])
 
     def visit_unquoted(self, node, children):
         return node.text

+ 18 - 0
src/sentry/utils/strings.py

@@ -30,6 +30,24 @@ _lone_surrogate = re.compile(
 )
 
 
+def unicode_escape_recovery_handler(err):
+    try:
+        value = err.object[err.start : err.end].decode("utf-8")
+    except UnicodeError:
+        value = u""
+    return value, err.end
+
+
+codecs.register_error("unicode-escape-recovery", unicode_escape_recovery_handler)
+
+
+def unescape_string(value):
+    """Unescapes a backslash escaped string."""
+    return value.encode("ascii", "backslashreplace").decode(
+        "unicode-escape", "unicode-escape-recovery"
+    )
+
+
 def strip_lone_surrogates(string):
     """Removes lone surrogates."""
     if six.PY3:

+ 2 - 0
tests/sentry/grouping/test_fingerprinting.py

@@ -20,6 +20,7 @@ app:true                                        -> {{ default }}
 !path:**/foo/**                                 -> everything
 !"path":**/foo/**                               -> everything
 logger:sentry.*                                 -> logger-, {{ logger }}
+message:"\\x\\xff"                              -> stuff
 """
     )
     assert rules._to_config_structure() == {
@@ -34,6 +35,7 @@ logger:sentry.*                                 -> logger-, {{ logger }}
             {"matchers": [["!path", "**/foo/**"]], "fingerprint": ["everything"]},
             {"matchers": [["!path", "**/foo/**"]], "fingerprint": ["everything"]},
             {"matchers": [["logger", "sentry.*"]], "fingerprint": ["logger-", "{{ logger }}"]},
+            {"matchers": [["message", u"\\x\xff"]], "fingerprint": ["stuff"]},
         ],
         "version": 1,
     }