@@ -1,5 +1,7 @@
import functools
+import pytest
from sentry.utils.strings import (
@@ -15,42 +17,34 @@ ZWSP = "\u200b" # zero width space
SHY = "\u00ad" # soft hyphen
-def test_unescape_string():
- # For raw string literals, python escapes any backslash,
- # regardless if it's part of a recognized escape sequence or not.
- value = r"\x80"
- assert r"\x80" == "\\x80"
- # We want to unescape that.
- assert unescape_string(value) == "\x80"
- assert r"\x80" != "\x80"
- # For string literals, python leaves recognized escape sequences alone,
- # and we should as well.
- assert unescape_string("\x80") == "\x80"
- # Essentially, we want the resulting str to
- # have the same number of backslashes as the raw string.
- assert unescape_string(r"\\x80") == "\\x80"
- assert unescape_string(r"\\\x80") == "\\\x80"
- assert unescape_string(r"\\\\x80") == "\\\\x80"
- # Now for a real world example.
- # If we specify this value as a string literal, we'll get a DeprecationWarning
- # because \* is not a recognized escape sequence.
- # This raw string literal reflects what was read off disk from our grouping
- # enhancement config text files, before they were corrected to be \\**.
- value = r"C:/WINDOWS/system32/DriverStore\**"
- assert value == "C:/WINDOWS/system32/DriverStore\\**"
- # This string should remain unchanged after unescape_string,
- # because there are no recognized escape sequences to unescape.
- # From 3.6 to 3.8 a DeprecationWarning which we suppress will
- # be emitted during .decode("unicode-escape", "unicode-escape-recovery"),
- # because \* isn't a recognized escape sequence.
- # We just want this to be a reminder if the warning is upgraded to a
- # behavior change in 3.9+.
- assert unescape_string(value) == "C:/WINDOWS/system32/DriverStore\\**"
+ ("s", "expected"),
+ (
+ # a \x escape spelled out as literal characters is decoded to the single character it names
+ (r"\x80", "\x80"),
+ # every "\\" pair collapses to one backslash; only an unpaired backslash starts an escape
+ (r"\\x80", "\\x80"),
+ (r"\\\x80", "\\\x80"),
+ (r"\\\\x80", "\\\\x80"),
+ # \* is not a recognized escape sequence, so the string comes back unchanged
+ (r"C:/WINDOWS/system32/DriverStore\**", "C:/WINDOWS/system32/DriverStore\\**"),
+ # a truncated \x escape (no hex digits follow) is kept as a literal backslash + x
+ (r"\x", "\\x"),
+ (r"\\\x", "\\\\x"),
+ # recognized escapes are decoded: \t, octal (\0, \11, \111), and \u / \U code points
+ (r"\t", "\t"),
+ (r"\0", "\0"),
+ (r"\11", "\11"),
+ (r"\111", "\111"),
+ (r"\u2603", "☃"),
+ (r"\U0001f643", "🙃"),
+ # a real (already unescaped) newline inside the value passes through unchanged;
+ # probably a mistake in the configuration, but quoted strings may contain one
+ ("hello\nworld", "hello\nworld"),
+ ),
+def test_unescape_string(s, expected):
+ assert unescape_string(s) == expected
def test_codec_lookup():