Browse Source

Move the lexer's control values out of the assigned Unicode code point range so that PIRE's lexer can accept any Unicode code point.

Issue:
galtsev 2 years ago
parent
commit
22378bbd93
2 changed files with 11 additions and 2 deletions
  1. 2 2
      contrib/libs/pire/pire/re_lexer.h
  2. 9 0
      contrib/libs/pire/ut/read_unicode_ut.cpp

+ 2 - 2
contrib/libs/pire/pire/re_lexer.h

@@ -43,8 +43,8 @@ namespace Pire {
 namespace Consts { // Special values used by the lexer alongside ordinary character codes.
 enum { Inf = -1 };
 
-static const wchar32 Control     = 0xF000;
-static const wchar32 ControlMask = 0xFF00;
+static const wchar32 Control     = 0xF0000000; // Base for control values; lies above 0x10FFFF, the largest Unicode code point.
+static const wchar32 ControlMask = 0xFF000000; // Control & ControlMask == Control. NOTE(review): presumably used to test whether a value is a control — confirm at use sites.
 static const wchar32 End         = Control | 0xFF; // Evaluates to 0xF00000FF with the new Control value.
 };
 

+ 9 - 0
contrib/libs/pire/ut/read_unicode_ut.cpp

@@ -295,4 +295,13 @@ Y_UNIT_TEST_SUITE(ReadUnicodeTest) {
 		}
 	}
 
+	Y_UNIT_TEST(AnyUnicodeCodepointIsAllowed) // Character class with bounds up to 0x10FFFF that leaves out only 0x78 ('x').
+	{
+		REGEXP("[\\x{0}-\\x{77}\\x{79}-\\x{10ffff}]") { // Two ranges: 0x0..0x77 and 0x79..0x10FFFF.
+			ACCEPTS("w"); // 'w' is 0x77, the upper bound of the first range.
+			DENIES ("x"); // 'x' is 0x78, the single value between the two ranges.
+			ACCEPTS("y"); // 'y' is 0x79, the lower bound of the second range.
+		}
+	}
+
 }