// escape_ut.cpp (5.4 KB)

  1. #include "escape.h"
  2. #include <library/cpp/testing/unittest/registar.h>
  3. #include <util/generic/string.h>
  4. #include <util/charset/wide.h>
  5. using namespace std::string_view_literals;
  6. namespace {
  7. struct TExample {
  8. TString Expected;
  9. TString Source;
  10. TExample(const TStringBuf expected, const TStringBuf source)
  11. : Expected{expected}
  12. , Source{source}
  13. {
  14. }
  15. };
  16. }
  17. static const TExample CommonTestData[] = {
  18. // Should be valid UTF-8.
  19. {"http://ya.ru/", "http://ya.ru/"},
  20. {"http://ya.ru/\\x17\\n", "http://ya.ru/\x17\n"},
  21. {"http://ya.ru/\\0", "http://ya.ru/\0"sv},
  22. {"http://ya.ru/\\0\\0", "http://ya.ru/\0\0"sv},
  23. {"http://ya.ru/\\0\\0000", "http://ya.ru/\0\0"
  24. "0"sv},
  25. {"http://ya.ru/\\0\\0001", "http://ya.ru/\0\x00"
  26. "1"sv},
  27. {R"(\2\4\00678)", "\2\4\6"
  28. "78"sv}, // \6 -> \006 because next char '7' is "octal"
  29. {R"(\2\4\689)", "\2\4\6"
  30. "89"sv}, // \6 -> \6 because next char '8' is not "octal"
  31. {R"(\"Hello\", Alice said.)", "\"Hello\", Alice said."},
  32. {"Slash\\\\dash!", "Slash\\dash!"},
  33. {R"(There\nare\r\nnewlines.)", "There\nare\r\nnewlines."},
  34. {"There\\tare\\ttabs.", "There\tare\ttabs."},
  35. {"There are questions \\x3F\\x3F?", "There are questions ???"},
  36. {"There are questions \\x3F?", "There are questions ??"},
  37. };
  38. Y_UNIT_TEST_SUITE(TEscapeCTest) {
  39. Y_UNIT_TEST(TestStrokaEscapeC) {
  40. for (const auto& e : CommonTestData) {
  41. TString expected(e.Expected);
  42. TString source(e.Source);
  43. TString actual(EscapeC(e.Source));
  44. TString actual2(UnescapeC(e.Expected));
  45. UNIT_ASSERT_VALUES_EQUAL(e.Expected, actual);
  46. UNIT_ASSERT_VALUES_EQUAL(e.Source, actual2);
  47. }
  48. UNIT_ASSERT_VALUES_EQUAL("http://ya.ru/\\x17\\n\\xAB", EscapeC(TString("http://ya.ru/\x17\n\xab")));
  49. UNIT_ASSERT_VALUES_EQUAL("http://ya.ru/\x17\n\xab", UnescapeC(TString("http://ya.ru/\\x17\\n\\xAB")));
  50. UNIT_ASSERT_VALUES_EQUAL("h", EscapeC('h'));
  51. UNIT_ASSERT_VALUES_EQUAL("h", UnescapeC(TString("h")));
  52. UNIT_ASSERT_VALUES_EQUAL("\\xFF", EscapeC('\xFF'));
  53. UNIT_ASSERT_VALUES_EQUAL("\xFF", UnescapeC(TString("\\xFF")));
  54. UNIT_ASSERT_VALUES_EQUAL("\\377f", EscapeC(TString("\xff"
  55. "f")));
  56. UNIT_ASSERT_VALUES_EQUAL("\xff"
  57. "f",
  58. UnescapeC(TString("\\377f")));
  59. UNIT_ASSERT_VALUES_EQUAL("\\xFFg", EscapeC(TString("\xff"
  60. "g")));
  61. UNIT_ASSERT_VALUES_EQUAL("\xff"
  62. "g",
  63. UnescapeC(TString("\\xFFg")));
  64. UNIT_ASSERT_VALUES_EQUAL("\xEA\x9A\x96", UnescapeC(TString("\\uA696")));
  65. UNIT_ASSERT_VALUES_EQUAL("Странный компроматтест", UnescapeC(TString("\\u0421\\u0442\\u0440\\u0430\\u043d\\u043d\\u044b\\u0439 \\u043a\\u043e\\u043c\\u043f\\u0440\\u043e\\u043c\\u0430\\u0442тест")));
  66. }
  67. Y_UNIT_TEST(TestWtrokaEscapeC) {
  68. for (const auto& e : CommonTestData) {
  69. TUtf16String expected(UTF8ToWide(e.Expected));
  70. TUtf16String source(UTF8ToWide(e.Source));
  71. TUtf16String actual(EscapeC(source));
  72. TUtf16String actual2(UnescapeC(expected));
  73. UNIT_ASSERT_VALUES_EQUAL(expected, actual);
  74. UNIT_ASSERT_VALUES_EQUAL(source, actual2);
  75. }
  76. UNIT_ASSERT_VALUES_EQUAL(u"http://ya.ru/\\x17\\n\\u1234", EscapeC(u"http://ya.ru/\x17\n\u1234"));
  77. UNIT_ASSERT_VALUES_EQUAL(u"h", EscapeC(u'h'));
  78. UNIT_ASSERT_VALUES_EQUAL(u"\\xFF", EscapeC(wchar16(255)));
  79. }
  80. Y_UNIT_TEST(TestEscapeTrigraphs) {
  81. UNIT_ASSERT_VALUES_EQUAL("?", EscapeC(TString("?")));
  82. UNIT_ASSERT_VALUES_EQUAL("\\x3F?", EscapeC(TString("??")));
  83. UNIT_ASSERT_VALUES_EQUAL("\\x3F\\x3F?", EscapeC(TString("???")));
  84. // ok but may cause warning about trigraphs
  85. // UNIT_ASSERT_VALUES_EQUAL("[x]?z", EscapeC(TString("??(x??)?z")));
  86. UNIT_ASSERT_VALUES_EQUAL("\\x3F?x\\x3F\\x3F?z", EscapeC(TString("??x???z")));
  87. }
  88. Y_UNIT_TEST(TestUnescapeCCharLen) {
  89. auto test = [](const char* str, size_t len) {
  90. UNIT_ASSERT_EQUAL(UnescapeCCharLen(str, str + strlen(str)), len);
  91. };
  92. test("", 0);
  93. test("abc", 1);
  94. test("\\", 1);
  95. test("\\\\", 2);
  96. test("\\#", 2);
  97. test("\\n10", 2);
  98. test("\\r\\n", 2);
  99. test("\\x05abc", 4);
  100. test("\\u11117777", 6);
  101. test("\\u123yyy", 2);
  102. test("\\U11117777cccc", 10);
  103. test("\\U111yyy", 2);
  104. test("\\0\\1", 2);
  105. test("\\01\\1", 3);
  106. test("\\012\\1", 4);
  107. test("\\0123\\1", 4);
  108. test("\\4\\1", 2);
  109. test("\\40\\1", 3);
  110. test("\\400\\1", 3);
  111. test("\\4xxx", 2);
  112. }
  113. Y_UNIT_TEST(TestUnbounded) {
  114. char buf[100000];
  115. for (const auto& x : CommonTestData) {
  116. char* end = UnescapeC(x.Expected.data(), x.Expected.size(), buf);
  117. UNIT_ASSERT_VALUES_EQUAL(x.Source, TStringBuf(buf, end));
  118. }
  119. }
  120. Y_UNIT_TEST(TestCapitalUEscapes) {
  121. UNIT_ASSERT_VALUES_EQUAL(UnescapeC("\\U00000020"), " ");
  122. UNIT_ASSERT_VALUES_EQUAL(UnescapeC("\\Uxxx"), "Uxxx");
  123. }
  124. }