punycode_ut.cpp 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. #include "punycode.h"
  2. #include <library/cpp/testing/unittest/registar.h>
  3. #include <util/charset/wide.h>
  namespace {
      /// Reports whether two ranges start at the same position, i.e. whether
      /// `begin()` of both arguments compares equal.
      ///
      /// The tests use it to check that a conversion routine handed back a
      /// result that begins exactly where the caller-supplied buffer begins.
      template <typename T1, typename T2>
      inline bool HasSameBuffer(const T1& s1, const T2& s2) {
          const auto firstStart = s1.begin();
          const auto secondStart = s2.begin();
          return firstStart == secondStart;
      }
  }
  10. Y_UNIT_TEST_SUITE(TPunycodeTest) {
  11. static bool TestRaw(const TString& utf8, const TString& punycode) {
  12. TUtf16String unicode = UTF8ToWide(utf8);
  13. TString buf1;
  14. TUtf16String buf2;
  15. return HasSameBuffer(WideToPunycode(unicode, buf1), buf1) && buf1 == punycode && HasSameBuffer(PunycodeToWide(punycode, buf2), buf2) && buf2 == unicode && WideToPunycode(unicode) == punycode && PunycodeToWide(punycode) == unicode;
  16. }
  17. Y_UNIT_TEST(RawEncodeDecode) {
  18. UNIT_ASSERT(TestRaw("", ""));
  19. UNIT_ASSERT(TestRaw(" ", " -"));
  20. UNIT_ASSERT(TestRaw("-", "--"));
  21. UNIT_ASSERT(TestRaw("!@#$%", "!@#$%-"));
  22. UNIT_ASSERT(TestRaw("xn-", "xn--"));
  23. UNIT_ASSERT(TestRaw("xn--", "xn---"));
  24. UNIT_ASSERT(TestRaw("abc", "abc-"));
  25. UNIT_ASSERT(TestRaw("Latin123", "Latin123-"));
  26. UNIT_ASSERT(TestRaw("München", "Mnchen-3ya"));
  27. UNIT_ASSERT(TestRaw("bücher", "bcher-kva"));
  28. UNIT_ASSERT(TestRaw("BüüchEr", "BchEr-kvaa"));
  29. UNIT_ASSERT(TestRaw("президент", "d1abbgf6aiiy"));
  30. UNIT_ASSERT(TestRaw("Президент", "r0a6bcbig1bsy"));
  31. UNIT_ASSERT(TestRaw("ПРЕЗИДЕНТ", "g0abbgf6aiiy"));
  32. UNIT_ASSERT(TestRaw("рф", "p1ai"));
  33. UNIT_ASSERT(TestRaw("пример", "e1afmkfd"));
  34. {
  35. const wchar16 tmp[] = {0x82, 0x81, 0x80, 0};
  36. UNIT_ASSERT(PunycodeToWide("abc") == tmp); // "abc" is still valid punycode
  37. }
  38. UNIT_ASSERT_EXCEPTION(PunycodeToWide(" "), TPunycodeError);
  39. UNIT_ASSERT_EXCEPTION(PunycodeToWide("абвгд"), TPunycodeError);
  40. UNIT_ASSERT_EXCEPTION(PunycodeToWide("-"), TPunycodeError);
  41. {
  42. TString longIn;
  43. for (size_t i = 0; i < 1024; ++i)
  44. longIn += "Qй";
  45. TString longOutlo11fbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
  46. UNIT_ASSERT(TestRaw(longIn, longOut));
  47. }
  48. }
  49. static bool TestHostName(const TString& utf8, const TString& punycode, bool canBePunycode = false) {
  50. TUtf16String unicode = UTF8ToWide(utf8);
  51. TString buf1;
  52. TUtf16String buf2;
  53. //Cerr << "Testing " << utf8 << Endl;
  54. return HostNameToPunycode(unicode) == punycode && HostNameToPunycode(UTF8ToWide(punycode)) == punycode // repeated encoding should give same result
  55. && PunycodeToHostName(punycode) == unicode && CanBePunycodeHostName(punycode) == canBePunycode;
  56. }
  57. static bool TestForced(const TString& bad) {
  58. return ForceHostNameToPunycode(UTF8ToWide(bad)) == bad && ForcePunycodeToHostName(bad) == UTF8ToWide(bad);
  59. }
  60. Y_UNIT_TEST(HostNameEncodeDecode) {
  61. UNIT_ASSERT(TestHostName("президент.рф", "xn--d1abbgf6aiiy.xn--p1ai", true));
  62. UNIT_ASSERT(TestHostName("яндекс.ru", "xn--d1acpjx3f.ru", true));
  63. UNIT_ASSERT(TestHostName("пример", "xn--e1afmkfd", true));
  64. UNIT_ASSERT(TestHostName("ascii.test", "ascii.test"));
  65. UNIT_ASSERT(TestHostName("", ""));
  66. UNIT_ASSERT(TestHostName(".", "."));
  67. UNIT_ASSERT(TestHostName("a.", "a.")); // empty root domain is ok
  68. UNIT_ASSERT(TestHostName("a.b.c.д.e.f", "a.b.c.xn--d1a.e.f", true));
  69. UNIT_ASSERT(TestHostName("а.б.в.г.д", "xn--80a.xn--90a.xn--b1a.xn--c1a.xn--d1a", true));
  70. UNIT_ASSERT(TestHostName("-", "-"));
  71. UNIT_ASSERT(TestHostName("xn--", "xn--", true));
  72. UNIT_ASSERT(TestHostName("xn--aaa.-", "xn--aaa.-", true));
  73. UNIT_ASSERT(TestHostName("xn--xn--d1acpjx3f.xn--ru", "xn--xn--d1acpjx3f.xn--ru", true));
  74. {
  75. // non-ascii
  76. TString bad = "президент.рф";
  77. UNIT_ASSERT_EXCEPTION(PunycodeToHostName("президент.рф"), TPunycodeError);
  78. UNIT_ASSERT(ForcePunycodeToHostName(bad) == UTF8ToWide(bad));
  79. }
  80. {
  81. // too long domain label
  82. TString bad(500, 'a');
  83. UNIT_ASSERT_EXCEPTION(HostNameToPunycode(UTF8ToWide(bad)), TPunycodeError);
  84. UNIT_ASSERT(TestForced(bad)); // but can decode it
  85. }
  86. {
  87. // already has ACE prefix
  88. TString bad("xn--яндекс.xn--рф");
  89. UNIT_ASSERT_EXCEPTION(HostNameToPunycode(UTF8ToWide(bad)), TPunycodeError);
  90. UNIT_ASSERT(TestForced(bad));
  91. }
  92. {
  93. // empty non-root domain is not allowed (?)
  94. TString bad(".яндекс.рф");
  95. UNIT_ASSERT_EXCEPTION(HostNameToPunycode(UTF8ToWide(bad)), TPunycodeError);
  96. UNIT_ASSERT(TestForced(bad));
  97. }
  98. UNIT_ASSERT(CanBePunycodeHostName("xn--"));
  99. UNIT_ASSERT(CanBePunycodeHostName("yandex.xn--p1ai"));
  100. UNIT_ASSERT(CanBePunycodeHostName("xn--d1acpjx3f.xn--p1ai"));
  101. UNIT_ASSERT(CanBePunycodeHostName("a.b.c.d.xn--e"));
  102. UNIT_ASSERT(CanBePunycodeHostName("xn--a.b.c.xn--d.e"));
  103. UNIT_ASSERT(!CanBePunycodeHostName("yandex.ru")); // no xn--
  104. UNIT_ASSERT(!CanBePunycodeHostName("яндекс.рф")); // non-ascii
  105. UNIT_ASSERT(!CanBePunycodeHostName("яндекс.xn--p1ai")); // non-ascii
  106. UNIT_ASSERT(!CanBePunycodeHostName(""));
  107. UNIT_ASSERT(!CanBePunycodeHostName("http://xn--a.b")); // scheme prefix is not detected here
  108. }
  109. }