punycode.cpp 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. #include "punycode.h"
  2. #include <idna.h>
  3. #include <punycode.h>
  4. #include <util/charset/wide.h>
  5. #include <util/generic/ptr.h>
  6. #include <util/generic/vector.h>
  7. #include <cstdlib>
  8. static inline void CheckPunycodeResult(int rc) {
  9. if (rc != PUNYCODE_SUCCESS)
  10. ythrow TPunycodeError() << punycode_strerror(static_cast<Punycode_status>(rc));
  11. }
  12. static inline void CheckIdnaResult(int rc) {
  13. if (rc != IDNA_SUCCESS)
  14. ythrow TPunycodeError() << idna_strerror(static_cast<Idna_rc>(rc));
  15. }
  16. // UTF-32 helpers
  17. static inline void AppendWideToUtf32(const TWtringBuf& in, TVector<ui32>& out) {
  18. out.reserve(out.size() + in.size() + 1);
  19. const wchar16* b = in.begin();
  20. const wchar16* e = in.end();
  21. while (b < e) {
  22. out.push_back(ReadSymbolAndAdvance(b, e));
  23. }
  24. }
  25. static inline void AppendUtf32ToWide(const ui32* in, size_t len, TUtf16String& out) {
  26. out.reserve(out.size() + len);
  27. const ui32* b = in;
  28. const ui32* e = in + len;
  29. for (; b != e; ++b) {
  30. WriteSymbol(wchar32(*b), out);
  31. }
  32. }
  33. TStringBuf WideToPunycode(const TWtringBuf& in16, TString& out) {
  34. TVector<ui32> in32;
  35. AppendWideToUtf32(in16, in32);
  36. size_t outlen = in32.size();
  37. int rc;
  38. do {
  39. outlen *= 2;
  40. out.ReserveAndResize(outlen);
  41. rc = punycode_encode(in32.size(), in32.data(), nullptr, &outlen, out.begin());
  42. } while (rc == PUNYCODE_BIG_OUTPUT);
  43. CheckPunycodeResult(rc);
  44. out.resize(outlen);
  45. return out;
  46. }
  47. TWtringBuf PunycodeToWide(const TStringBuf& in, TUtf16String& out16) {
  48. size_t outlen = in.size();
  49. TVector<ui32> out32(outlen);
  50. int rc = punycode_decode(in.size(), in.data(), &outlen, out32.begin(), nullptr);
  51. CheckPunycodeResult(rc);
  52. AppendUtf32ToWide(out32.begin(), outlen, out16);
  53. return out16;
  54. }
  55. namespace {
  56. template <typename TChar>
  57. struct TIdnaResult {
  58. TChar* Data = nullptr;
  59. ~TIdnaResult() {
  60. free(Data);
  61. }
  62. };
  63. }
  64. TString HostNameToPunycode(const TWtringBuf& unicodeHost) {
  65. TVector<ui32> in32;
  66. AppendWideToUtf32(unicodeHost, in32);
  67. in32.push_back(0);
  68. TIdnaResult<char> out;
  69. int rc = idna_to_ascii_4z(in32.begin(), &out.Data, 0);
  70. CheckIdnaResult(rc);
  71. return out.Data;
  72. }
  73. TUtf16String PunycodeToHostName(const TStringBuf& punycodeHost) {
  74. if (!IsStringASCII(punycodeHost.begin(), punycodeHost.end()))
  75. ythrow TPunycodeError() << "Non-ASCII punycode input";
  76. size_t len = punycodeHost.size();
  77. TVector<ui32> in32(len + 1, 0);
  78. for (size_t i = 0; i < len; ++i)
  79. in32[i] = static_cast<ui8>(punycodeHost[i]);
  80. in32[len] = 0;
  81. TIdnaResult<ui32> out;
  82. int rc = idna_to_unicode_4z4z(in32.begin(), &out.Data, 0);
  83. CheckIdnaResult(rc);
  84. TUtf16String decoded;
  85. AppendUtf32ToWide(out.Data, std::char_traits<ui32>::length(out.Data), decoded);
  86. return decoded;
  87. }
  88. TString ForceHostNameToPunycode(const TWtringBuf& unicodeHost) {
  89. try {
  90. return HostNameToPunycode(unicodeHost);
  91. } catch (const TPunycodeError&) {
  92. return WideToUTF8(unicodeHost);
  93. }
  94. }
  95. TUtf16String ForcePunycodeToHostName(const TStringBuf& punycodeHost) {
  96. try {
  97. return PunycodeToHostName(punycodeHost);
  98. } catch (const TPunycodeError&) {
  99. return UTF8ToWide(punycodeHost);
  100. }
  101. }
  102. bool CanBePunycodeHostName(const TStringBuf& host) {
  103. if (!IsStringASCII(host.begin(), host.end()))
  104. return false;
  105. static constexpr TStringBuf ACE = "xn--";
  106. TStringBuf tail(host);
  107. while (tail) {
  108. const TStringBuf label = tail.NextTok('.');
  109. if (label.StartsWith(ACE))
  110. return true;
  111. }
  112. return false;
  113. }