punycode.h 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  1. #pragma once
  2. #include <util/generic/string.h>
  3. #include <util/generic/strbuf.h>
  4. #include <util/generic/yexception.h>
  5. // Simplified arcadia wrappers for contrib/libs/libidn/
  6. // Raw string encoder/decoder: does not prepend the ACE prefix ("xn--") and
  7. // does not limit input length. Throws TPunycodeError on any internal error.
  8. // The returned strbuf points to @out data.
  9. TStringBuf WideToPunycode(const TWtringBuf& in, TString& out);
  10. TWtringBuf PunycodeToWide(const TStringBuf& in, TUtf16String& out);
  11. inline TString WideToPunycode(const TWtringBuf& in) {
  12. TString out;
  13. WideToPunycode(in, out);
  14. return out;
  15. }
  16. inline TUtf16String PunycodeToWide(const TStringBuf& in) {
  17. TUtf16String out;
  18. PunycodeToWide(in, out);
  19. return out;
  20. }
  21. // Encode a sequence of dot-separated domain labels
  22. // into a sequence of corresponding punycode labels.
  23. // Labels containing non-ASCII characters are prefixed with the ACE prefix ("xn--").
  24. // Limits the maximum encoded domain label length to IDNA_LABEL_MAX_LENGTH (255 by default).
  25. // Throws TPunycodeError on failure.
  26. TString HostNameToPunycode(const TWtringBuf& unicodeHost);
  27. TUtf16String PunycodeToHostName(const TStringBuf& punycodeHost);
  28. // Robust versions: on failure, return the original input converted to/from UTF-8.
  29. TString ForceHostNameToPunycode(const TWtringBuf& unicodeHost);
  30. TUtf16String ForcePunycodeToHostName(const TStringBuf& punycodeHost);
  31. // True if @host looks like a punycode domain label sequence
  32. // containing at least one ACE-prefixed ("xn--") label.
  33. // Note that this function does not check all required IDNA constraints
  34. // (max label length, empty non-root domains, etc.)
  35. bool CanBePunycodeHostName(const TStringBuf& host);
  36. class TPunycodeError: public yexception {
  37. };