// idna.h
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. * Copyright (C) 2010-2012, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. *******************************************************************************
  8. * file name: idna.h
  9. * encoding: UTF-8
  10. * tab size: 8 (not used)
  11. * indentation:4
  12. *
  13. * created on: 2010mar05
  14. * created by: Markus W. Scherer
  15. */
  16. #ifndef __IDNA_H__
  17. #define __IDNA_H__
  18. /**
  19. * \file
  20. * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
  21. */
  22. #include "unicode/utypes.h"
  23. #if U_SHOW_CPLUSPLUS_API
  24. #if !UCONFIG_NO_IDNA
  25. #include "unicode/bytestream.h"
  26. #include "unicode/stringpiece.h"
  27. #include "unicode/uidna.h"
  28. #include "unicode/unistr.h"
  29. U_NAMESPACE_BEGIN
  30. class IDNAInfo;
  31. /**
  32. * Abstract base class for IDNA processing.
  33. * See http://www.unicode.org/reports/tr46/
  34. * and http://www.ietf.org/rfc/rfc3490.txt
  35. *
  36. * The IDNA class is not intended for public subclassing.
  37. *
  38. * This C++ API currently only implements UTS #46.
  39. * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
  40. * and IDNA2003 (functions that do not use a service object).
  41. * @stable ICU 4.6
  42. */
  43. class U_COMMON_API IDNA : public UObject {
  44. public:
  45. /**
  46. * Destructor.
  47. * @stable ICU 4.6
  48. */
  49. ~IDNA();
  50. /**
  51. * Returns an IDNA instance which implements UTS #46.
  52. * Returns an unmodifiable instance, owned by the caller.
  53. * Cache it for multiple operations, and delete it when done.
  54. * The instance is thread-safe, that is, it can be used concurrently.
  55. *
  56. * UTS #46 defines Unicode IDNA Compatibility Processing,
  57. * updated to the latest version of Unicode and compatible with both
  58. * IDNA2003 and IDNA2008.
  59. *
  60. * The worker functions use transitional processing, including deviation mappings,
  61. * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
  62. * is used in which case the deviation characters are passed through without change.
  63. *
  64. * Disallowed characters are mapped to U+FFFD.
  65. *
  66. * For available options see the uidna.h header.
  67. * Operations with the UTS #46 instance do not support the
  68. * UIDNA_ALLOW_UNASSIGNED option.
  69. *
  70. * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
  71. * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
  72. * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
  73. *
  74. * @param options Bit set to modify the processing and error checking.
  75. * See option bit set values in uidna.h.
  76. * @param errorCode Standard ICU error code. Its input value must
  77. * pass the U_SUCCESS() test, or else the function returns
  78. * immediately. Check for U_FAILURE() on output or use with
  79. * function chaining. (See User Guide for details.)
  80. * @return the UTS #46 IDNA instance, if successful
  81. * @stable ICU 4.6
  82. */
  83. static IDNA *
  84. createUTS46Instance(uint32_t options, UErrorCode &errorCode);
  85. /**
  86. * Converts a single domain name label into its ASCII form for DNS lookup.
  87. * If any processing step fails, then info.hasErrors() will be true and
  88. * the result might not be an ASCII string.
  89. * The label might be modified according to the types of errors.
  90. * Labels with severe errors will be left in (or turned into) their Unicode form.
  91. *
  92. * The UErrorCode indicates an error only in exceptional cases,
  93. * such as a U_MEMORY_ALLOCATION_ERROR.
  94. *
  95. * @param label Input domain name label
  96. * @param dest Destination string object
  97. * @param info Output container of IDNA processing details.
  98. * @param errorCode Standard ICU error code. Its input value must
  99. * pass the U_SUCCESS() test, or else the function returns
  100. * immediately. Check for U_FAILURE() on output or use with
  101. * function chaining. (See User Guide for details.)
  102. * @return dest
  103. * @stable ICU 4.6
  104. */
  105. virtual UnicodeString &
  106. labelToASCII(const UnicodeString &label, UnicodeString &dest,
  107. IDNAInfo &info, UErrorCode &errorCode) const = 0;
  108. /**
  109. * Converts a single domain name label into its Unicode form for human-readable display.
  110. * If any processing step fails, then info.hasErrors() will be true.
  111. * The label might be modified according to the types of errors.
  112. *
  113. * The UErrorCode indicates an error only in exceptional cases,
  114. * such as a U_MEMORY_ALLOCATION_ERROR.
  115. *
  116. * @param label Input domain name label
  117. * @param dest Destination string object
  118. * @param info Output container of IDNA processing details.
  119. * @param errorCode Standard ICU error code. Its input value must
  120. * pass the U_SUCCESS() test, or else the function returns
  121. * immediately. Check for U_FAILURE() on output or use with
  122. * function chaining. (See User Guide for details.)
  123. * @return dest
  124. * @stable ICU 4.6
  125. */
  126. virtual UnicodeString &
  127. labelToUnicode(const UnicodeString &label, UnicodeString &dest,
  128. IDNAInfo &info, UErrorCode &errorCode) const = 0;
  129. /**
  130. * Converts a whole domain name into its ASCII form for DNS lookup.
  131. * If any processing step fails, then info.hasErrors() will be true and
  132. * the result might not be an ASCII string.
  133. * The domain name might be modified according to the types of errors.
  134. * Labels with severe errors will be left in (or turned into) their Unicode form.
  135. *
  136. * The UErrorCode indicates an error only in exceptional cases,
  137. * such as a U_MEMORY_ALLOCATION_ERROR.
  138. *
  139. * @param name Input domain name
  140. * @param dest Destination string object
  141. * @param info Output container of IDNA processing details.
  142. * @param errorCode Standard ICU error code. Its input value must
  143. * pass the U_SUCCESS() test, or else the function returns
  144. * immediately. Check for U_FAILURE() on output or use with
  145. * function chaining. (See User Guide for details.)
  146. * @return dest
  147. * @stable ICU 4.6
  148. */
  149. virtual UnicodeString &
  150. nameToASCII(const UnicodeString &name, UnicodeString &dest,
  151. IDNAInfo &info, UErrorCode &errorCode) const = 0;
  152. /**
  153. * Converts a whole domain name into its Unicode form for human-readable display.
  154. * If any processing step fails, then info.hasErrors() will be true.
  155. * The domain name might be modified according to the types of errors.
  156. *
  157. * The UErrorCode indicates an error only in exceptional cases,
  158. * such as a U_MEMORY_ALLOCATION_ERROR.
  159. *
  160. * @param name Input domain name
  161. * @param dest Destination string object
  162. * @param info Output container of IDNA processing details.
  163. * @param errorCode Standard ICU error code. Its input value must
  164. * pass the U_SUCCESS() test, or else the function returns
  165. * immediately. Check for U_FAILURE() on output or use with
  166. * function chaining. (See User Guide for details.)
  167. * @return dest
  168. * @stable ICU 4.6
  169. */
  170. virtual UnicodeString &
  171. nameToUnicode(const UnicodeString &name, UnicodeString &dest,
  172. IDNAInfo &info, UErrorCode &errorCode) const = 0;
  173. // UTF-8 versions of the processing methods ---------------------------- ***
  174. /**
  175. * Converts a single domain name label into its ASCII form for DNS lookup.
  176. * UTF-8 version of labelToASCII(), same behavior.
  177. *
  178. * @param label Input domain name label
  179. * @param dest Destination byte sink; Flush()ed if successful
  180. * @param info Output container of IDNA processing details.
  181. * @param errorCode Standard ICU error code. Its input value must
  182. * pass the U_SUCCESS() test, or else the function returns
  183. * immediately. Check for U_FAILURE() on output or use with
  184. * function chaining. (See User Guide for details.)
  185. * @return dest
  186. * @stable ICU 4.6
  187. */
  188. virtual void
  189. labelToASCII_UTF8(StringPiece label, ByteSink &dest,
  190. IDNAInfo &info, UErrorCode &errorCode) const;
  191. /**
  192. * Converts a single domain name label into its Unicode form for human-readable display.
  193. * UTF-8 version of labelToUnicode(), same behavior.
  194. *
  195. * @param label Input domain name label
  196. * @param dest Destination byte sink; Flush()ed if successful
  197. * @param info Output container of IDNA processing details.
  198. * @param errorCode Standard ICU error code. Its input value must
  199. * pass the U_SUCCESS() test, or else the function returns
  200. * immediately. Check for U_FAILURE() on output or use with
  201. * function chaining. (See User Guide for details.)
  202. * @return dest
  203. * @stable ICU 4.6
  204. */
  205. virtual void
  206. labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
  207. IDNAInfo &info, UErrorCode &errorCode) const;
  208. /**
  209. * Converts a whole domain name into its ASCII form for DNS lookup.
  210. * UTF-8 version of nameToASCII(), same behavior.
  211. *
  212. * @param name Input domain name
  213. * @param dest Destination byte sink; Flush()ed if successful
  214. * @param info Output container of IDNA processing details.
  215. * @param errorCode Standard ICU error code. Its input value must
  216. * pass the U_SUCCESS() test, or else the function returns
  217. * immediately. Check for U_FAILURE() on output or use with
  218. * function chaining. (See User Guide for details.)
  219. * @return dest
  220. * @stable ICU 4.6
  221. */
  222. virtual void
  223. nameToASCII_UTF8(StringPiece name, ByteSink &dest,
  224. IDNAInfo &info, UErrorCode &errorCode) const;
  225. /**
  226. * Converts a whole domain name into its Unicode form for human-readable display.
  227. * UTF-8 version of nameToUnicode(), same behavior.
  228. *
  229. * @param name Input domain name
  230. * @param dest Destination byte sink; Flush()ed if successful
  231. * @param info Output container of IDNA processing details.
  232. * @param errorCode Standard ICU error code. Its input value must
  233. * pass the U_SUCCESS() test, or else the function returns
  234. * immediately. Check for U_FAILURE() on output or use with
  235. * function chaining. (See User Guide for details.)
  236. * @return dest
  237. * @stable ICU 4.6
  238. */
  239. virtual void
  240. nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
  241. IDNAInfo &info, UErrorCode &errorCode) const;
  242. };
  243. class UTS46;
  244. /**
  245. * Output container for IDNA processing errors.
  246. * The IDNAInfo class is not suitable for subclassing.
  247. * @stable ICU 4.6
  248. */
  249. class U_COMMON_API IDNAInfo : public UMemory {
  250. public:
  251. /**
  252. * Constructor for stack allocation.
  253. * @stable ICU 4.6
  254. */
  255. IDNAInfo() : errors(0), labelErrors(0), isTransDiff(false), isBiDi(false), isOkBiDi(true) {}
  256. /**
  257. * Were there IDNA processing errors?
  258. * @return true if there were processing errors
  259. * @stable ICU 4.6
  260. */
  261. UBool hasErrors() const { return errors!=0; }
  262. /**
  263. * Returns a bit set indicating IDNA processing errors.
  264. * See UIDNA_ERROR_... constants in uidna.h.
  265. * @return bit set of processing errors
  266. * @stable ICU 4.6
  267. */
  268. uint32_t getErrors() const { return errors; }
  269. /**
  270. * Returns true if transitional and nontransitional processing produce different results.
  271. * This is the case when the input label or domain name contains
  272. * one or more deviation characters outside a Punycode label (see UTS #46).
  273. * <ul>
  274. * <li>With nontransitional processing, such characters are
  275. * copied to the destination string.
  276. * <li>With transitional processing, such characters are
  277. * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
  278. * </ul>
  279. * @return true if transitional and nontransitional processing produce different results
  280. * @stable ICU 4.6
  281. */
  282. UBool isTransitionalDifferent() const { return isTransDiff; }
  283. private:
  284. friend class UTS46;
  285. IDNAInfo(const IDNAInfo &other) = delete; // no copying
  286. IDNAInfo &operator=(const IDNAInfo &other) = delete; // no copying
  287. void reset() {
  288. errors=labelErrors=0;
  289. isTransDiff=false;
  290. isBiDi=false;
  291. isOkBiDi=true;
  292. }
  293. uint32_t errors, labelErrors;
  294. UBool isTransDiff;
  295. UBool isBiDi;
  296. UBool isOkBiDi;
  297. };
  298. U_NAMESPACE_END
  299. #endif // UCONFIG_NO_IDNA
  300. #endif /* U_SHOW_CPLUSPLUS_API */
  301. #endif // __IDNA_H__