uidna.h 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. *
  6. * Copyright (C) 2003-2014, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. *******************************************************************************
  10. * file name: uidna.h
  11. * encoding: UTF-8
  12. * tab size: 8 (not used)
  13. * indentation:4
  14. *
  15. * created on: 2003feb1
  16. * created by: Ram Viswanadha
  17. */
  18. #ifndef __UIDNA_H__
  19. #define __UIDNA_H__
  20. #include "unicode/utypes.h"
  21. #if !UCONFIG_NO_IDNA
  22. #include <stdbool.h>
  23. #include "unicode/parseerr.h"
  24. #if U_SHOW_CPLUSPLUS_API
  25. #include "unicode/localpointer.h"
  26. #endif // U_SHOW_CPLUSPLUS_API
  27. /**
  28. * \file
  29. * \brief C API: Internationalizing Domain Names in Applications (IDNA)
  30. *
  31. * IDNA2008 is implemented according to UTS #46, see the IDNA C++ class in idna.h.
  32. *
  33. * The C API functions which do take a UIDNA * service object pointer
  34. * implement UTS #46 and IDNA2008.
  35. *
  36. * IDNA2003 is obsolete.
  37. * The C API functions which do not take a service object pointer
  38. * implement IDNA2003. They are all deprecated.
  39. */
  40. /*
  41. * IDNA option bit set values. Each option is a distinct bit; combine options with bitwise OR.
  42. */
  43. enum {
  44. /**
  45. * Default options value: None of the other options are set.
  46. * For use in static worker and factory methods.
  47. * @stable ICU 2.6
  48. */
  49. UIDNA_DEFAULT=0,
  50. #ifndef U_HIDE_DEPRECATED_API
  51. /**
  52. * Option to allow unassigned code points in domain names and labels.
  53. * For use in static worker and factory methods.
  54. * <p>This option is ignored by the UTS46 implementation.
  55. * (UTS #46 disallows unassigned code points.)
  56. * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
  57. */
  58. UIDNA_ALLOW_UNASSIGNED=1,
  59. #endif /* U_HIDE_DEPRECATED_API */
  60. /**
  61. * Option to check whether the input conforms to the STD3 ASCII rules,
  62. * for example the restriction of labels to LDH characters
  63. * (ASCII Letters, Digits and Hyphen-Minus).
  64. * For use in static worker and factory methods.
  65. * @stable ICU 2.6
  66. */
  67. UIDNA_USE_STD3_RULES=2,
  68. /**
  69. * IDNA option to check whether the input conforms to the BiDi rules.
  70. * For use in static worker and factory methods.
  71. * <p>This option is ignored by the IDNA2003 implementation.
  72. * (IDNA2003 always performs a BiDi check.)
  73. * @stable ICU 4.6
  74. */
  75. UIDNA_CHECK_BIDI=4,
  76. /**
  77. * IDNA option to check whether the input conforms to the CONTEXTJ rules.
  78. * For use in static worker and factory methods.
  79. * <p>This option is ignored by the IDNA2003 implementation.
  80. * (The CONTEXTJ check is new in IDNA2008.)
  81. * @stable ICU 4.6
  82. */
  83. UIDNA_CHECK_CONTEXTJ=8,
  84. /**
  85. * IDNA option for nontransitional processing in ToASCII().
  86. * For use in static worker and factory methods.
  87. * <p>By default, ToASCII() uses transitional processing.
  88. * <p>This option is ignored by the IDNA2003 implementation.
  89. * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
  90. * @stable ICU 4.6
  91. */
  92. UIDNA_NONTRANSITIONAL_TO_ASCII=0x10,
  93. /**
  94. * IDNA option for nontransitional processing in ToUnicode().
  95. * For use in static worker and factory methods.
  96. * <p>By default, ToUnicode() uses transitional processing.
  97. * <p>This option is ignored by the IDNA2003 implementation.
  98. * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
  99. * @stable ICU 4.6
  100. */
  101. UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20,
  102. /**
  103. * IDNA option to check whether the input conforms to the CONTEXTO rules.
  104. * For use in static worker and factory methods.
  105. * <p>This option is ignored by the IDNA2003 implementation.
  106. * (The CONTEXTO check is new in IDNA2008.)
  107. * <p>This is for use by registries for IDNA2008 conformance.
  108. * UTS #46 does not require the CONTEXTO check.
  109. * @stable ICU 49
  110. */
  111. UIDNA_CHECK_CONTEXTO=0x40
  112. };
  113. /**
  114. * Opaque C service object type for the new IDNA API; obtain one from uidna_openUTS46() and release it with uidna_close().
  115. * @stable ICU 4.6
  116. */
  117. struct UIDNA;
  118. typedef struct UIDNA UIDNA; /**< C typedef for struct UIDNA. @stable ICU 4.6 */
  119. /**
  120. * Returns a UIDNA instance which implements UTS #46.
  121. * Returns an unmodifiable instance, owned by the caller.
  122. * Cache it for multiple operations, and uidna_close() it when done.
  123. * The instance is thread-safe, that is, it can be used concurrently.
  124. *
  125. * For details about the UTS #46 implementation see the IDNA C++ class in idna.h.
  126. *
  127. * @param options Bit set to modify the processing and error checking.
  128. * See the UIDNA_... option bit set values in uidna.h.
  129. * @param pErrorCode Standard ICU error code. Its input value must
  130. * pass the U_SUCCESS() test, or else the function returns
  131. * immediately. Check for U_FAILURE() on output or use with
  132. * function chaining. (See User Guide for details.)
  133. * @return the UTS #46 UIDNA instance, if successful; the caller owns it and must uidna_close() it when done
  134. * @stable ICU 4.6
  135. */
  136. U_CAPI UIDNA * U_EXPORT2
  137. uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode);
  138. /**
  139. * Closes a UIDNA instance that was returned by uidna_openUTS46().
  140. * @param idna UIDNA instance to be closed
  141. * @stable ICU 4.6
  142. */
  143. U_CAPI void U_EXPORT2
  144. uidna_close(UIDNA *idna);
  145. #if U_SHOW_CPLUSPLUS_API
  146. U_NAMESPACE_BEGIN
  147. /**
  148. * \class LocalUIDNAPointer
  149. * "Smart pointer" class, closes a UIDNA via uidna_close().
  150. * For most methods see the LocalPointerBase base class.
  151. *
  152. * @see LocalPointerBase
  153. * @see LocalPointer
  154. * @stable ICU 4.6
  155. */
  156. U_DEFINE_LOCAL_OPEN_POINTER(LocalUIDNAPointer, UIDNA, uidna_close);
  157. U_NAMESPACE_END
  158. #endif // U_SHOW_CPLUSPLUS_API
  159. /**
  160. * Output container for IDNA processing errors.
  161. * Initialize with UIDNA_INFO_INITIALIZER:
  162. * \code
  163. * UIDNAInfo info = UIDNA_INFO_INITIALIZER;
  164. * int32_t length = uidna_nameToASCII(..., &info, &errorCode);
  165. * if(U_SUCCESS(errorCode) && info.errors!=0) { ... }
  166. * \endcode
  167. * @stable ICU 4.6
  168. */
  169. typedef struct UIDNAInfo {
  170. /** sizeof(UIDNAInfo), as set by UIDNA_INFO_INITIALIZER. @stable ICU 4.6 */
  171. int16_t size;
  172. /**
  173. * Set to true if transitional and nontransitional processing produce different results.
  174. * For details see C++ IDNAInfo::isTransitionalDifferent().
  175. * @stable ICU 4.6
  176. */
  177. UBool isTransitionalDifferent;
  178. UBool reservedB3; /**< Reserved field, do not use. @internal */
  179. /**
  180. * Bit set indicating IDNA processing errors. 0 if no errors.
  181. * See the UIDNA_ERROR_... constants in this header.
  182. * @stable ICU 4.6
  183. */
  184. uint32_t errors;
  185. int32_t reservedI2; /**< Reserved field, do not use. @internal */
  186. int32_t reservedI3; /**< Reserved field, do not use. @internal */
  187. } UIDNAInfo;
  188. /**
  189. * Static initializer for a UIDNAInfo struct: sets size to sizeof(UIDNAInfo) and every other field to false/0, in declaration order.
  190. * @stable ICU 4.6
  191. */
  192. #define UIDNA_INFO_INITIALIZER { \
  193. (int16_t)sizeof(UIDNAInfo), \
  194. false, false, \
  195. 0, 0, 0 }
  196. /**
  197. * Converts a single domain name label into its ASCII form for DNS lookup.
  198. * If any processing step fails, then pInfo->errors will be non-zero and
  199. * the result might not be an ASCII string.
  200. * The label might be modified according to the types of errors.
  201. * Labels with severe errors will be left in (or turned into) their Unicode form.
  202. *
  203. * The UErrorCode indicates an error only in exceptional cases,
  204. * such as a U_MEMORY_ALLOCATION_ERROR.
  205. *
  206. * @param idna UIDNA instance
  207. * @param label Input domain name label
  208. * @param length Label length, or -1 if NUL-terminated
  209. * @param dest Destination string buffer
  210. * @param capacity Destination buffer capacity
  211. * @param pInfo Output container of IDNA processing details.
  212. * @param pErrorCode Standard ICU error code. Its input value must
  213. * pass the U_SUCCESS() test, or else the function returns
  214. * immediately. Check for U_FAILURE() on output or use with
  215. * function chaining. (See User Guide for details.)
  216. * @return destination string length
  217. * @stable ICU 4.6
  218. */
  219. U_CAPI int32_t U_EXPORT2
  220. uidna_labelToASCII(const UIDNA *idna,
  221. const UChar *label, int32_t length, /* length may be -1 if label is NUL-terminated */
  222. UChar *dest, int32_t capacity,
  223. UIDNAInfo *pInfo, UErrorCode *pErrorCode); /* IDNA failures are reported in pInfo->errors; UTF-8 counterpart: uidna_labelToASCII_UTF8() */
  224. /**
  225. * Converts a single domain name label into its Unicode form for human-readable display.
  226. * If any processing step fails, then pInfo->errors will be non-zero.
  227. * The label might be modified according to the types of errors.
  228. *
  229. * The UErrorCode indicates an error only in exceptional cases,
  230. * such as a U_MEMORY_ALLOCATION_ERROR.
  231. *
  232. * @param idna UIDNA instance
  233. * @param label Input domain name label
  234. * @param length Label length, or -1 if NUL-terminated
  235. * @param dest Destination string buffer
  236. * @param capacity Destination buffer capacity
  237. * @param pInfo Output container of IDNA processing details.
  238. * @param pErrorCode Standard ICU error code. Its input value must
  239. * pass the U_SUCCESS() test, or else the function returns
  240. * immediately. Check for U_FAILURE() on output or use with
  241. * function chaining. (See User Guide for details.)
  242. * @return destination string length
  243. * @stable ICU 4.6
  244. */
  245. U_CAPI int32_t U_EXPORT2
  246. uidna_labelToUnicode(const UIDNA *idna,
  247. const UChar *label, int32_t length, /* length may be -1 if label is NUL-terminated */
  248. UChar *dest, int32_t capacity,
  249. UIDNAInfo *pInfo, UErrorCode *pErrorCode); /* IDNA failures are reported in pInfo->errors; UTF-8 counterpart: uidna_labelToUnicodeUTF8() */
  250. /**
  251. * Converts a whole domain name into its ASCII form for DNS lookup.
  252. * If any processing step fails, then pInfo->errors will be non-zero and
  253. * the result might not be an ASCII string.
  254. * The domain name might be modified according to the types of errors.
  255. * Labels with severe errors will be left in (or turned into) their Unicode form.
  256. *
  257. * The UErrorCode indicates an error only in exceptional cases,
  258. * such as a U_MEMORY_ALLOCATION_ERROR.
  259. *
  260. * @param idna UIDNA instance
  261. * @param name Input domain name
  262. * @param length Domain name length, or -1 if NUL-terminated
  263. * @param dest Destination string buffer
  264. * @param capacity Destination buffer capacity
  265. * @param pInfo Output container of IDNA processing details.
  266. * @param pErrorCode Standard ICU error code. Its input value must
  267. * pass the U_SUCCESS() test, or else the function returns
  268. * immediately. Check for U_FAILURE() on output or use with
  269. * function chaining. (See User Guide for details.)
  270. * @return destination string length
  271. * @stable ICU 4.6
  272. */
  273. U_CAPI int32_t U_EXPORT2
  274. uidna_nameToASCII(const UIDNA *idna,
  275. const UChar *name, int32_t length, /* length may be -1 if name is NUL-terminated */
  276. UChar *dest, int32_t capacity,
  277. UIDNAInfo *pInfo, UErrorCode *pErrorCode); /* IDNA failures are reported in pInfo->errors; UTF-8 counterpart: uidna_nameToASCII_UTF8() */
  278. /**
  279. * Converts a whole domain name into its Unicode form for human-readable display.
  280. * If any processing step fails, then pInfo->errors will be non-zero.
  281. * The domain name might be modified according to the types of errors.
  282. *
  283. * The UErrorCode indicates an error only in exceptional cases,
  284. * such as a U_MEMORY_ALLOCATION_ERROR.
  285. *
  286. * @param idna UIDNA instance
  287. * @param name Input domain name
  288. * @param length Domain name length, or -1 if NUL-terminated
  289. * @param dest Destination string buffer
  290. * @param capacity Destination buffer capacity
  291. * @param pInfo Output container of IDNA processing details.
  292. * @param pErrorCode Standard ICU error code. Its input value must
  293. * pass the U_SUCCESS() test, or else the function returns
  294. * immediately. Check for U_FAILURE() on output or use with
  295. * function chaining. (See User Guide for details.)
  296. * @return destination string length
  297. * @stable ICU 4.6
  298. */
  299. U_CAPI int32_t U_EXPORT2
  300. uidna_nameToUnicode(const UIDNA *idna,
  301. const UChar *name, int32_t length, /* length may be -1 if name is NUL-terminated */
  302. UChar *dest, int32_t capacity,
  303. UIDNAInfo *pInfo, UErrorCode *pErrorCode); /* IDNA failures are reported in pInfo->errors; UTF-8 counterpart: uidna_nameToUnicodeUTF8() */
  304. /* UTF-8 versions of the processing methods --------------------------------- */
  305. /**
  306. * Converts a single domain name label into its ASCII form for DNS lookup.
  307. * UTF-8 version of uidna_labelToASCII(), same behavior.
  308. *
  309. * @param idna UIDNA instance
  310. * @param label Input domain name label
  311. * @param length Label length, or -1 if NUL-terminated
  312. * @param dest Destination string buffer
  313. * @param capacity Destination buffer capacity
  314. * @param pInfo Output container of IDNA processing details.
  315. * @param pErrorCode Standard ICU error code. Its input value must
  316. * pass the U_SUCCESS() test, or else the function returns
  317. * immediately. Check for U_FAILURE() on output or use with
  318. * function chaining. (See User Guide for details.)
  319. * @return destination string length
  320. * @stable ICU 4.6
  321. */
  322. U_CAPI int32_t U_EXPORT2
  323. uidna_labelToASCII_UTF8(const UIDNA *idna, /* note: the ToASCII UTF-8 functions are spelled with "_UTF8", the ToUnicode ones with "UTF8" */
  324. const char *label, int32_t length, /* length may be -1 if label is NUL-terminated */
  325. char *dest, int32_t capacity,
  326. UIDNAInfo *pInfo, UErrorCode *pErrorCode); /* same behavior as uidna_labelToASCII() */
  327. /**
  328. * Converts a single domain name label into its Unicode form for human-readable display.
  329. * UTF-8 version of uidna_labelToUnicode(), same behavior.
  330. *
  331. * @param idna UIDNA instance
  332. * @param label Input domain name label
  333. * @param length Label length, or -1 if NUL-terminated
  334. * @param dest Destination string buffer
  335. * @param capacity Destination buffer capacity
  336. * @param pInfo Output container of IDNA processing details.
  337. * @param pErrorCode Standard ICU error code. Its input value must
  338. * pass the U_SUCCESS() test, or else the function returns
  339. * immediately. Check for U_FAILURE() on output or use with
  340. * function chaining. (See User Guide for details.)
  341. * @return destination string length
  342. * @stable ICU 4.6
  343. */
  344. U_CAPI int32_t U_EXPORT2
  345. uidna_labelToUnicodeUTF8(const UIDNA *idna, /* note: no underscore before UTF8, unlike uidna_labelToASCII_UTF8() */
  346. const char *label, int32_t length, /* length may be -1 if label is NUL-terminated */
  347. char *dest, int32_t capacity,
  348. UIDNAInfo *pInfo, UErrorCode *pErrorCode); /* same behavior as uidna_labelToUnicode() */
  349. /**
  350. * Converts a whole domain name into its ASCII form for DNS lookup.
  351. * UTF-8 version of uidna_nameToASCII(), same behavior.
  352. *
  353. * @param idna UIDNA instance
  354. * @param name Input domain name
  355. * @param length Domain name length, or -1 if NUL-terminated
  356. * @param dest Destination string buffer
  357. * @param capacity Destination buffer capacity
  358. * @param pInfo Output container of IDNA processing details.
  359. * @param pErrorCode Standard ICU error code. Its input value must
  360. * pass the U_SUCCESS() test, or else the function returns
  361. * immediately. Check for U_FAILURE() on output or use with
  362. * function chaining. (See User Guide for details.)
  363. * @return destination string length
  364. * @stable ICU 4.6
  365. */
  366. U_CAPI int32_t U_EXPORT2
  367. uidna_nameToASCII_UTF8(const UIDNA *idna, /* note: the ToASCII UTF-8 functions are spelled with "_UTF8", the ToUnicode ones with "UTF8" */
  368. const char *name, int32_t length, /* length may be -1 if name is NUL-terminated */
  369. char *dest, int32_t capacity,
  370. UIDNAInfo *pInfo, UErrorCode *pErrorCode); /* same behavior as uidna_nameToASCII() */
  371. /**
  372. * Converts a whole domain name into its Unicode form for human-readable display.
  373. * UTF-8 version of uidna_nameToUnicode(), same behavior.
  374. *
  375. * @param idna UIDNA instance
  376. * @param name Input domain name
  377. * @param length Domain name length, or -1 if NUL-terminated
  378. * @param dest Destination string buffer
  379. * @param capacity Destination buffer capacity
  380. * @param pInfo Output container of IDNA processing details.
  381. * @param pErrorCode Standard ICU error code. Its input value must
  382. * pass the U_SUCCESS() test, or else the function returns
  383. * immediately. Check for U_FAILURE() on output or use with
  384. * function chaining. (See User Guide for details.)
  385. * @return destination string length
  386. * @stable ICU 4.6
  387. */
  388. U_CAPI int32_t U_EXPORT2
  389. uidna_nameToUnicodeUTF8(const UIDNA *idna, /* note: no underscore before UTF8, unlike uidna_nameToASCII_UTF8() */
  390. const char *name, int32_t length, /* length may be -1 if name is NUL-terminated */
  391. char *dest, int32_t capacity,
  392. UIDNAInfo *pInfo, UErrorCode *pErrorCode); /* same behavior as uidna_nameToUnicode() */
  393. /*
  394. * IDNA error bit set values, reported in UIDNAInfo.errors.
  395. * When a domain name or label fails a processing step or does not meet the
  396. * validity criteria, then one or more of these error bits are set.
  397. */
  398. enum {
  399. /**
  400. * A non-final domain name label (or the whole domain name) is empty.
  401. * @stable ICU 4.6
  402. */
  403. UIDNA_ERROR_EMPTY_LABEL=1,
  404. /**
  405. * A domain name label is longer than 63 bytes.
  406. * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
  407. * This is only checked in ToASCII operations, and only if the output label is all-ASCII.
  408. * @stable ICU 4.6
  409. */
  410. UIDNA_ERROR_LABEL_TOO_LONG=2,
  411. /**
  412. * A domain name is longer than 255 bytes in its storage form.
  413. * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
  414. * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
  415. * @stable ICU 4.6
  416. */
  417. UIDNA_ERROR_DOMAIN_NAME_TOO_LONG=4,
  418. /**
  419. * A label starts with a hyphen-minus ('-').
  420. * @stable ICU 4.6
  421. */
  422. UIDNA_ERROR_LEADING_HYPHEN=8,
  423. /**
  424. * A label ends with a hyphen-minus ('-').
  425. * @stable ICU 4.6
  426. */
  427. UIDNA_ERROR_TRAILING_HYPHEN=0x10,
  428. /**
  429. * A label contains hyphen-minus ('-') in the third and fourth positions.
  430. * @stable ICU 4.6
  431. */
  432. UIDNA_ERROR_HYPHEN_3_4=0x20,
  433. /**
  434. * A label starts with a combining mark.
  435. * @stable ICU 4.6
  436. */
  437. UIDNA_ERROR_LEADING_COMBINING_MARK=0x40,
  438. /**
  439. * A label or domain name contains disallowed characters.
  440. * @stable ICU 4.6
  441. */
  442. UIDNA_ERROR_DISALLOWED=0x80,
  443. /**
  444. * A label starts with "xn--" but does not contain valid Punycode.
  445. * That is, an xn-- label failed Punycode decoding.
  446. * @stable ICU 4.6
  447. */
  448. UIDNA_ERROR_PUNYCODE=0x100,
  449. /**
  450. * A label contains a dot (full stop).
  451. * This can occur in an input string for a single-label function.
  452. * @stable ICU 4.6
  453. */
  454. UIDNA_ERROR_LABEL_HAS_DOT=0x200,
  455. /**
  456. * An ACE label does not contain a valid label string.
  457. * The label was successfully ACE (Punycode) decoded but the resulting
  458. * string had severe validation errors. For example,
  459. * it might contain characters that are not allowed in ACE labels,
  460. * or it might not be normalized.
  461. * @stable ICU 4.6
  462. */
  463. UIDNA_ERROR_INVALID_ACE_LABEL=0x400,
  464. /**
  465. * A label does not meet the IDNA BiDi requirements (for right-to-left characters).
  466. * @stable ICU 4.6
  467. */
  468. UIDNA_ERROR_BIDI=0x800,
  469. /**
  470. * A label does not meet the IDNA CONTEXTJ requirements.
  471. * @stable ICU 4.6
  472. */
  473. UIDNA_ERROR_CONTEXTJ=0x1000,
  474. /**
  475. * A label does not meet the IDNA CONTEXTO requirements for punctuation characters.
  476. * Some punctuation characters "Would otherwise have been DISALLOWED"
  477. * but are allowed in certain contexts. (RFC 5892)
  478. * @stable ICU 49
  479. */
  480. UIDNA_ERROR_CONTEXTO_PUNCTUATION=0x2000,
  481. /**
  482. * A label does not meet the IDNA CONTEXTO requirements for digits.
  483. * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
  484. * @stable ICU 49
  485. */
  486. UIDNA_ERROR_CONTEXTO_DIGITS=0x4000
  487. };
  488. #ifndef U_HIDE_DEPRECATED_API
  489. /* IDNA2003 API ------------------------------------------------------------- */
  490. /**
  491. * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
  492. * This operation is done on <b>single labels</b> before sending it to something that expects
  493. * ASCII names. A label is an individual part of a domain name. Labels are usually
  494. * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
  495. *
  496. * IDNA2003 API Overview:
  497. *
  498. * The uidna_ API implements the IDNA protocol as defined in the IDNA RFC
  499. * (http://www.ietf.org/rfc/rfc3490.txt).
  500. * The RFC defines 2 operations: ToASCII and ToUnicode. Domain name labels
  501. * containing non-ASCII code points are processed by the
  502. * ToASCII operation before passing it to resolver libraries. Domain names
  503. * that are obtained from resolver libraries are processed by the
  504. * ToUnicode operation before displaying the domain name to the user.
  505. * IDNA requires that implementations process input strings with Nameprep
  506. * (http://www.ietf.org/rfc/rfc3491.txt),
  507. * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt),
  508. * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt).
  509. * Implementations of IDNA MUST fully implement Nameprep and Punycode;
  510. * neither Nameprep nor Punycode are optional.
  511. * The input and output of ToASCII and ToUnicode operations are Unicode
  512. * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
  513. * multiple times to an input string will yield the same result as applying the operation
  514. * once.
  515. * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
  516. * ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
  517. *
  518. * @param src Input UChar array containing label in Unicode.
  519. * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
  520. * @param dest Output UChar array with ASCII (ACE encoded) label.
  521. * @param destCapacity Size of dest.
  522. * @param options A bit set of options:
  523. *
  524. * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
  525. * and do not use STD3 ASCII rules
  526. * If unassigned code points are found the operation fails with
  527. * U_IDNA_UNASSIGNED_ERROR error code.
  528. *
  529. * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
  530. * If this option is set, the unassigned code points in the input
  531. * are treated as normal Unicode code points.
  532. *
  533. * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
  534. * If this option is set and the input does not satisfy STD3 rules,
  535. * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
  536. *
  537. * @param parseError Pointer to UParseError struct to receive information on position
  538. * of error if an error is encountered. Can be NULL.
  539. * @param status ICU in/out error code parameter.
  540. * U_INVALID_CHAR_FOUND if src contains
  541. * unmatched single surrogates.
  542. * U_INDEX_OUTOFBOUNDS_ERROR if src contains
  543. * too many code points.
  544. * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
  545. * @return The length of the result string, if successful or in case of a buffer overflow;
  546. * in the latter case it will be greater than destCapacity.
  547. * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
  548. */
  549. U_DEPRECATED int32_t U_EXPORT2
  550. uidna_toASCII(const UChar* src, int32_t srcLength, /* srcLength may be -1 if src is NUL-terminated */
  551. UChar* dest, int32_t destCapacity,
  552. int32_t options, /* bit set of UIDNA_DEFAULT, UIDNA_ALLOW_UNASSIGNED, UIDNA_USE_STD3_RULES */
  553. UParseError* parseError, /* may be NULL */
  554. UErrorCode* status);
  555. /**
  556. * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
  557. * This operation is done on <b>single labels</b> before sending it to something that expects
  558. * Unicode names. A label is an individual part of a domain name. Labels are usually
  559. * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
  560. *
  561. * @param src Input UChar array containing ASCII (ACE encoded) label.
  562. * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
  563. * @param dest Output Converted UChar array containing Unicode equivalent of label.
  564. * @param destCapacity Size of dest.
  565. * @param options A bit set of options:
  566. *
  567. * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
  568. * and do not use STD3 ASCII rules
  569. * If unassigned code points are found the operation fails with
  570. * U_IDNA_UNASSIGNED_ERROR error code.
  571. *
  572. * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
  573. * If this option is set, the unassigned code points in the input
  574. * are treated as normal Unicode code points. <b> Note: </b> This option is
  575. * required on toUnicode operation because the RFC mandates
  576. * verification of decoded ACE input by applying toASCII and comparing
  577. * its output with source
  578. *
  579. * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
  580. * If this option is set and the input does not satisfy STD3 rules,
  581. * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
  582. *
  583. * @param parseError Pointer to UParseError struct to receive information on position
  584. * of error if an error is encountered. Can be NULL.
  585. * @param status ICU in/out error code parameter.
  586. * U_INVALID_CHAR_FOUND if src contains
  587. * unmatched single surrogates.
  588. * U_INDEX_OUTOFBOUNDS_ERROR if src contains
  589. * too many code points.
  590. * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
  591. * @return The length of the result string, if successful or in case of a buffer overflow;
  592. * in the latter case it will be greater than destCapacity.
  593. * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
  594. */
  595. U_DEPRECATED int32_t U_EXPORT2
  596. uidna_toUnicode(const UChar* src, int32_t srcLength, /* srcLength may be -1 if src is NUL-terminated */
  597. UChar* dest, int32_t destCapacity,
  598. int32_t options, /* bit set of UIDNA_DEFAULT, UIDNA_ALLOW_UNASSIGNED, UIDNA_USE_STD3_RULES */
  599. UParseError* parseError, /* may be NULL */
  600. UErrorCode* status);
  601. /**
  602. * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
  603. * This operation is done on complete domain names, e.g: "www.example.com".
  604. * It is important to note that this operation can fail. If it fails, then the input
  605. * domain name cannot be used as an Internationalized Domain Name and the application
  606. * should have methods defined to deal with the failure.
  607. *
  608. * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
  609. * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
  610. * and then convert. This function does not offer that level of granularity. The options once
  611. * set will apply to all labels in the domain name
  612. *
  613. * @param src Input UChar array containing IDN in Unicode.
  614. * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
  615. * @param dest Output UChar array with ASCII (ACE encoded) IDN.
  616. * @param destCapacity Size of dest.
  617. * @param options A bit set of options:
  618. *
  619. * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
  620. * and do not use STD3 ASCII rules
  621. * If unassigned code points are found the operation fails with
  622. * U_IDNA_UNASSIGNED_ERROR error code.
  623. *
  624. * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
  625. * If this option is set, the unassigned code points in the input
  626. * are treated as normal Unicode code points.
  627. *
  628. * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
  629. * If this option is set and the input does not satisfy STD3 rules,
  630. * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
  631. *
  632. * @param parseError Pointer to UParseError struct to receive information on position
  633. * of error if an error is encountered. Can be NULL.
  634. * @param status ICU in/out error code parameter.
  635. * U_INVALID_CHAR_FOUND if src contains
  636. * unmatched single surrogates.
  637. * U_INDEX_OUTOFBOUNDS_ERROR if src contains
  638. * too many code points.
  639. * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
  640. * @return The length of the result string, if successful or in case of a buffer overflow;
  641. * in the latter case it will be greater than destCapacity.
  642. * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
  643. */
  644. U_DEPRECATED int32_t U_EXPORT2
  645. uidna_IDNToASCII( const UChar* src, int32_t srcLength, /* srcLength may be -1 if src is NUL-terminated */
  646. UChar* dest, int32_t destCapacity,
  647. int32_t options, /* bit set of UIDNA_DEFAULT, UIDNA_ALLOW_UNASSIGNED, UIDNA_USE_STD3_RULES; applied to every label */
  648. UParseError* parseError, /* may be NULL */
  649. UErrorCode* status);
  650. /**
  651. * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
  652. * This operation is done on complete domain names, e.g., "www.example.com".
  653. *
  654. * <b>Note:</b> The IDNA RFC specifies that a conformant application should divide a domain name
  655. * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
  656. * and then convert. This function does not offer that level of granularity. The options, once
  657. * set, will apply to all labels in the domain name.
  658. *
  659. * @param src Input UChar array containing IDN in ASCII (ACE encoded) form.
  660. * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
  661. * @param dest Output UChar array containing Unicode equivalent of source IDN.
  662. * @param destCapacity Size of dest.
  663. * @param options A bit set of options:
  664. *
  665. * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
  666. * and do not use STD3 ASCII rules.
  667. * If unassigned code points are found, the operation fails with the
  668. * U_UNASSIGNED_CODE_POINT_FOUND error code.
  669. *
  670. * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations.
  671. * If this option is set, unassigned code points in the input
  672. * are treated as normal Unicode code points.
  673. *
  674. * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions.
  675. * If this option is set and the input does not satisfy STD3 rules,
  676. * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
  677. *
  678. * @param parseError Pointer to UParseError struct to receive information on position
  679. * of error if an error is encountered. Can be NULL.
  680. * @param status ICU in/out error code parameter.
  681. * U_INVALID_CHAR_FOUND if src contains
  682. * unmatched single surrogates.
  683. * U_INDEX_OUTOFBOUNDS_ERROR if src contains
  684. * too many code points.
  685. * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough.
  686. * @return The length of the result string, if successful or in case of a buffer overflow;
  687. * in the latter case it will be greater than destCapacity.
  688. * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
  689. */
  690. U_DEPRECATED int32_t U_EXPORT2
  691. uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
  692. UChar* dest, int32_t destCapacity,
  693. int32_t options,
  694. UParseError* parseError,
  695. UErrorCode* status);
  696. /**
  697. * IDNA2003: Compare two IDN strings for equivalence.
  698. * This function splits the domain names into labels and compares them.
  699. * According to the IDNA RFC, whenever two labels are compared, they are
  700. * considered equal if and only if their ASCII forms (obtained by
  701. * applying toASCII) match using a case-insensitive ASCII comparison.
  702. * Two domain names are considered a match if and only if all labels
  703. * match, regardless of whether label separators match.
  704. *
  705. * @param s1 First source string.
  706. * @param length1 Length of first source string, or -1 if NUL-terminated.
  707. *
  708. * @param s2 Second source string.
  709. * @param length2 Length of second source string, or -1 if NUL-terminated.
  710. * @param options A bit set of options:
  711. *
  712. * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
  713. * and do not use STD3 ASCII rules.
  714. * If unassigned code points are found, the operation fails with the
  715. * U_UNASSIGNED_CODE_POINT_FOUND error code.
  716. *
  717. * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations.
  718. * If this option is set, unassigned code points in the input
  719. * are treated as normal Unicode code points.
  720. *
  721. * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions.
  722. * If this option is set and the input does not satisfy STD3 rules,
  723. * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
  724. *
  725. * @param status ICU error code in/out parameter.
  726. * Must fulfill U_SUCCESS before the function call.
  727. * @return <0 or 0 or >0 as usual for string comparisons.
  728. * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
  729. */
  730. U_DEPRECATED int32_t U_EXPORT2
  731. uidna_compare( const UChar *s1, int32_t length1,
  732. const UChar *s2, int32_t length2,
  733. int32_t options,
  734. UErrorCode* status);
  735. #endif /* U_HIDE_DEPRECATED_API */
  736. #endif /* #if !UCONFIG_NO_IDNA */
  737. #endif