  // © 2016 and later: Unicode, Inc. and others.
  // License & terms of use: http://www.unicode.org/copyright.html
  /*
   *******************************************************************************
   *
   *   Copyright (C) 2003-2014, International Business Machines
   *   Corporation and others.  All Rights Reserved.
   *
   *******************************************************************************
   *   file name:  uidna.h
   *   encoding:   UTF-8
   *   tab size:   8 (not used)
   *   indentation:4
   *
   *   created on: 2003feb1
   *   created by: Ram Viswanadha
   */
  18. #ifndef __UIDNA_H__
  19. #define __UIDNA_H__
  20. #include "unicode/utypes.h"
  21. #if !UCONFIG_NO_IDNA
  22. #include <stdbool.h>
  23. #include "unicode/parseerr.h"
  24. #if U_SHOW_CPLUSPLUS_API
  25. #include "unicode/localpointer.h"
  26. #endif // U_SHOW_CPLUSPLUS_API
  /**
   * \file
   * \brief C API: Internationalizing Domain Names in Applications (IDNA)
   *
   * IDNA2008 is implemented according to UTS #46, see the IDNA C++ class in idna.h.
   *
   * The C API functions which do take a UIDNA * service object pointer
   * implement UTS #46 and IDNA2008.
   *
   * IDNA2003 is obsolete.
   * The C API functions which do not take a service object pointer
   * implement IDNA2003. They are all deprecated.
   */
  /*
   * IDNA option bit set values.
   */
  enum {
      /**
       * Default options value: UTS #46 nontransitional processing.
       * For use in static worker and factory methods.
       *
       * Since ICU 76, this is the same as
       * UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE,
       * corresponding to Unicode 15.1 UTS #46 deprecating transitional processing.
       * (These options are ignored by the IDNA2003 implementation.)
       *
       * Before ICU 76, this constant did not set any of the options.
       *
       * @stable ICU 2.6
       */
      UIDNA_DEFAULT=0x30,
  #ifndef U_HIDE_DEPRECATED_API
      /**
       * Option to allow unassigned code points in domain names and labels.
       * For use in static worker and factory methods.
       * <p>This option is ignored by the UTS46 implementation.
       * (UTS #46 disallows unassigned code points.)
       * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
       */
      UIDNA_ALLOW_UNASSIGNED=1,
  #endif  /* U_HIDE_DEPRECATED_API */
      /**
       * Option to check whether the input conforms to the STD3 ASCII rules,
       * for example the restriction of labels to LDH characters
       * (ASCII Letters, Digits and Hyphen-Minus).
       * For use in static worker and factory methods.
       * @stable ICU 2.6
       */
      UIDNA_USE_STD3_RULES=2,
      /**
       * IDNA option to check for whether the input conforms to the BiDi rules.
       * For use in static worker and factory methods.
       * <p>This option is ignored by the IDNA2003 implementation.
       * (IDNA2003 always performs a BiDi check.)
       * @stable ICU 4.6
       */
      UIDNA_CHECK_BIDI=4,
      /**
       * IDNA option to check for whether the input conforms to the CONTEXTJ rules.
       * For use in static worker and factory methods.
       * <p>This option is ignored by the IDNA2003 implementation.
       * (The CONTEXTJ check is new in IDNA2008.)
       * @stable ICU 4.6
       */
      UIDNA_CHECK_CONTEXTJ=8,
      /**
       * IDNA option for nontransitional processing in ToASCII().
       * For use in static worker and factory methods.
       *
       * <p>By default, ToASCII() uses transitional processing.
       * Unicode 15.1 UTS #46 deprecated transitional processing.
       *
       * <p>This option is ignored by the IDNA2003 implementation.
       * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
       * @stable ICU 4.6
       * @see UIDNA_DEFAULT
       */
      UIDNA_NONTRANSITIONAL_TO_ASCII=0x10,
      /**
       * IDNA option for nontransitional processing in ToUnicode().
       * For use in static worker and factory methods.
       *
       * <p>By default, ToUnicode() uses transitional processing.
       * Unicode 15.1 UTS #46 deprecated transitional processing.
       *
       * <p>This option is ignored by the IDNA2003 implementation.
       * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
       * @stable ICU 4.6
       * @see UIDNA_DEFAULT
       */
      UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20,
      /**
       * IDNA option to check for whether the input conforms to the CONTEXTO rules.
       * For use in static worker and factory methods.
       * <p>This option is ignored by the IDNA2003 implementation.
       * (The CONTEXTO check is new in IDNA2008.)
       * <p>This is for use by registries for IDNA2008 conformance.
       * UTS #46 does not require the CONTEXTO check.
       * @stable ICU 49
       */
      UIDNA_CHECK_CONTEXTO=0x40
  };
  /**
   * Opaque C service object type for the new IDNA API.
   * Only declared here, never defined in this header, so callers can hold
   * and pass UIDNA pointers but cannot look inside the object.
   * @stable ICU 4.6
   */
  struct UIDNA;
  typedef struct UIDNA UIDNA;  /**< C typedef for struct UIDNA. @stable ICU 4.6 */
  135. /**
  136. * Returns a UIDNA instance which implements UTS #46.
  137. * Returns an unmodifiable instance, owned by the caller.
  138. * Cache it for multiple operations, and uidna_close() it when done.
  139. * The instance is thread-safe, that is, it can be used concurrently.
  140. *
  141. * For details about the UTS #46 implementation see the IDNA C++ class in idna.h.
  142. *
  143. * @param options Bit set to modify the processing and error checking.
  144. * These should include UIDNA_DEFAULT, or
  145. * UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE.
  146. * See option bit set values in uidna.h.
  147. * @param pErrorCode Standard ICU error code. Its input value must
  148. * pass the U_SUCCESS() test, or else the function returns
  149. * immediately. Check for U_FAILURE() on output or use with
  150. * function chaining. (See User Guide for details.)
  151. * @return the UTS #46 UIDNA instance, if successful
  152. * @stable ICU 4.6
  153. */
  154. U_CAPI UIDNA * U_EXPORT2
  155. uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode);
  156. /**
  157. * Closes a UIDNA instance.
  158. * @param idna UIDNA instance to be closed
  159. * @stable ICU 4.6
  160. */
  161. U_CAPI void U_EXPORT2
  162. uidna_close(UIDNA *idna);
  #if U_SHOW_CPLUSPLUS_API

  U_NAMESPACE_BEGIN

  /**
   * \class LocalUIDNAPointer
   * "Smart pointer" class, closes a UIDNA via uidna_close().
   * For most methods see the LocalPointerBase base class.
   *
   * @see LocalPointerBase
   * @see LocalPointer
   * @stable ICU 4.6
   */
  U_DEFINE_LOCAL_OPEN_POINTER(LocalUIDNAPointer, UIDNA, uidna_close);

  U_NAMESPACE_END

  #endif   // U_SHOW_CPLUSPLUS_API
  177. /**
  178. * Output container for IDNA processing errors.
  179. * Initialize with UIDNA_INFO_INITIALIZER:
  180. * \code
  181. * UIDNAInfo info = UIDNA_INFO_INITIALIZER;
  182. * int32_t length = uidna_nameToASCII(..., &info, &errorCode);
  183. * if(U_SUCCESS(errorCode) && info.errors!=0) { ... }
  184. * \endcode
  185. * @stable ICU 4.6
  186. */
  187. typedef struct UIDNAInfo {
  188. /** sizeof(UIDNAInfo) @stable ICU 4.6 */
  189. int16_t size;
  190. /**
  191. * Set to true if transitional and nontransitional processing produce different results.
  192. * For details see C++ IDNAInfo::isTransitionalDifferent().
  193. * @stable ICU 4.6
  194. */
  195. UBool isTransitionalDifferent;
  196. UBool reservedB3; /**< Reserved field, do not use. @internal */
  197. /**
  198. * Bit set indicating IDNA processing errors. 0 if no errors.
  199. * See UIDNA_ERROR_... constants.
  200. * @stable ICU 4.6
  201. */
  202. uint32_t errors;
  203. int32_t reservedI2; /**< Reserved field, do not use. @internal */
  204. int32_t reservedI3; /**< Reserved field, do not use. @internal */
  205. } UIDNAInfo;
  /**
   * Static initializer for a UIDNAInfo struct.
   * Fills the fields in declaration order: size gets sizeof(UIDNAInfo),
   * both UBool fields get false, and errors plus the two reserved
   * integers get 0.
   * @stable ICU 4.6
   */
  #define UIDNA_INFO_INITIALIZER { \
      (int16_t)sizeof(UIDNAInfo), \
      false, false, \
      0, 0, 0 }
  214. /**
  215. * Converts a single domain name label into its ASCII form for DNS lookup.
  216. * If any processing step fails, then pInfo->errors will be non-zero and
  217. * the result might not be an ASCII string.
  218. * The label might be modified according to the types of errors.
  219. * Labels with severe errors will be left in (or turned into) their Unicode form.
  220. *
  221. * The UErrorCode indicates an error only in exceptional cases,
  222. * such as a U_MEMORY_ALLOCATION_ERROR.
  223. *
  224. * @param idna UIDNA instance
  225. * @param label Input domain name label
  226. * @param length Label length, or -1 if NUL-terminated
  227. * @param dest Destination string buffer
  228. * @param capacity Destination buffer capacity
  229. * @param pInfo Output container of IDNA processing details.
  230. * @param pErrorCode Standard ICU error code. Its input value must
  231. * pass the U_SUCCESS() test, or else the function returns
  232. * immediately. Check for U_FAILURE() on output or use with
  233. * function chaining. (See User Guide for details.)
  234. * @return destination string length
  235. * @stable ICU 4.6
  236. */
  237. U_CAPI int32_t U_EXPORT2
  238. uidna_labelToASCII(const UIDNA *idna,
  239. const UChar *label, int32_t length,
  240. UChar *dest, int32_t capacity,
  241. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  242. /**
  243. * Converts a single domain name label into its Unicode form for human-readable display.
  244. * If any processing step fails, then pInfo->errors will be non-zero.
  245. * The label might be modified according to the types of errors.
  246. *
  247. * The UErrorCode indicates an error only in exceptional cases,
  248. * such as a U_MEMORY_ALLOCATION_ERROR.
  249. *
  250. * @param idna UIDNA instance
  251. * @param label Input domain name label
  252. * @param length Label length, or -1 if NUL-terminated
  253. * @param dest Destination string buffer
  254. * @param capacity Destination buffer capacity
  255. * @param pInfo Output container of IDNA processing details.
  256. * @param pErrorCode Standard ICU error code. Its input value must
  257. * pass the U_SUCCESS() test, or else the function returns
  258. * immediately. Check for U_FAILURE() on output or use with
  259. * function chaining. (See User Guide for details.)
  260. * @return destination string length
  261. * @stable ICU 4.6
  262. */
  263. U_CAPI int32_t U_EXPORT2
  264. uidna_labelToUnicode(const UIDNA *idna,
  265. const UChar *label, int32_t length,
  266. UChar *dest, int32_t capacity,
  267. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  268. /**
  269. * Converts a whole domain name into its ASCII form for DNS lookup.
  270. * If any processing step fails, then pInfo->errors will be non-zero and
  271. * the result might not be an ASCII string.
  272. * The domain name might be modified according to the types of errors.
  273. * Labels with severe errors will be left in (or turned into) their Unicode form.
  274. *
  275. * The UErrorCode indicates an error only in exceptional cases,
  276. * such as a U_MEMORY_ALLOCATION_ERROR.
  277. *
  278. * @param idna UIDNA instance
  279. * @param name Input domain name
  280. * @param length Domain name length, or -1 if NUL-terminated
  281. * @param dest Destination string buffer
  282. * @param capacity Destination buffer capacity
  283. * @param pInfo Output container of IDNA processing details.
  284. * @param pErrorCode Standard ICU error code. Its input value must
  285. * pass the U_SUCCESS() test, or else the function returns
  286. * immediately. Check for U_FAILURE() on output or use with
  287. * function chaining. (See User Guide for details.)
  288. * @return destination string length
  289. * @stable ICU 4.6
  290. */
  291. U_CAPI int32_t U_EXPORT2
  292. uidna_nameToASCII(const UIDNA *idna,
  293. const UChar *name, int32_t length,
  294. UChar *dest, int32_t capacity,
  295. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  296. /**
  297. * Converts a whole domain name into its Unicode form for human-readable display.
  298. * If any processing step fails, then pInfo->errors will be non-zero.
  299. * The domain name might be modified according to the types of errors.
  300. *
  301. * The UErrorCode indicates an error only in exceptional cases,
  302. * such as a U_MEMORY_ALLOCATION_ERROR.
  303. *
  304. * @param idna UIDNA instance
  305. * @param name Input domain name
  306. * @param length Domain name length, or -1 if NUL-terminated
  307. * @param dest Destination string buffer
  308. * @param capacity Destination buffer capacity
  309. * @param pInfo Output container of IDNA processing details.
  310. * @param pErrorCode Standard ICU error code. Its input value must
  311. * pass the U_SUCCESS() test, or else the function returns
  312. * immediately. Check for U_FAILURE() on output or use with
  313. * function chaining. (See User Guide for details.)
  314. * @return destination string length
  315. * @stable ICU 4.6
  316. */
  317. U_CAPI int32_t U_EXPORT2
  318. uidna_nameToUnicode(const UIDNA *idna,
  319. const UChar *name, int32_t length,
  320. UChar *dest, int32_t capacity,
  321. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  322. /* UTF-8 versions of the processing methods --------------------------------- */
  323. /**
  324. * Converts a single domain name label into its ASCII form for DNS lookup.
  325. * UTF-8 version of uidna_labelToASCII(), same behavior.
  326. *
  327. * @param idna UIDNA instance
  328. * @param label Input domain name label
  329. * @param length Label length, or -1 if NUL-terminated
  330. * @param dest Destination string buffer
  331. * @param capacity Destination buffer capacity
  332. * @param pInfo Output container of IDNA processing details.
  333. * @param pErrorCode Standard ICU error code. Its input value must
  334. * pass the U_SUCCESS() test, or else the function returns
  335. * immediately. Check for U_FAILURE() on output or use with
  336. * function chaining. (See User Guide for details.)
  337. * @return destination string length
  338. * @stable ICU 4.6
  339. */
  340. U_CAPI int32_t U_EXPORT2
  341. uidna_labelToASCII_UTF8(const UIDNA *idna,
  342. const char *label, int32_t length,
  343. char *dest, int32_t capacity,
  344. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  345. /**
  346. * Converts a single domain name label into its Unicode form for human-readable display.
  347. * UTF-8 version of uidna_labelToUnicode(), same behavior.
  348. *
  349. * @param idna UIDNA instance
  350. * @param label Input domain name label
  351. * @param length Label length, or -1 if NUL-terminated
  352. * @param dest Destination string buffer
  353. * @param capacity Destination buffer capacity
  354. * @param pInfo Output container of IDNA processing details.
  355. * @param pErrorCode Standard ICU error code. Its input value must
  356. * pass the U_SUCCESS() test, or else the function returns
  357. * immediately. Check for U_FAILURE() on output or use with
  358. * function chaining. (See User Guide for details.)
  359. * @return destination string length
  360. * @stable ICU 4.6
  361. */
  362. U_CAPI int32_t U_EXPORT2
  363. uidna_labelToUnicodeUTF8(const UIDNA *idna,
  364. const char *label, int32_t length,
  365. char *dest, int32_t capacity,
  366. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  367. /**
  368. * Converts a whole domain name into its ASCII form for DNS lookup.
  369. * UTF-8 version of uidna_nameToASCII(), same behavior.
  370. *
  371. * @param idna UIDNA instance
  372. * @param name Input domain name
  373. * @param length Domain name length, or -1 if NUL-terminated
  374. * @param dest Destination string buffer
  375. * @param capacity Destination buffer capacity
  376. * @param pInfo Output container of IDNA processing details.
  377. * @param pErrorCode Standard ICU error code. Its input value must
  378. * pass the U_SUCCESS() test, or else the function returns
  379. * immediately. Check for U_FAILURE() on output or use with
  380. * function chaining. (See User Guide for details.)
  381. * @return destination string length
  382. * @stable ICU 4.6
  383. */
  384. U_CAPI int32_t U_EXPORT2
  385. uidna_nameToASCII_UTF8(const UIDNA *idna,
  386. const char *name, int32_t length,
  387. char *dest, int32_t capacity,
  388. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  389. /**
  390. * Converts a whole domain name into its Unicode form for human-readable display.
  391. * UTF-8 version of uidna_nameToUnicode(), same behavior.
  392. *
  393. * @param idna UIDNA instance
  394. * @param name Input domain name
  395. * @param length Domain name length, or -1 if NUL-terminated
  396. * @param dest Destination string buffer
  397. * @param capacity Destination buffer capacity
  398. * @param pInfo Output container of IDNA processing details.
  399. * @param pErrorCode Standard ICU error code. Its input value must
  400. * pass the U_SUCCESS() test, or else the function returns
  401. * immediately. Check for U_FAILURE() on output or use with
  402. * function chaining. (See User Guide for details.)
  403. * @return destination string length
  404. * @stable ICU 4.6
  405. */
  406. U_CAPI int32_t U_EXPORT2
  407. uidna_nameToUnicodeUTF8(const UIDNA *idna,
  408. const char *name, int32_t length,
  409. char *dest, int32_t capacity,
  410. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  /*
   * IDNA error bit set values.
   * When a domain name or label fails a processing step or does not meet the
   * validity criteria, then one or more of these error bits are set.
   */
  enum {
      /**
       * A non-final domain name label (or the whole domain name) is empty.
       * @stable ICU 4.6
       */
      UIDNA_ERROR_EMPTY_LABEL=1,
      /**
       * A domain name label is longer than 63 bytes.
       * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
       * This is only checked in ToASCII operations, and only if the output label is all-ASCII.
       * @stable ICU 4.6
       */
      UIDNA_ERROR_LABEL_TOO_LONG=2,
      /**
       * A domain name is longer than 255 bytes in its storage form.
       * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
       * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
       * @stable ICU 4.6
       */
      UIDNA_ERROR_DOMAIN_NAME_TOO_LONG=4,
      /**
       * A label starts with a hyphen-minus ('-').
       * @stable ICU 4.6
       */
      UIDNA_ERROR_LEADING_HYPHEN=8,
      /**
       * A label ends with a hyphen-minus ('-').
       * @stable ICU 4.6
       */
      UIDNA_ERROR_TRAILING_HYPHEN=0x10,
      /**
       * A label contains hyphen-minus ('-') in the third and fourth positions.
       * @stable ICU 4.6
       */
      UIDNA_ERROR_HYPHEN_3_4=0x20,
      /**
       * A label starts with a combining mark.
       * @stable ICU 4.6
       */
      UIDNA_ERROR_LEADING_COMBINING_MARK=0x40,
      /**
       * A label or domain name contains disallowed characters.
       * @stable ICU 4.6
       */
      UIDNA_ERROR_DISALLOWED=0x80,
      /**
       * A label starts with "xn--" but does not contain valid Punycode.
       * That is, an xn-- label failed Punycode decoding.
       * @stable ICU 4.6
       */
      UIDNA_ERROR_PUNYCODE=0x100,
      /**
       * A label contains a dot (full stop).
       * This can occur in an input string for a single-label function.
       * @stable ICU 4.6
       */
      UIDNA_ERROR_LABEL_HAS_DOT=0x200,
      /**
       * An ACE label does not contain a valid label string.
       * The label was successfully ACE (Punycode) decoded but the resulting
       * string had severe validation errors. For example,
       * it might contain characters that are not allowed in ACE labels,
       * or it might not be normalized.
       * @stable ICU 4.6
       */
      UIDNA_ERROR_INVALID_ACE_LABEL=0x400,
      /**
       * A label does not meet the IDNA BiDi requirements (for right-to-left characters).
       * @stable ICU 4.6
       */
      UIDNA_ERROR_BIDI=0x800,
      /**
       * A label does not meet the IDNA CONTEXTJ requirements.
       * @stable ICU 4.6
       */
      UIDNA_ERROR_CONTEXTJ=0x1000,
      /**
       * A label does not meet the IDNA CONTEXTO requirements for punctuation characters.
       * Some punctuation characters "Would otherwise have been DISALLOWED"
       * but are allowed in certain contexts. (RFC 5892)
       * @stable ICU 49
       */
      UIDNA_ERROR_CONTEXTO_PUNCTUATION=0x2000,
      /**
       * A label does not meet the IDNA CONTEXTO requirements for digits.
       * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
       * @stable ICU 49
       */
      UIDNA_ERROR_CONTEXTO_DIGITS=0x4000
  };
  506. #ifndef U_HIDE_DEPRECATED_API
  507. /* IDNA2003 API ------------------------------------------------------------- */
  508. /**
  509. * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
  510. * This operation is done on <b>single labels</b> before sending it to something that expects
  511. * ASCII names. A label is an individual part of a domain name. Labels are usually
  512. * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
  513. *
  514. * IDNA2003 API Overview:
  515. *
  516. * The uidna_ API implements the IDNA protocol as defined in the IDNA RFC
  517. * (http://www.ietf.org/rfc/rfc3490.txt).
  518. * The RFC defines 2 operations: ToASCII and ToUnicode. Domain name labels
  519. * containing non-ASCII code points are processed by the
  520. * ToASCII operation before passing it to resolver libraries. Domain names
  521. * that are obtained from resolver libraries are processed by the
  522. * ToUnicode operation before displaying the domain name to the user.
  523. * IDNA requires that implementations process input strings with Nameprep
  524. * (http://www.ietf.org/rfc/rfc3491.txt),
  525. * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt),
  526. * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt).
  527. * Implementations of IDNA MUST fully implement Nameprep and Punycode;
  528. * neither Nameprep nor Punycode are optional.
  529. * The input and output of ToASCII and ToUnicode operations are Unicode
  530. * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
  531. * multiple times to an input string will yield the same result as applying the operation
  532. * once.
  533. * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
  534. * ToASCII(ToASCII(ToASCII...(ToASCII(string))) == ToASCII(string).
  535. *
  536. * @param src Input UChar array containing label in Unicode.
  537. * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
  538. * @param dest Output UChar array with ASCII (ACE encoded) label.
  539. * @param destCapacity Size of dest.
  540. * @param options A bit set of options:
  541. *
  542. * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
  543. * and do not use STD3 ASCII rules
  544. * If unassigned code points are found the operation fails with
  545. * U_UNASSIGNED_ERROR error code.
  546. *
  547. * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
  548. * If this option is set, the unassigned code points are in the input
  549. * are treated as normal Unicode code points.
  550. *
  551. * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
  552. * If this option is set and the input does not satisfy STD3 rules,
  553. * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
  554. *
  555. * @param parseError Pointer to UParseError struct to receive information on position
  556. * of error if an error is encountered. Can be NULL.
  557. * @param status ICU in/out error code parameter.
  558. * U_INVALID_CHAR_FOUND if src contains
  559. * unmatched single surrogates.
  560. * U_INDEX_OUTOFBOUNDS_ERROR if src contains
  561. * too many code points.
  562. * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
  563. * @return The length of the result string, if successful - or in case of a buffer overflow,
  564. * in which case it will be greater than destCapacity.
  565. * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
  566. */
  567. U_DEPRECATED int32_t U_EXPORT2
  568. uidna_toASCII(const UChar* src, int32_t srcLength,
  569. UChar* dest, int32_t destCapacity,
  570. int32_t options,
  571. UParseError* parseError,
  572. UErrorCode* status);
  573. /**
  574. * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
  575. * This operation is done on <b>single labels</b> before sending it to something that expects
  576. * Unicode names. A label is an individual part of a domain name. Labels are usually
  577. * separated by dots; for e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
  578. *
  579. * @param src Input UChar array containing ASCII (ACE encoded) label.
  580. * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
  581. * @param dest Output Converted UChar array containing Unicode equivalent of label.
  582. * @param destCapacity Size of dest.
  583. * @param options A bit set of options:
  584. *
  585. * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
  586. * and do not use STD3 ASCII rules
  587. * If unassigned code points are found the operation fails with
  588. * U_UNASSIGNED_ERROR error code.
  589. *
  590. * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
  591. * If this option is set, the unassigned code points are in the input
  592. * are treated as normal Unicode code points. <b> Note: </b> This option is
  593. * required on toUnicode operation because the RFC mandates
  594. * verification of decoded ACE input by applying toASCII and comparing
  595. * its output with source
  596. *
  597. * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
  598. * If this option is set and the input does not satisfy STD3 rules,
  599. * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
  600. *
  601. * @param parseError Pointer to UParseError struct to receive information on position
  602. * of error if an error is encountered. Can be NULL.
  603. * @param status ICU in/out error code parameter.
  604. * U_INVALID_CHAR_FOUND if src contains
  605. * unmatched single surrogates.
  606. * U_INDEX_OUTOFBOUNDS_ERROR if src contains
  607. * too many code points.
  608. * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
  609. * @return The length of the result string, if successful - or in case of a buffer overflow,
  610. * in which case it will be greater than destCapacity.
  611. * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
  612. */
  613. U_DEPRECATED int32_t U_EXPORT2
  614. uidna_toUnicode(const UChar* src, int32_t srcLength,
  615. UChar* dest, int32_t destCapacity,
  616. int32_t options,
  617. UParseError* parseError,
  618. UErrorCode* status);
  619. /**
  620. * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
  621. * This operation is done on complete domain names, e.g: "www.example.com".
  622. * It is important to note that this operation can fail. If it fails, then the input
  623. * domain name cannot be used as an Internationalized Domain Name and the application
  624. * should have methods defined to deal with the failure.
  625. *
  626. * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
  627. * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
  628. * and then convert. This function does not offer that level of granularity. The options once
  629. * set will apply to all labels in the domain name
  630. *
  631. * @param src Input UChar array containing IDN in Unicode.
  632. * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
  633. * @param dest Output UChar array with ASCII (ACE encoded) IDN.
  634. * @param destCapacity Size of dest.
  635. * @param options A bit set of options:
  636. *
  637. * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
  638. * and do not use STD3 ASCII rules
  639. * If unassigned code points are found the operation fails with
  640. * U_UNASSIGNED_CODE_POINT_FOUND error code.
  641. *
  642. * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
  643. * If this option is set, the unassigned code points are in the input
  644. * are treated as normal Unicode code points.
  645. *
  646. * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
  647. * If this option is set and the input does not satisfy STD3 rules,
  648. * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
  649. *
  650. * @param parseError Pointer to UParseError struct to receive information on position
  651. * of error if an error is encountered. Can be NULL.
  652. * @param status ICU in/out error code parameter.
  653. * U_INVALID_CHAR_FOUND if src contains
  654. * unmatched single surrogates.
  655. * U_INDEX_OUTOFBOUNDS_ERROR if src contains
  656. * too many code points.
  657. * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
  658. * @return The length of the result string, either on success or on a buffer overflow;
  659. * in the overflow case it will be greater than destCapacity.
  660. * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
  661. */
  662. U_DEPRECATED int32_t U_EXPORT2
  663. uidna_IDNToASCII( const UChar* src, int32_t srcLength,
  664. UChar* dest, int32_t destCapacity,
  665. int32_t options,
  666. UParseError* parseError,
  667. UErrorCode* status);
  668. /**
  669. * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
  670. * This operation is done on complete domain names, e.g., "www.example.com".
  671. *
  672. * <b>Note:</b> The IDNA RFC specifies that a conformant application should divide a domain name
  673. * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
  674. * and then convert. This function does not offer that level of granularity. The options, once
  675. * set, apply to all labels in the domain name.
  676. *
  677. * @param src Input UChar array containing IDN in ASCII (ACE encoded) form.
  678. * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
  679. * @param dest Output UChar array containing Unicode equivalent of source IDN.
  680. * @param destCapacity Size of dest.
  681. * @param options A bit set of options:
  682. *
  683. * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
  684. * and do not use STD3 ASCII rules.
  685. * If unassigned code points are found the operation fails with
  686. * U_UNASSIGNED_CODE_POINT_FOUND error code.
  687. *
  688. * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations.
  689. * If this option is set, unassigned code points in the input
  690. * are treated as normal Unicode code points.
  691. *
  692. * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions.
  693. * If this option is set and the input does not satisfy STD3 rules,
  694. * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
  695. *
  696. * @param parseError Pointer to UParseError struct to receive information on position
  697. * of error if an error is encountered. Can be NULL.
  698. * @param status ICU in/out error code parameter.
  699. * U_INVALID_CHAR_FOUND if src contains
  700. * unmatched single surrogates.
  701. * U_INDEX_OUTOFBOUNDS_ERROR if src contains
  702. * too many code points.
  703. * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough.
  704. * @return The length of the result string, either on success or on a buffer overflow;
  705. * in the overflow case it will be greater than destCapacity.
  706. * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
  707. */
  708. U_DEPRECATED int32_t U_EXPORT2
  709. uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
  710. UChar* dest, int32_t destCapacity,
  711. int32_t options,
  712. UParseError* parseError,
  713. UErrorCode* status);
  714. /**
  715. * IDNA2003: Compare two IDN strings for equivalence.
  716. * This function splits the domain names into labels and compares them.
  717. * According to the IDN RFC, whenever two labels are compared, they are
  718. * considered equal if and only if their ASCII forms (obtained by
  719. * applying toASCII) match using a case-insensitive ASCII comparison.
  720. * Two domain names are considered a match if and only if all labels
  721. * match regardless of whether label separators match.
  722. *
  723. * @param s1 First source string.
  724. * @param length1 Length of first source string, or -1 if NUL-terminated.
  725. *
  726. * @param s2 Second source string.
  727. * @param length2 Length of second source string, or -1 if NUL-terminated.
  728. * @param options A bit set of options:
  729. *
  730. * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
  731. * and do not use STD3 ASCII rules.
  732. * If unassigned code points are found the operation fails with
  733. * U_UNASSIGNED_CODE_POINT_FOUND error code.
  734. *
  735. * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations.
  736. * If this option is set, unassigned code points in the input
  737. * are treated as normal Unicode code points.
  738. *
  739. * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions.
  740. * If this option is set and the input does not satisfy STD3 rules,
  741. * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
  742. *
  743. * @param status ICU error code in/out parameter.
  744. * Must fulfill U_SUCCESS before the function call.
  745. * @return <0 or 0 or >0 as usual for string comparisons.
  746. * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
  747. */
  748. U_DEPRECATED int32_t U_EXPORT2
  749. uidna_compare( const UChar *s1, int32_t length1,
  750. const UChar *s2, int32_t length2,
  751. int32_t options,
  752. UErrorCode* status);
  753. #endif /* U_HIDE_DEPRECATED_API */
  754. #endif /* #if !UCONFIG_NO_IDNA */
  755. #endif