idna.c 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926
  1. /* idna.c --- Implementation of the Internationalized Domain Name library.
  2. Copyright (C) 2002-2024 Simon Josefsson
  3. This file is part of GNU Libidn.
  4. GNU Libidn is free software: you can redistribute it and/or
  5. modify it under the terms of either:
  6. * the GNU Lesser General Public License as published by the Free
  7. Software Foundation; either version 3 of the License, or (at
  8. your option) any later version.
  9. or
  10. * the GNU General Public License as published by the Free
  11. Software Foundation; either version 2 of the License, or (at
  12. your option) any later version.
  13. or both in parallel, as here.
  14. GNU Libidn is distributed in the hope that it will be useful,
  15. but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. General Public License for more details.
  18. You should have received copies of the GNU General Public License and
  19. the GNU Lesser General Public License along with this program. If
  20. not, see <https://www.gnu.org/licenses/>. */
  21. #ifdef HAVE_CONFIG_H
  22. # include "config.h"
  23. #endif
  24. #include <stdlib.h>
  25. #include <string.h>
  26. #include <stringprep.h>
  27. #include <punycode.h>
  28. #include "idna.h"
  29. /* Get c_strcasecmp. */
  30. #include <c-strcase.h>
  31. #define DOTP(c) ((c) == 0x002E || (c) == 0x3002 || \
  32. (c) == 0xFF0E || (c) == 0xFF61)
  33. #ifdef WITH_VALGRIND
  34. static size_t STRLEN(const char *s) {
  35. size_t ret = 0;
  36. while (*s++)
  37. ++ret;
  38. return ret;
  39. }
  40. static char* STRCPY(char* destination, const char* source) {
  41. char *p = destination;
  42. while (*source)
  43. *p++ = *source++;
  44. *p = 0;
  45. return destination;
  46. }
  47. static char* STRCAT(char* destination, const char* source) {
  48. char *p = destination;
  49. while (*p)
  50. ++p;
  51. while (*source)
  52. *p++ = *source++;
  53. *p = 0;
  54. return destination;
  55. }
  56. #else //WITH_VALGRIND
  57. # define STRLEN(s) strlen(s)
  58. # define STRCAT(d, s) strcat(d, s)
  59. # define STRCPY(d, s) strcpy(d, s)
  60. #endif
  61. /* Core functions */
  62. /**
  63. * idna_to_ascii_4i:
  64. * @in: input array with unicode code points.
  65. * @inlen: length of input array with unicode code points.
  66. * @out: output zero terminated string that must have room for at
  67. * least IDNA_LABEL_MAX_LENGTH characters plus the terminating zero.
  68. * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
  69. * %IDNA_USE_STD3_ASCII_RULES.
  70. *
  71. * The ToASCII operation takes a sequence of Unicode code points that
  72. * make up one domain label and transforms it into a sequence of code
  73. * points in the ASCII range (0..7F). If ToASCII succeeds, the
  74. * original sequence and the resulting sequence are equivalent labels.
  75. *
  76. * It is important to note that the ToASCII operation can fail. ToASCII
  77. * fails if any step of it fails. If any step of the ToASCII operation
  78. * fails on any label in a domain name, that domain name MUST NOT be used
  79. * as an internationalized domain name. The method for deadling with this
  80. * failure is application-specific.
  81. *
  82. * The inputs to ToASCII are a sequence of code points, the AllowUnassigned
  83. * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a
  84. * sequence of ASCII code points or a failure condition.
  85. *
  86. * ToASCII never alters a sequence of code points that are all in the ASCII
  87. * range to begin with (although it could fail). Applying the ToASCII
  88. * operation multiple times has exactly the same effect as applying it just
  89. * once.
  90. *
  91. * Return value: Returns 0 on success, or an #Idna_rc error code.
  92. */
  93. int
  94. idna_to_ascii_4i (const uint32_t *in, size_t inlen, char *out, int flags)
  95. {
  96. size_t len, outlen;
  97. uint32_t *src; /* XXX don't need to copy data? */
  98. int rc;
  99. /*
  100. * ToASCII consists of the following steps:
  101. *
  102. * 1. If all code points in the sequence are in the ASCII range (0..7F)
  103. * then skip to step 3.
  104. */
  105. {
  106. size_t i;
  107. int inasciirange;
  108. inasciirange = 1;
  109. for (i = 0; i < inlen; i++)
  110. if (in[i] > 0x7F)
  111. inasciirange = 0;
  112. if (inasciirange)
  113. {
  114. src = malloc (sizeof (in[0]) * (inlen + 1));
  115. if (src == NULL)
  116. return IDNA_MALLOC_ERROR;
  117. memcpy (src, in, sizeof (in[0]) * inlen);
  118. src[inlen] = 0;
  119. goto step3;
  120. }
  121. }
  122. /*
  123. * 2. Perform the steps specified in [NAMEPREP] and fail if there is
  124. * an error. The AllowUnassigned flag is used in [NAMEPREP].
  125. */
  126. {
  127. char *p;
  128. p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL);
  129. if (p == NULL)
  130. return IDNA_MALLOC_ERROR;
  131. len = STRLEN (p);
  132. do
  133. {
  134. char *newp;
  135. len = 2 * len + 10; /* XXX better guess? */
  136. newp = realloc (p, len);
  137. if (newp == NULL)
  138. {
  139. free (p);
  140. return IDNA_MALLOC_ERROR;
  141. }
  142. p = newp;
  143. if (flags & IDNA_ALLOW_UNASSIGNED)
  144. rc = stringprep_nameprep (p, len);
  145. else
  146. rc = stringprep_nameprep_no_unassigned (p, len);
  147. }
  148. while (rc == STRINGPREP_TOO_SMALL_BUFFER);
  149. if (rc != STRINGPREP_OK)
  150. {
  151. free (p);
  152. return IDNA_STRINGPREP_ERROR;
  153. }
  154. src = stringprep_utf8_to_ucs4 (p, -1, NULL);
  155. free (p);
  156. if (!src)
  157. return IDNA_MALLOC_ERROR;
  158. }
  159. step3:
  160. /*
  161. * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks:
  162. *
  163. * (a) Verify the absence of non-LDH ASCII code points; that is,
  164. * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
  165. *
  166. * (b) Verify the absence of leading and trailing hyphen-minus;
  167. * that is, the absence of U+002D at the beginning and end of
  168. * the sequence.
  169. */
  170. if (flags & IDNA_USE_STD3_ASCII_RULES)
  171. {
  172. size_t i;
  173. for (i = 0; src[i]; i++)
  174. if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F ||
  175. (src[i] >= 0x3A && src[i] <= 0x40) ||
  176. (src[i] >= 0x5B && src[i] <= 0x60) ||
  177. (src[i] >= 0x7B && src[i] <= 0x7F))
  178. {
  179. free (src);
  180. return IDNA_CONTAINS_NON_LDH;
  181. }
  182. if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D))
  183. {
  184. free (src);
  185. return IDNA_CONTAINS_MINUS;
  186. }
  187. }
  188. /*
  189. * 4. If all code points in the sequence are in the ASCII range
  190. * (0..7F), then skip to step 8.
  191. */
  192. {
  193. size_t i;
  194. int inasciirange;
  195. inasciirange = 1;
  196. for (i = 0; src[i]; i++)
  197. {
  198. if (src[i] > 0x7F)
  199. inasciirange = 0;
  200. /* copy string to output buffer if we are about to skip to step8 */
  201. if (i < IDNA_LABEL_MAX_LENGTH)
  202. out[i] = src[i];
  203. }
  204. if (i < IDNA_LABEL_MAX_LENGTH)
  205. out[i] = '\0';
  206. else
  207. {
  208. out[IDNA_LABEL_MAX_LENGTH] = 0;
  209. free (src);
  210. return IDNA_INVALID_LENGTH;
  211. }
  212. if (inasciirange)
  213. goto step8;
  214. }
  215. /*
  216. * 5. Verify that the sequence does NOT begin with the ACE prefix.
  217. *
  218. */
  219. {
  220. size_t i;
  221. int match;
  222. match = 1;
  223. for (i = 0; match && i < STRLEN (IDNA_ACE_PREFIX); i++)
  224. if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i])
  225. match = 0;
  226. if (match)
  227. {
  228. free (src);
  229. return IDNA_CONTAINS_ACE_PREFIX;
  230. }
  231. }
  232. /*
  233. * 6. Encode the sequence using the encoding algorithm in [PUNYCODE]
  234. * and fail if there is an error.
  235. */
  236. for (len = 0; src[len]; len++)
  237. ;
  238. src[len] = '\0';
  239. outlen = IDNA_LABEL_MAX_LENGTH - STRLEN (IDNA_ACE_PREFIX);
  240. rc = punycode_encode (len, src, NULL,
  241. &outlen, &out[STRLEN (IDNA_ACE_PREFIX)]);
  242. if (rc != PUNYCODE_SUCCESS)
  243. {
  244. free (src);
  245. return IDNA_PUNYCODE_ERROR;
  246. }
  247. out[STRLEN (IDNA_ACE_PREFIX) + outlen] = '\0';
  248. /*
  249. * 7. Prepend the ACE prefix.
  250. */
  251. memcpy (out, IDNA_ACE_PREFIX, STRLEN (IDNA_ACE_PREFIX));
  252. /*
  253. * 8. Verify that the number of code points is in the range 1 to IDNA_LABEL_MAX_LENGTH
  254. * inclusive (0 is excluded).
  255. */
  256. step8:
  257. free (src);
  258. if (STRLEN (out) < 1 || STRLEN (out) > IDNA_LABEL_MAX_LENGTH - 1)
  259. return IDNA_INVALID_LENGTH;
  260. return IDNA_SUCCESS;
  261. }
  262. /* ToUnicode(). May realloc() utf8in. Will free utf8in unconditionally. */
  263. static int
  264. idna_to_unicode_internal (char *utf8in,
  265. uint32_t *out, size_t *outlen, int flags)
  266. {
  267. int rc;
  268. char tmpout[IDNA_LABEL_MAX_LENGTH + 1];
  269. size_t utf8len = STRLEN (utf8in) + 1;
  270. size_t addlen = 0, addinc = utf8len / 10 + 1;
  271. /*
  272. * ToUnicode consists of the following steps:
  273. *
  274. * 1. If the sequence contains any code points outside the ASCII range
  275. * (0..7F) then proceed to step 2, otherwise skip to step 3.
  276. */
  277. {
  278. size_t i;
  279. int inasciirange;
  280. inasciirange = 1;
  281. for (i = 0; utf8in[i]; i++)
  282. if (utf8in[i] & ~0x7F)
  283. inasciirange = 0;
  284. if (inasciirange)
  285. goto step3;
  286. }
  287. /*
  288. * 2. Perform the steps specified in [NAMEPREP] and fail if there is an
  289. * error. (If step 3 of ToASCII is also performed here, it will not
  290. * affect the overall behavior of ToUnicode, but it is not
  291. * necessary.) The AllowUnassigned flag is used in [NAMEPREP].
  292. */
  293. do
  294. {
  295. char *newp = realloc (utf8in, utf8len + addlen);
  296. if (newp == NULL)
  297. {
  298. free (utf8in);
  299. return IDNA_MALLOC_ERROR;
  300. }
  301. utf8in = newp;
  302. if (flags & IDNA_ALLOW_UNASSIGNED)
  303. rc = stringprep_nameprep (utf8in, utf8len + addlen);
  304. else
  305. rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen);
  306. addlen += addinc;
  307. addinc *= 2;
  308. }
  309. while (rc == STRINGPREP_TOO_SMALL_BUFFER);
  310. if (rc != STRINGPREP_OK)
  311. {
  312. free (utf8in);
  313. return IDNA_STRINGPREP_ERROR;
  314. }
  315. /* 3. Verify that the sequence begins with the ACE prefix, and save a
  316. * copy of the sequence.
  317. * ... The ToASCII and ToUnicode operations MUST recognize the ACE
  318. prefix in a case-insensitive manner.
  319. */
  320. step3:
  321. if (c_strncasecmp (utf8in, IDNA_ACE_PREFIX, STRLEN (IDNA_ACE_PREFIX)) != 0)
  322. {
  323. free (utf8in);
  324. return IDNA_NO_ACE_PREFIX;
  325. }
  326. /* 4. Remove the ACE prefix.
  327. */
  328. memmove (utf8in, &utf8in[STRLEN (IDNA_ACE_PREFIX)],
  329. STRLEN (utf8in) - STRLEN (IDNA_ACE_PREFIX) + 1);
  330. /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE]
  331. * and fail if there is an error. Save a copy of the result of
  332. * this step.
  333. */
  334. (*outlen)--; /* reserve one for the zero */
  335. rc = punycode_decode (STRLEN (utf8in), utf8in, outlen, out, NULL);
  336. if (rc != PUNYCODE_SUCCESS)
  337. {
  338. free (utf8in);
  339. return IDNA_PUNYCODE_ERROR;
  340. }
  341. out[*outlen] = 0; /* add zero */
  342. /* 6. Apply ToASCII.
  343. */
  344. rc = idna_to_ascii_4i (out, *outlen, tmpout, flags);
  345. if (rc != IDNA_SUCCESS)
  346. {
  347. free (utf8in);
  348. return rc;
  349. }
  350. /* 7. Verify that the result of step 6 matches the saved copy from
  351. * step 3, using a case-insensitive ASCII comparison.
  352. */
  353. if (c_strncasecmp (tmpout, IDNA_ACE_PREFIX, STRLEN (IDNA_ACE_PREFIX)) != 0)
  354. {
  355. free (utf8in);
  356. return IDNA_ROUNDTRIP_VERIFY_ERROR;
  357. }
  358. if (c_strcasecmp (utf8in, tmpout + STRLEN (IDNA_ACE_PREFIX)) != 0)
  359. {
  360. free (utf8in);
  361. return IDNA_ROUNDTRIP_VERIFY_ERROR;
  362. }
  363. /* 8. Return the saved copy from step 5.
  364. */
  365. free (utf8in);
  366. return IDNA_SUCCESS;
  367. }
  368. /**
  369. * idna_to_unicode_44i:
  370. * @in: input array with unicode code points.
  371. * @inlen: length of input array with unicode code points.
  372. * @out: output array with unicode code points.
  373. * @outlen: on input, maximum size of output array with unicode code points,
  374. * on exit, actual size of output array with unicode code points.
  375. * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
  376. * %IDNA_USE_STD3_ASCII_RULES.
  377. *
  378. * The ToUnicode operation takes a sequence of Unicode code points
  379. * that make up one domain label and returns a sequence of Unicode
  380. * code points. If the input sequence is a label in ACE form, then the
  381. * result is an equivalent internationalized label that is not in ACE
  382. * form, otherwise the original sequence is returned unaltered.
  383. *
  384. * ToUnicode never fails. If any step fails, then the original input
  385. * sequence is returned immediately in that step.
  386. *
  387. * The Punycode decoder can never output more code points than it
  388. * inputs, but Nameprep can, and therefore ToUnicode can. Note that
  389. * the number of octets needed to represent a sequence of code points
  390. * depends on the particular character encoding used.
  391. *
  392. * The inputs to ToUnicode are a sequence of code points, the
  393. * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of
  394. * ToUnicode is always a sequence of Unicode code points.
  395. *
  396. * Return value: Returns #Idna_rc error condition, but it must only be
  397. * used for debugging purposes. The output buffer is always
  398. * guaranteed to contain the correct data according to the
  399. * specification (sans malloc induced errors). NB! This means that
  400. * you normally ignore the return code from this function, as
  401. * checking it means breaking the standard.
  402. */
  403. int
  404. idna_to_unicode_44i (const uint32_t *in, size_t inlen,
  405. uint32_t *out, size_t *outlen, int flags)
  406. {
  407. int rc;
  408. size_t outlensave = *outlen;
  409. char *p;
  410. p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL);
  411. if (p == NULL)
  412. return IDNA_MALLOC_ERROR;
  413. rc = idna_to_unicode_internal (p, out, outlen, flags);
  414. if (rc != IDNA_SUCCESS)
  415. {
  416. memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ?
  417. inlen : outlensave));
  418. *outlen = inlen;
  419. }
  420. /* p is freed in idna_to_unicode_internal. */
  421. return rc;
  422. }
  423. /* Wrappers that handle several labels */
  424. /**
  425. * idna_to_ascii_4z:
  426. * @input: zero terminated input Unicode string.
  427. * @output: pointer to newly allocated output string.
  428. * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
  429. * %IDNA_USE_STD3_ASCII_RULES.
  430. *
  431. * Convert UCS-4 domain name to ASCII string. The domain name may
  432. * contain several labels, separated by dots. The output buffer must
  433. * be deallocated by the caller.
  434. *
  435. * Return value: Returns %IDNA_SUCCESS on success, or error code.
  436. **/
  437. int
  438. idna_to_ascii_4z (const uint32_t *input, char **output, int flags)
  439. {
  440. const uint32_t *start = input;
  441. const uint32_t *end;
  442. char buf[1<<9];
  443. char *out = NULL;
  444. int rc;
  445. /* 1) Whenever dots are used as label separators, the following
  446. characters MUST be recognized as dots: U+002E (full stop),
  447. U+3002 (ideographic full stop), U+FF0E (fullwidth full stop),
  448. U+FF61 (halfwidth ideographic full stop). */
  449. if (input[0] == 0)
  450. {
  451. /* Handle implicit zero-length root label. */
  452. *output = malloc (1);
  453. if (!*output)
  454. return IDNA_MALLOC_ERROR;
  455. STRCPY (*output, "");
  456. return IDNA_SUCCESS;
  457. }
  458. if (DOTP (input[0]) && input[1] == 0)
  459. {
  460. /* Handle explicit zero-length root label. */
  461. *output = malloc (2);
  462. if (!*output)
  463. return IDNA_MALLOC_ERROR;
  464. STRCPY (*output, ".");
  465. return IDNA_SUCCESS;
  466. }
  467. *output = NULL;
  468. do
  469. {
  470. end = start;
  471. for (; *end && !DOTP (*end); end++)
  472. ;
  473. if (*end == '\0' && start == end)
  474. {
  475. /* Handle explicit zero-length root label. */
  476. buf[0] = '\0';
  477. }
  478. else
  479. {
  480. rc = idna_to_ascii_4i (start, (size_t) (end - start), buf, flags);
  481. if (rc != IDNA_SUCCESS)
  482. {
  483. free (out);
  484. return rc;
  485. }
  486. }
  487. if (out)
  488. {
  489. size_t l = STRLEN (out) + 1 + STRLEN (buf) + 1;
  490. char *newp = realloc (out, l);
  491. if (!newp)
  492. {
  493. free (out);
  494. return IDNA_MALLOC_ERROR;
  495. }
  496. out = newp;
  497. STRCAT (out, ".");
  498. STRCAT (out, buf);
  499. }
  500. else
  501. {
  502. out = strdup (buf);
  503. if (!out)
  504. return IDNA_MALLOC_ERROR;
  505. }
  506. start = end + 1;
  507. }
  508. while (*end);
  509. *output = out;
  510. return IDNA_SUCCESS;
  511. }
  512. /**
  513. * idna_to_ascii_8z:
  514. * @input: zero terminated input UTF-8 string.
  515. * @output: pointer to newly allocated output string.
  516. * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
  517. * %IDNA_USE_STD3_ASCII_RULES.
  518. *
  519. * Convert UTF-8 domain name to ASCII string. The domain name may
  520. * contain several labels, separated by dots. The output buffer must
  521. * be deallocated by the caller.
  522. *
  523. * Return value: Returns %IDNA_SUCCESS on success, or error code.
  524. **/
  525. int
  526. idna_to_ascii_8z (const char *input, char **output, int flags)
  527. {
  528. uint32_t *ucs4;
  529. size_t ucs4len;
  530. int rc;
  531. ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
  532. if (!ucs4)
  533. return IDNA_ICONV_ERROR;
  534. rc = idna_to_ascii_4z (ucs4, output, flags);
  535. free (ucs4);
  536. return rc;
  537. }
  538. /**
  539. * idna_to_ascii_lz:
  540. * @input: zero terminated input string encoded in the current locale's
  541. * character set.
  542. * @output: pointer to newly allocated output string.
  543. * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
  544. * %IDNA_USE_STD3_ASCII_RULES.
  545. *
  546. * Convert domain name in the locale's encoding to ASCII string. The
  547. * domain name may contain several labels, separated by dots. The
  548. * output buffer must be deallocated by the caller.
  549. *
  550. * Return value: Returns %IDNA_SUCCESS on success, or error code.
  551. **/
  552. int
  553. idna_to_ascii_lz (const char *input, char **output, int flags)
  554. {
  555. char *utf8;
  556. int rc;
  557. utf8 = stringprep_locale_to_utf8 (input);
  558. if (!utf8)
  559. return IDNA_ICONV_ERROR;
  560. rc = idna_to_ascii_8z (utf8, output, flags);
  561. free (utf8);
  562. return rc;
  563. }
  564. /**
  565. * idna_to_unicode_4z4z:
  566. * @input: zero-terminated Unicode string.
  567. * @output: pointer to newly allocated output Unicode string.
  568. * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
  569. * %IDNA_USE_STD3_ASCII_RULES.
  570. *
  571. * Convert possibly ACE encoded domain name in UCS-4 format into a
  572. * UCS-4 string. The domain name may contain several labels,
  573. * separated by dots. The output buffer must be deallocated by the
  574. * caller.
  575. *
  576. * Return value: Returns %IDNA_SUCCESS on success, or error code.
  577. **/
  578. int
  579. idna_to_unicode_4z4z (const uint32_t *input, uint32_t **output, int flags)
  580. {
  581. const uint32_t *start = input;
  582. const uint32_t *end;
  583. uint32_t *buf;
  584. size_t buflen;
  585. uint32_t *out = NULL;
  586. size_t outlen = 0;
  587. *output = NULL;
  588. do
  589. {
  590. end = start;
  591. for (; *end && !DOTP (*end); end++)
  592. ;
  593. buflen = (size_t) (end - start);
  594. buf = malloc (sizeof (buf[0]) * (buflen + 1));
  595. if (!buf)
  596. {
  597. free (out);
  598. return IDNA_MALLOC_ERROR;
  599. }
  600. /* don't check return code as per specification! */
  601. idna_to_unicode_44i (start, (size_t) (end - start),
  602. buf, &buflen, flags);
  603. if (out)
  604. {
  605. uint32_t *newp = realloc (out,
  606. sizeof (out[0])
  607. * (outlen + 1 + buflen + 1));
  608. if (!newp)
  609. {
  610. free (buf);
  611. free (out);
  612. return IDNA_MALLOC_ERROR;
  613. }
  614. out = newp;
  615. out[outlen++] = 0x002E; /* '.' (full stop) */
  616. memcpy (out + outlen, buf, sizeof (buf[0]) * buflen);
  617. outlen += buflen;
  618. out[outlen] = 0x0;
  619. free (buf);
  620. }
  621. else
  622. {
  623. out = buf;
  624. outlen = buflen;
  625. out[outlen] = 0x0;
  626. }
  627. start = end + 1;
  628. }
  629. while (*end);
  630. *output = out;
  631. return IDNA_SUCCESS;
  632. }
  633. /**
  634. * idna_to_unicode_8z4z:
  635. * @input: zero-terminated UTF-8 string.
  636. * @output: pointer to newly allocated output Unicode string.
  637. * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
  638. * %IDNA_USE_STD3_ASCII_RULES.
  639. *
  640. * Convert possibly ACE encoded domain name in UTF-8 format into a
  641. * UCS-4 string. The domain name may contain several labels,
  642. * separated by dots. The output buffer must be deallocated by the
  643. * caller.
  644. *
  645. * Return value: Returns %IDNA_SUCCESS on success, or error code.
  646. **/
  647. int
  648. idna_to_unicode_8z4z (const char *input, uint32_t **output, int flags)
  649. {
  650. uint32_t *ucs4;
  651. size_t ucs4len;
  652. int rc;
  653. ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
  654. if (!ucs4)
  655. return IDNA_ICONV_ERROR;
  656. rc = idna_to_unicode_4z4z (ucs4, output, flags);
  657. free (ucs4);
  658. return rc;
  659. }
  660. /**
  661. * idna_to_unicode_8z8z:
  662. * @input: zero-terminated UTF-8 string.
  663. * @output: pointer to newly allocated output UTF-8 string.
  664. * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
  665. * %IDNA_USE_STD3_ASCII_RULES.
  666. *
  667. * Convert possibly ACE encoded domain name in UTF-8 format into a
  668. * UTF-8 string. The domain name may contain several labels,
  669. * separated by dots. The output buffer must be deallocated by the
  670. * caller.
  671. *
  672. * Return value: Returns %IDNA_SUCCESS on success, or error code.
  673. **/
  674. int
  675. idna_to_unicode_8z8z (const char *input, char **output, int flags)
  676. {
  677. uint32_t *ucs4;
  678. int rc;
  679. rc = idna_to_unicode_8z4z (input, &ucs4, flags);
  680. if (rc != IDNA_SUCCESS)
  681. return rc;
  682. *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL);
  683. free (ucs4);
  684. if (!*output)
  685. return IDNA_ICONV_ERROR;
  686. return IDNA_SUCCESS;
  687. }
  688. /**
  689. * idna_to_unicode_8zlz:
  690. * @input: zero-terminated UTF-8 string.
  691. * @output: pointer to newly allocated output string encoded in the
  692. * current locale's character set.
  693. * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
  694. * %IDNA_USE_STD3_ASCII_RULES.
  695. *
  696. * Convert possibly ACE encoded domain name in UTF-8 format into a
  697. * string encoded in the current locale's character set. The domain
  698. * name may contain several labels, separated by dots. The output
  699. * buffer must be deallocated by the caller.
  700. *
  701. * Return value: Returns %IDNA_SUCCESS on success, or error code.
  702. **/
  703. int
  704. idna_to_unicode_8zlz (const char *input, char **output, int flags)
  705. {
  706. char *utf8;
  707. int rc;
  708. rc = idna_to_unicode_8z8z (input, &utf8, flags);
  709. if (rc != IDNA_SUCCESS)
  710. return rc;
  711. *output = stringprep_utf8_to_locale (utf8);
  712. free (utf8);
  713. if (!*output)
  714. return IDNA_ICONV_ERROR;
  715. return IDNA_SUCCESS;
  716. }
  717. /**
  718. * idna_to_unicode_lzlz:
  719. * @input: zero-terminated string encoded in the current locale's
  720. * character set.
  721. * @output: pointer to newly allocated output string encoded in the
  722. * current locale's character set.
  723. * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
  724. * %IDNA_USE_STD3_ASCII_RULES.
  725. *
  726. * Convert possibly ACE encoded domain name in the locale's character
  727. * set into a string encoded in the current locale's character set.
  728. * The domain name may contain several labels, separated by dots. The
  729. * output buffer must be deallocated by the caller.
  730. *
  731. * Return value: Returns %IDNA_SUCCESS on success, or error code.
  732. **/
  733. int
  734. idna_to_unicode_lzlz (const char *input, char **output, int flags)
  735. {
  736. char *utf8;
  737. int rc;
  738. utf8 = stringprep_locale_to_utf8 (input);
  739. if (!utf8)
  740. return IDNA_ICONV_ERROR;
  741. rc = idna_to_unicode_8zlz (utf8, output, flags);
  742. free (utf8);
  743. return rc;
  744. }
  745. /**
  746. * IDNA_ACE_PREFIX
  747. *
  748. * The IANA allocated prefix to use for IDNA. "xn--"
  749. */
  750. /**
  751. * Idna_rc:
  752. * @IDNA_SUCCESS: Successful operation. This value is guaranteed to
  753. * always be zero, the remaining ones are only guaranteed to hold
  754. * non-zero values, for logical comparison purposes.
  755. * @IDNA_STRINGPREP_ERROR: Error during string preparation.
  756. * @IDNA_PUNYCODE_ERROR: Error during punycode operation.
  757. * @IDNA_CONTAINS_NON_LDH: For IDNA_USE_STD3_ASCII_RULES, indicate that
  758. * the string contains non-LDH ASCII characters.
  759. * @IDNA_CONTAINS_LDH: Same as @IDNA_CONTAINS_NON_LDH, for compatibility
  760. * with typo in earlier versions.
  761. * @IDNA_CONTAINS_MINUS: For IDNA_USE_STD3_ASCII_RULES, indicate that
  762. * the string contains a leading or trailing hyphen-minus (U+002D).
  763. * @IDNA_INVALID_LENGTH: The final output string is not within the
  764. * (inclusive) range 1 to IDNA_LABEL_MAX_LENGTH characters.
  765. * @IDNA_NO_ACE_PREFIX: The string does not contain the ACE prefix
  766. * (for ToUnicode).
  767. * @IDNA_ROUNDTRIP_VERIFY_ERROR: The ToASCII operation on output
  768. * string does not equal the input.
  769. * @IDNA_CONTAINS_ACE_PREFIX: The input contains the ACE prefix (for
  770. * ToASCII).
  771. * @IDNA_ICONV_ERROR: Character encoding conversion error.
  772. * @IDNA_MALLOC_ERROR: Could not allocate buffer (this is typically a
  773. * fatal error).
  774. * @IDNA_DLOPEN_ERROR: Could not dlopen the libcidn DSO (only used
  775. * internally in libc).
  776. *
  777. * Enumerated return codes of idna_to_ascii_4i(),
  778. * idna_to_unicode_44i() functions (and functions derived from those
  779. * functions). The value 0 is guaranteed to always correspond to
  780. * success.
  781. */
  782. /**
  783. * Idna_flags:
  784. * @IDNA_ALLOW_UNASSIGNED: Don't reject strings containing unassigned
  785. * Unicode code points.
  786. * @IDNA_USE_STD3_ASCII_RULES: Validate strings according to STD3
  787. * rules (i.e., normal host name rules).
  788. *
  789. * Flags to pass to idna_to_ascii_4i(), idna_to_unicode_44i() etc.
  790. */