idna.c 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888
  1. /* idna.c --- Convert to or from IDN strings.
  2. * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Simon Josefsson
  3. *
  4. * This file is part of GNU Libidn.
  5. *
  6. * GNU Libidn is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * GNU Libidn is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with GNU Libidn; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
  19. *
  20. */
  21. #ifdef HAVE_CONFIG_H
  22. # include "idn_config.h"
  23. #endif
  24. #include <stdlib.h>
  25. #include <string.h>
  26. #include <stringprep.h>
  27. #include <punycode.h>
  28. #include "idna.h"
  29. #define DOTP(c) ((c) == 0x002E || (c) == 0x3002 || \
  30. (c) == 0xFF0E || (c) == 0xFF61)
  31. #ifdef WITH_VALGRIND
  32. static size_t STRLEN(const char *s) {
  33. size_t ret = 0;
  34. while (*s++)
  35. ++ret;
  36. return ret;
  37. }
  38. static char* STRCPY(char* destination, const char* source) {
  39. char *p = destination;
  40. while (*source)
  41. *p++ = *source++;
  42. *p = 0;
  43. return destination;
  44. }
  45. static char* STRCAT(char* destination, const char* source) {
  46. char *p = destination;
  47. while (*p)
  48. ++p;
  49. while (*source)
  50. *p++ = *source++;
  51. *p = 0;
  52. return destination;
  53. }
  54. #else //WITH_VALGRIND
  55. # define STRLEN(s) strlen(s)
  56. # define STRCAT(d, s) strcat(d, s)
  57. # define STRCPY(d, s) strcpy(d, s)
  58. #endif
  59. /* Core functions */
  60. /**
  61. * idna_to_ascii_4i - convert Unicode domain name label to text
  62. * @in: input array with unicode code points.
  63. * @inlen: length of input array with unicode code points.
  64. * @out: output zero terminated string that must have room for at
  65. * least IDNA_LABEL_MAX_LENGTH characters plus the terminating zero.
  66. * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
  67. * %IDNA_USE_STD3_ASCII_RULES.
  68. *
  69. * The ToASCII operation takes a sequence of Unicode code points that
  70. * make up one domain label and transforms it into a sequence of code
  71. * points in the ASCII range (0..7F). If ToASCII succeeds, the
  72. * original sequence and the resulting sequence are equivalent labels.
  73. *
  74. * It is important to note that the ToASCII operation can fail. ToASCII
  75. * fails if any step of it fails. If any step of the ToASCII operation
  76. * fails on any label in a domain name, that domain name MUST NOT be used
  77. * as an internationalized domain name. The method for deadling with this
  78. * failure is application-specific.
  79. *
  80. * The inputs to ToASCII are a sequence of code points, the AllowUnassigned
  81. * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a
  82. * sequence of ASCII code points or a failure condition.
  83. *
  84. * ToASCII never alters a sequence of code points that are all in the ASCII
  85. * range to begin with (although it could fail). Applying the ToASCII
  86. * operation multiple times has exactly the same effect as applying it just
  87. * once.
  88. *
  89. * Return value: Returns 0 on success, or an #Idna_rc error code.
  90. */
  91. int
  92. idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags)
  93. {
  94. size_t len, outlen;
  95. uint32_t *src; /* XXX don't need to copy data? */
  96. int rc;
  97. /*
  98. * ToASCII consists of the following steps:
  99. *
  100. * 1. If all code points in the sequence are in the ASCII range (0..7F)
  101. * then skip to step 3.
  102. */
  103. {
  104. size_t i;
  105. int inasciirange;
  106. inasciirange = 1;
  107. for (i = 0; i < inlen; i++)
  108. if (in[i] > 0x7F)
  109. inasciirange = 0;
  110. if (inasciirange)
  111. {
  112. src = malloc (sizeof (in[0]) * (inlen + 1));
  113. if (src == NULL)
  114. return IDNA_MALLOC_ERROR;
  115. memcpy (src, in, sizeof (in[0]) * inlen);
  116. src[inlen] = 0;
  117. goto step3;
  118. }
  119. }
  120. /*
  121. * 2. Perform the steps specified in [NAMEPREP] and fail if there is
  122. * an error. The AllowUnassigned flag is used in [NAMEPREP].
  123. */
  124. {
  125. char *p;
  126. p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL);
  127. if (p == NULL)
  128. return IDNA_MALLOC_ERROR;
  129. len = STRLEN (p);
  130. do
  131. {
  132. char *newp;
  133. len = 2 * len + 10; /* XXX better guess? */
  134. newp = realloc (p, len);
  135. if (newp == NULL)
  136. {
  137. free (p);
  138. return IDNA_MALLOC_ERROR;
  139. }
  140. p = newp;
  141. if (flags & IDNA_ALLOW_UNASSIGNED)
  142. rc = stringprep_nameprep (p, len);
  143. else
  144. rc = stringprep_nameprep_no_unassigned (p, len);
  145. }
  146. while (rc == STRINGPREP_TOO_SMALL_BUFFER);
  147. if (rc != STRINGPREP_OK)
  148. {
  149. free (p);
  150. return IDNA_STRINGPREP_ERROR;
  151. }
  152. src = stringprep_utf8_to_ucs4 (p, -1, NULL);
  153. free (p);
  154. }
  155. step3:
  156. /*
  157. * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks:
  158. *
  159. * (a) Verify the absence of non-LDH ASCII code points; that is,
  160. * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
  161. *
  162. * (b) Verify the absence of leading and trailing hyphen-minus;
  163. * that is, the absence of U+002D at the beginning and end of
  164. * the sequence.
  165. */
  166. if (flags & IDNA_USE_STD3_ASCII_RULES)
  167. {
  168. size_t i;
  169. for (i = 0; src[i]; i++)
  170. if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F ||
  171. (src[i] >= 0x3A && src[i] <= 0x40) ||
  172. (src[i] >= 0x5B && src[i] <= 0x60) ||
  173. (src[i] >= 0x7B && src[i] <= 0x7F))
  174. {
  175. free (src);
  176. return IDNA_CONTAINS_NON_LDH;
  177. }
  178. if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D))
  179. {
  180. free (src);
  181. return IDNA_CONTAINS_MINUS;
  182. }
  183. }
  184. /*
  185. * 4. If all code points in the sequence are in the ASCII range
  186. * (0..7F), then skip to step 8.
  187. */
  188. {
  189. size_t i;
  190. int inasciirange;
  191. inasciirange = 1;
  192. for (i = 0; src[i]; i++)
  193. {
  194. if (src[i] > 0x7F)
  195. inasciirange = 0;
  196. /* copy string to output buffer if we are about to skip to step8 */
  197. if (i < IDNA_LABEL_MAX_LENGTH)
  198. out[i] = src[i];
  199. }
  200. if (i < IDNA_LABEL_MAX_LENGTH + 1)
  201. out[i] = '\0';
  202. else
  203. out[IDNA_LABEL_MAX_LENGTH] = 0;
  204. if (inasciirange)
  205. goto step8;
  206. }
  207. /*
  208. * 5. Verify that the sequence does NOT begin with the ACE prefix.
  209. *
  210. */
  211. {
  212. size_t i;
  213. int match;
  214. match = 1;
  215. for (i = 0; match && i < STRLEN (IDNA_ACE_PREFIX); i++)
  216. if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i])
  217. match = 0;
  218. if (match)
  219. {
  220. free (src);
  221. return IDNA_CONTAINS_ACE_PREFIX;
  222. }
  223. }
  224. /*
  225. * 6. Encode the sequence using the encoding algorithm in [PUNYCODE]
  226. * and fail if there is an error.
  227. */
  228. for (len = 0; src[len]; len++)
  229. ;
  230. src[len] = '\0';
  231. outlen = IDNA_LABEL_MAX_LENGTH - STRLEN (IDNA_ACE_PREFIX);
  232. rc = punycode_encode (len, src, NULL,
  233. &outlen, &out[STRLEN (IDNA_ACE_PREFIX)]);
  234. if (rc != PUNYCODE_SUCCESS)
  235. {
  236. free (src);
  237. return IDNA_PUNYCODE_ERROR;
  238. }
  239. out[STRLEN (IDNA_ACE_PREFIX) + outlen] = '\0';
  240. /*
  241. * 7. Prepend the ACE prefix.
  242. */
  243. memcpy (out, IDNA_ACE_PREFIX, STRLEN (IDNA_ACE_PREFIX));
  244. /*
  245. * 8. Verify that the number of code points is in the range 1 to IDNA_LABEL_MAX_LENGTH
  246. * inclusive (0 is excluded).
  247. */
  248. step8:
  249. free (src);
  250. if (STRLEN (out) < 1 || STRLEN (out) > IDNA_LABEL_MAX_LENGTH - 1)
  251. return IDNA_INVALID_LENGTH;
  252. return IDNA_SUCCESS;
  253. }
  254. /* ToUnicode(). May realloc() utf8in. Will free utf8in unconditionally. */
  255. static int
  256. idna_to_unicode_internal (char *utf8in,
  257. uint32_t * out, size_t * outlen, int flags)
  258. {
  259. int rc;
  260. char tmpout[IDNA_LABEL_MAX_LENGTH + 1];
  261. size_t utf8len = STRLEN (utf8in) + 1;
  262. size_t addlen = 0;
  263. /*
  264. * ToUnicode consists of the following steps:
  265. *
  266. * 1. If the sequence contains any code points outside the ASCII range
  267. * (0..7F) then proceed to step 2, otherwise skip to step 3.
  268. */
  269. {
  270. size_t i;
  271. int inasciirange;
  272. inasciirange = 1;
  273. for (i = 0; utf8in[i]; i++)
  274. if (utf8in[i] & ~0x7F)
  275. inasciirange = 0;
  276. if (inasciirange)
  277. goto step3;
  278. }
  279. /*
  280. * 2. Perform the steps specified in [NAMEPREP] and fail if there is an
  281. * error. (If step 3 of ToASCII is also performed here, it will not
  282. * affect the overall behavior of ToUnicode, but it is not
  283. * necessary.) The AllowUnassigned flag is used in [NAMEPREP].
  284. */
  285. do
  286. {
  287. char *newp = realloc (utf8in, utf8len + addlen);
  288. if (newp == NULL)
  289. {
  290. free (utf8in);
  291. return IDNA_MALLOC_ERROR;
  292. }
  293. utf8in = newp;
  294. if (flags & IDNA_ALLOW_UNASSIGNED)
  295. rc = stringprep_nameprep (utf8in, utf8len + addlen);
  296. else
  297. rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen);
  298. addlen += 1;
  299. }
  300. while (rc == STRINGPREP_TOO_SMALL_BUFFER);
  301. if (rc != STRINGPREP_OK)
  302. {
  303. free (utf8in);
  304. return IDNA_STRINGPREP_ERROR;
  305. }
  306. /* 3. Verify that the sequence begins with the ACE prefix, and save a
  307. * copy of the sequence.
  308. */
  309. step3:
  310. if (strncmp (IDNA_ACE_PREFIX, utf8in, STRLEN (IDNA_ACE_PREFIX)) != 0)
  311. {
  312. free (utf8in);
  313. return IDNA_NO_ACE_PREFIX;
  314. }
  315. /* 4. Remove the ACE prefix.
  316. */
  317. memmove (utf8in, &utf8in[STRLEN (IDNA_ACE_PREFIX)],
  318. STRLEN (utf8in) - STRLEN (IDNA_ACE_PREFIX) + 1);
  319. /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE]
  320. * and fail if there is an error. Save a copy of the result of
  321. * this step.
  322. */
  323. (*outlen)--; /* reserve one for the zero */
  324. rc = punycode_decode (STRLEN (utf8in), utf8in, outlen, out, NULL);
  325. if (rc != PUNYCODE_SUCCESS)
  326. {
  327. free (utf8in);
  328. return IDNA_PUNYCODE_ERROR;
  329. }
  330. out[*outlen] = 0; /* add zero */
  331. /* 6. Apply ToASCII.
  332. */
  333. rc = idna_to_ascii_4i (out, *outlen, tmpout, flags);
  334. if (rc != IDNA_SUCCESS)
  335. {
  336. free (utf8in);
  337. return rc;
  338. }
  339. /* 7. Verify that the result of step 6 matches the saved copy from
  340. * step 3, using a case-insensitive ASCII comparison.
  341. */
  342. if (strcasecmp (utf8in, tmpout + STRLEN (IDNA_ACE_PREFIX)) != 0)
  343. {
  344. free (utf8in);
  345. return IDNA_ROUNDTRIP_VERIFY_ERROR;
  346. }
  347. /* 8. Return the saved copy from step 5.
  348. */
  349. free (utf8in);
  350. return IDNA_SUCCESS;
  351. }
  352. /**
  353. * idna_to_unicode_44i - convert domain name label to Unicode
  354. * @in: input array with unicode code points.
  355. * @inlen: length of input array with unicode code points.
  356. * @out: output array with unicode code points.
  357. * @outlen: on input, maximum size of output array with unicode code points,
  358. * on exit, actual size of output array with unicode code points.
  359. * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
  360. * %IDNA_USE_STD3_ASCII_RULES.
  361. *
  362. * The ToUnicode operation takes a sequence of Unicode code points
  363. * that make up one domain label and returns a sequence of Unicode
  364. * code points. If the input sequence is a label in ACE form, then the
  365. * result is an equivalent internationalized label that is not in ACE
  366. * form, otherwise the original sequence is returned unaltered.
  367. *
  368. * ToUnicode never fails. If any step fails, then the original input
  369. * sequence is returned immediately in that step.
  370. *
  371. * The Punycode decoder can never output more code points than it
  372. * inputs, but Nameprep can, and therefore ToUnicode can. Note that
  373. * the number of octets needed to represent a sequence of code points
  374. * depends on the particular character encoding used.
  375. *
  376. * The inputs to ToUnicode are a sequence of code points, the
  377. * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of
  378. * ToUnicode is always a sequence of Unicode code points.
  379. *
  380. * Return value: Returns #Idna_rc error condition, but it must only be
  381. * used for debugging purposes. The output buffer is always
  382. * guaranteed to contain the correct data according to the
  383. * specification (sans malloc induced errors). NB! This means that
  384. * you normally ignore the return code from this function, as
  385. * checking it means breaking the standard.
  386. */
  387. int
  388. idna_to_unicode_44i (const uint32_t * in, size_t inlen,
  389. uint32_t * out, size_t * outlen, int flags)
  390. {
  391. int rc;
  392. size_t outlensave = *outlen;
  393. char *p;
  394. p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL);
  395. if (p == NULL)
  396. return IDNA_MALLOC_ERROR;
  397. rc = idna_to_unicode_internal (p, out, outlen, flags);
  398. if (rc != IDNA_SUCCESS)
  399. {
  400. memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ?
  401. inlen : outlensave));
  402. *outlen = inlen;
  403. }
  404. /* p is freed in idna_to_unicode_internal. */
  405. return rc;
  406. }
  407. /* Wrappers that handle several labels */
  408. /**
  409. * idna_to_ascii_4z - convert Unicode domain name to text
  410. * @input: zero terminated input Unicode string.
  411. * @output: pointer to newly allocated output string.
  412. * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
  413. * %IDNA_USE_STD3_ASCII_RULES.
  414. *
  415. * Convert UCS-4 domain name to ASCII string. The domain name may
  416. * contain several labels, separated by dots. The output buffer must
  417. * be deallocated by the caller.
  418. *
  419. * Return value: Returns %IDNA_SUCCESS on success, or error code.
  420. **/
  421. int
  422. idna_to_ascii_4z (const uint32_t * input, char **output, int flags)
  423. {
  424. const uint32_t *start = input;
  425. const uint32_t *end = input;
  426. char buf[1<<9];
  427. char *out = NULL;
  428. int rc;
  429. /* 1) Whenever dots are used as label separators, the following
  430. characters MUST be recognized as dots: U+002E (full stop),
  431. U+3002 (ideographic full stop), U+FF0E (fullwidth full stop),
  432. U+FF61 (halfwidth ideographic full stop). */
  433. if (input[0] == 0)
  434. {
  435. /* Handle implicit zero-length root label. */
  436. *output = malloc (1);
  437. if (!*output)
  438. return IDNA_MALLOC_ERROR;
  439. STRCPY (*output, "");
  440. return IDNA_SUCCESS;
  441. }
  442. if (DOTP (input[0]) && input[1] == 0)
  443. {
  444. /* Handle explicit zero-length root label. */
  445. *output = malloc (2);
  446. if (!*output)
  447. return IDNA_MALLOC_ERROR;
  448. STRCPY (*output, ".");
  449. return IDNA_SUCCESS;
  450. }
  451. *output = NULL;
  452. do
  453. {
  454. end = start;
  455. for (; *end && !DOTP (*end); end++)
  456. ;
  457. if (*end == '\0' && start == end)
  458. {
  459. /* Handle explicit zero-length root label. */
  460. buf[0] = '\0';
  461. }
  462. else
  463. {
  464. rc = idna_to_ascii_4i (start, end - start, buf, flags);
  465. if (rc != IDNA_SUCCESS)
  466. {
  467. free (out);
  468. return rc;
  469. }
  470. }
  471. if (out)
  472. {
  473. char *newp = realloc (out, STRLEN (out) + 1 + STRLEN (buf) + 1);
  474. if (!newp)
  475. {
  476. free (out);
  477. return IDNA_MALLOC_ERROR;
  478. }
  479. out = newp;
  480. STRCAT (out, ".");
  481. STRCAT (out, buf);
  482. }
  483. else
  484. {
  485. out = (char *) malloc (STRLEN (buf) + 1);
  486. if (!out)
  487. return IDNA_MALLOC_ERROR;
  488. STRCPY (out, buf);
  489. }
  490. start = end + 1;
  491. }
  492. while (*end);
  493. *output = out;
  494. return IDNA_SUCCESS;
  495. }
  496. /**
  497. * idna_to_ascii_8z - convert Unicode domain name to text
  498. * @input: zero terminated input UTF-8 string.
  499. * @output: pointer to newly allocated output string.
  500. * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
  501. * %IDNA_USE_STD3_ASCII_RULES.
  502. *
  503. * Convert UTF-8 domain name to ASCII string. The domain name may
  504. * contain several labels, separated by dots. The output buffer must
  505. * be deallocated by the caller.
  506. *
  507. * Return value: Returns %IDNA_SUCCESS on success, or error code.
  508. **/
  509. int
  510. idna_to_ascii_8z (const char *input, char **output, int flags)
  511. {
  512. uint32_t *ucs4;
  513. size_t ucs4len;
  514. int rc;
  515. ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
  516. if (!ucs4)
  517. return IDNA_ICONV_ERROR;
  518. rc = idna_to_ascii_4z (ucs4, output, flags);
  519. free (ucs4);
  520. return rc;
  521. }
  522. /**
  523. * idna_to_ascii_lz - convert Unicode domain name to text
  524. * @input: zero terminated input string encoded in the current locale's
  525. * character set.
  526. * @output: pointer to newly allocated output string.
  527. * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
  528. * %IDNA_USE_STD3_ASCII_RULES.
  529. *
  530. * Convert domain name in the locale's encoding to ASCII string. The
  531. * domain name may contain several labels, separated by dots. The
  532. * output buffer must be deallocated by the caller.
  533. *
  534. * Return value: Returns %IDNA_SUCCESS on success, or error code.
  535. **/
  536. int
  537. idna_to_ascii_lz (const char *input, char **output, int flags)
  538. {
  539. char *utf8;
  540. int rc;
  541. utf8 = stringprep_locale_to_utf8 (input);
  542. if (!utf8)
  543. return IDNA_ICONV_ERROR;
  544. rc = idna_to_ascii_8z (utf8, output, flags);
  545. free (utf8);
  546. return rc;
  547. }
  548. /**
  549. * idna_to_unicode_4z4z - convert domain name to Unicode
  550. * @input: zero-terminated Unicode string.
  551. * @output: pointer to newly allocated output Unicode string.
  552. * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
  553. * %IDNA_USE_STD3_ASCII_RULES.
  554. *
  555. * Convert possibly ACE encoded domain name in UCS-4 format into a
  556. * UCS-4 string. The domain name may contain several labels,
  557. * separated by dots. The output buffer must be deallocated by the
  558. * caller.
  559. *
  560. * Return value: Returns %IDNA_SUCCESS on success, or error code.
  561. **/
  562. int
  563. idna_to_unicode_4z4z (const uint32_t * input, uint32_t ** output, int flags)
  564. {
  565. const uint32_t *start = input;
  566. const uint32_t *end = input;
  567. uint32_t *buf;
  568. size_t buflen;
  569. uint32_t *out = NULL;
  570. size_t outlen = 0;
  571. int rc;
  572. *output = NULL;
  573. do
  574. {
  575. end = start;
  576. for (; *end && !DOTP (*end); end++)
  577. ;
  578. buflen = end - start;
  579. buf = malloc (sizeof (buf[0]) * (buflen + 1));
  580. if (!buf)
  581. return IDNA_MALLOC_ERROR;
  582. rc = idna_to_unicode_44i (start, end - start, buf, &buflen, flags);
  583. /* don't check rc as per specification! */
  584. if (out)
  585. {
  586. uint32_t *newp = realloc (out,
  587. sizeof (out[0])
  588. * (outlen + 1 + buflen + 1));
  589. if (!newp)
  590. {
  591. free (buf);
  592. free (out);
  593. return IDNA_MALLOC_ERROR;
  594. }
  595. out = newp;
  596. out[outlen++] = 0x002E; /* '.' (full stop) */
  597. memcpy (out + outlen, buf, sizeof (buf[0]) * buflen);
  598. outlen += buflen;
  599. out[outlen] = 0x0;
  600. free (buf);
  601. }
  602. else
  603. {
  604. out = buf;
  605. outlen = buflen;
  606. out[outlen] = 0x0;
  607. }
  608. start = end + 1;
  609. }
  610. while (*end);
  611. *output = out;
  612. return IDNA_SUCCESS;
  613. }
  614. /**
  615. * idna_to_unicode_8z4z - convert domain name to Unicode
  616. * @input: zero-terminated UTF-8 string.
  617. * @output: pointer to newly allocated output Unicode string.
  618. * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
  619. * %IDNA_USE_STD3_ASCII_RULES.
  620. *
  621. * Convert possibly ACE encoded domain name in UTF-8 format into a
  622. * UCS-4 string. The domain name may contain several labels,
  623. * separated by dots. The output buffer must be deallocated by the
  624. * caller.
  625. *
  626. * Return value: Returns %IDNA_SUCCESS on success, or error code.
  627. **/
  628. int
  629. idna_to_unicode_8z4z (const char *input, uint32_t ** output, int flags)
  630. {
  631. uint32_t *ucs4;
  632. size_t ucs4len;
  633. int rc;
  634. ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
  635. if (!ucs4)
  636. return IDNA_ICONV_ERROR;
  637. rc = idna_to_unicode_4z4z (ucs4, output, flags);
  638. free (ucs4);
  639. return rc;
  640. }
  641. /**
  642. * idna_to_unicode_8z8z - convert domain name to Unicode
  643. * @input: zero-terminated UTF-8 string.
  644. * @output: pointer to newly allocated output UTF-8 string.
  645. * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
  646. * %IDNA_USE_STD3_ASCII_RULES.
  647. *
  648. * Convert possibly ACE encoded domain name in UTF-8 format into a
  649. * UTF-8 string. The domain name may contain several labels,
  650. * separated by dots. The output buffer must be deallocated by the
  651. * caller.
  652. *
  653. * Return value: Returns %IDNA_SUCCESS on success, or error code.
  654. **/
  655. int
  656. idna_to_unicode_8z8z (const char *input, char **output, int flags)
  657. {
  658. uint32_t *ucs4;
  659. int rc;
  660. rc = idna_to_unicode_8z4z (input, &ucs4, flags);
  661. *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL);
  662. free (ucs4);
  663. if (!*output)
  664. return IDNA_ICONV_ERROR;
  665. return rc;
  666. }
  667. /**
  668. * idna_to_unicode_8zlz - convert domain name to Unicode
  669. * @input: zero-terminated UTF-8 string.
  670. * @output: pointer to newly allocated output string encoded in the
  671. * current locale's character set.
  672. * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
  673. * %IDNA_USE_STD3_ASCII_RULES.
  674. *
  675. * Convert possibly ACE encoded domain name in UTF-8 format into a
  676. * string encoded in the current locale's character set. The domain
  677. * name may contain several labels, separated by dots. The output
  678. * buffer must be deallocated by the caller.
  679. *
  680. * Return value: Returns %IDNA_SUCCESS on success, or error code.
  681. **/
  682. int
  683. idna_to_unicode_8zlz (const char *input, char **output, int flags)
  684. {
  685. char *utf8;
  686. int rc;
  687. rc = idna_to_unicode_8z8z (input, &utf8, flags);
  688. *output = stringprep_utf8_to_locale (utf8);
  689. free (utf8);
  690. if (!*output)
  691. return IDNA_ICONV_ERROR;
  692. return rc;
  693. }
  694. /**
  695. * idna_to_unicode_lzlz - convert domain name to Unicode
  696. * @input: zero-terminated string encoded in the current locale's
  697. * character set.
  698. * @output: pointer to newly allocated output string encoded in the
  699. * current locale's character set.
  700. * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
  701. * %IDNA_USE_STD3_ASCII_RULES.
  702. *
  703. * Convert possibly ACE encoded domain name in the locale's character
  704. * set into a string encoded in the current locale's character set.
  705. * The domain name may contain several labels, separated by dots. The
  706. * output buffer must be deallocated by the caller.
  707. *
  708. * Return value: Returns %IDNA_SUCCESS on success, or error code.
  709. **/
  710. int
  711. idna_to_unicode_lzlz (const char *input, char **output, int flags)
  712. {
  713. char *utf8;
  714. int rc;
  715. utf8 = stringprep_locale_to_utf8 (input);
  716. if (!utf8)
  717. return IDNA_ICONV_ERROR;
  718. rc = idna_to_unicode_8zlz (utf8, output, flags);
  719. free (utf8);
  720. return rc;
  721. }
  722. /**
  723. * IDNA_ACE_PREFIX
  724. *
  725. * The IANA allocated prefix to use for IDNA. "xn--"
  726. */
  727. /**
  728. * Idna_rc:
  729. * @IDNA_SUCCESS: Successful operation. This value is guaranteed to
  730. * always be zero, the remaining ones are only guaranteed to hold
  731. * non-zero values, for logical comparison purposes.
  732. * @IDNA_STRINGPREP_ERROR: Error during string preparation.
  733. * @IDNA_PUNYCODE_ERROR: Error during punycode operation.
  734. * @IDNA_CONTAINS_NON_LDH: For IDNA_USE_STD3_ASCII_RULES, indicate that
  735. * the string contains non-LDH ASCII characters.
  736. * @IDNA_CONTAINS_MINUS: For IDNA_USE_STD3_ASCII_RULES, indicate that
  737. * the string contains a leading or trailing hyphen-minus (U+002D).
  738. * @IDNA_INVALID_LENGTH: The final output string is not within the
  739. * (inclusive) range 1 to IDNA_LABEL_MAX_LENGTH characters.
  740. * @IDNA_NO_ACE_PREFIX: The string does not contain the ACE prefix
  741. * (for ToUnicode).
  742. * @IDNA_ROUNDTRIP_VERIFY_ERROR: The ToASCII operation on output
  743. * string does not equal the input.
  744. * @IDNA_CONTAINS_ACE_PREFIX: The input contains the ACE prefix (for
  745. * ToASCII).
  746. * @IDNA_ICONV_ERROR: Could not convert string in locale encoding.
  747. * @IDNA_MALLOC_ERROR: Could not allocate buffer (this is typically a
  748. * fatal error).
  749. * @IDNA_DLOPEN_ERROR: Could not dlopen the libcidn DSO (only used
  750. * internally in libc).
  751. *
  752. * Enumerated return codes of idna_to_ascii_4i(),
  753. * idna_to_unicode_44i() functions (and functions derived from those
  754. * functions). The value 0 is guaranteed to always correspond to
  755. * success.
  756. */
  757. /**
  758. * Idna_flags:
  759. * @IDNA_ALLOW_UNASSIGNED: Don't reject strings containing unassigned
  760. * Unicode code points.
  761. * @IDNA_USE_STD3_ASCII_RULES: Validate strings according to STD3
  762. * rules (i.e., normal host name rules).
  763. *
  764. * Flags to pass to idna_to_ascii_4i(), idna_to_unicode_44i() etc.
  765. */