12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926 |
- /* idna.c --- Implementation of the Internationalized Domain Name library.
- Copyright (C) 2002-2024 Simon Josefsson
- This file is part of GNU Libidn.
- GNU Libidn is free software: you can redistribute it and/or
- modify it under the terms of either:
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at
- your option) any later version.
- or
- * the GNU General Public License as published by the Free
- Software Foundation; either version 2 of the License, or (at
- your option) any later version.
- or both in parallel, as here.
- GNU Libidn is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
- You should have received copies of the GNU General Public License and
- the GNU Lesser General Public License along with this program. If
- not, see <https://www.gnu.org/licenses/>. */
- #ifdef HAVE_CONFIG_H
- # include "config.h"
- #endif
- #include <stdlib.h>
- #include <string.h>
- #include <stringprep.h>
- #include <punycode.h>
- #include "idna.h"
- /* Get c_strcasecmp. */
- #include <c-strcase.h>
/* DOTP(c): non-zero when code point C is one of the four characters
   recognized as a label separator: U+002E (full stop), U+3002
   (ideographic full stop), U+FF0E (fullwidth full stop) or U+FF61
   (halfwidth ideographic full stop).  C is evaluated more than once,
   so it must not have side effects.  */
#define DOTP(c)                 \
  (   (c) == 0x002E             \
   || (c) == 0x3002             \
   || (c) == 0xFF0E             \
   || (c) == 0xFF61)
- #ifdef WITH_VALGRIND
/* Byte-at-a-time stand-in for strlen(), used when WITH_VALGRIND is
   defined.  Returns the number of chars that precede the terminating
   zero of S.  */
static size_t
STRLEN (const char *s)
{
  const char *cursor = s;

  while (*cursor != '\0')
    cursor++;
  return (size_t) (cursor - s);
}
/* Byte-at-a-time stand-in for strcpy(), used when WITH_VALGRIND is
   defined.  Copies SOURCE, including its terminating zero, to
   DESTINATION and returns DESTINATION.  The caller must provide enough
   room.  */
static char *
STRCPY (char *destination, const char *source)
{
  size_t i = 0;

  /* Copy first, test afterwards, so the terminator is copied too.  */
  do
    destination[i] = source[i];
  while (source[i++] != '\0');

  return destination;
}
/* Byte-at-a-time stand-in for strcat(), used when WITH_VALGRIND is
   defined.  Appends SOURCE to the zero terminated string held in
   DESTINATION, re-terminates it and returns DESTINATION.  The caller
   must provide enough room.  */
static char *
STRCAT (char *destination, const char *source)
{
  char *tail = destination;

  /* Locate the current terminator.  */
  for (; *tail != '\0'; tail++)
    ;

  /* Append, then terminate again.  */
  for (; *source != '\0'; source++, tail++)
    *tail = *source;
  *tail = '\0';

  return destination;
}
- #else //WITH_VALGRIND
/* Without WITH_VALGRIND the helpers map directly onto the C library.  */
# define STRLEN(str) strlen (str)
# define STRCPY(dst, src) strcpy (dst, src)
# define STRCAT(dst, src) strcat (dst, src)
- #endif
- /* Core functions */
- /**
- * idna_to_ascii_4i:
- * @in: input array with unicode code points.
- * @inlen: length of input array with unicode code points.
- * @out: output zero terminated string that must have room for at
- * least IDNA_LABEL_MAX_LENGTH characters plus the terminating zero.
- * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
- * %IDNA_USE_STD3_ASCII_RULES.
- *
- * The ToASCII operation takes a sequence of Unicode code points that
- * make up one domain label and transforms it into a sequence of code
- * points in the ASCII range (0..7F). If ToASCII succeeds, the
- * original sequence and the resulting sequence are equivalent labels.
- *
- * It is important to note that the ToASCII operation can fail. ToASCII
- * fails if any step of it fails. If any step of the ToASCII operation
- * fails on any label in a domain name, that domain name MUST NOT be used
- * as an internationalized domain name. The method for deadling with this
- * failure is application-specific.
- *
- * The inputs to ToASCII are a sequence of code points, the AllowUnassigned
- * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a
- * sequence of ASCII code points or a failure condition.
- *
- * ToASCII never alters a sequence of code points that are all in the ASCII
- * range to begin with (although it could fail). Applying the ToASCII
- * operation multiple times has exactly the same effect as applying it just
- * once.
- *
- * Return value: Returns 0 on success, or an #Idna_rc error code.
- */
- int
- idna_to_ascii_4i (const uint32_t *in, size_t inlen, char *out, int flags)
- {
- size_t len, outlen;
- uint32_t *src; /* XXX don't need to copy data? */
- int rc;
- /*
- * ToASCII consists of the following steps:
- *
- * 1. If all code points in the sequence are in the ASCII range (0..7F)
- * then skip to step 3.
- */
- {
- size_t i;
- int inasciirange;
- inasciirange = 1;
- for (i = 0; i < inlen; i++)
- if (in[i] > 0x7F)
- inasciirange = 0;
- if (inasciirange)
- {
- src = malloc (sizeof (in[0]) * (inlen + 1));
- if (src == NULL)
- return IDNA_MALLOC_ERROR;
- memcpy (src, in, sizeof (in[0]) * inlen);
- src[inlen] = 0;
- goto step3;
- }
- }
- /*
- * 2. Perform the steps specified in [NAMEPREP] and fail if there is
- * an error. The AllowUnassigned flag is used in [NAMEPREP].
- */
- {
- char *p;
- p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL);
- if (p == NULL)
- return IDNA_MALLOC_ERROR;
- len = STRLEN (p);
- do
- {
- char *newp;
- len = 2 * len + 10; /* XXX better guess? */
- newp = realloc (p, len);
- if (newp == NULL)
- {
- free (p);
- return IDNA_MALLOC_ERROR;
- }
- p = newp;
- if (flags & IDNA_ALLOW_UNASSIGNED)
- rc = stringprep_nameprep (p, len);
- else
- rc = stringprep_nameprep_no_unassigned (p, len);
- }
- while (rc == STRINGPREP_TOO_SMALL_BUFFER);
- if (rc != STRINGPREP_OK)
- {
- free (p);
- return IDNA_STRINGPREP_ERROR;
- }
- src = stringprep_utf8_to_ucs4 (p, -1, NULL);
- free (p);
- if (!src)
- return IDNA_MALLOC_ERROR;
- }
- step3:
- /*
- * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks:
- *
- * (a) Verify the absence of non-LDH ASCII code points; that is,
- * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
- *
- * (b) Verify the absence of leading and trailing hyphen-minus;
- * that is, the absence of U+002D at the beginning and end of
- * the sequence.
- */
- if (flags & IDNA_USE_STD3_ASCII_RULES)
- {
- size_t i;
- for (i = 0; src[i]; i++)
- if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F ||
- (src[i] >= 0x3A && src[i] <= 0x40) ||
- (src[i] >= 0x5B && src[i] <= 0x60) ||
- (src[i] >= 0x7B && src[i] <= 0x7F))
- {
- free (src);
- return IDNA_CONTAINS_NON_LDH;
- }
- if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D))
- {
- free (src);
- return IDNA_CONTAINS_MINUS;
- }
- }
- /*
- * 4. If all code points in the sequence are in the ASCII range
- * (0..7F), then skip to step 8.
- */
- {
- size_t i;
- int inasciirange;
- inasciirange = 1;
- for (i = 0; src[i]; i++)
- {
- if (src[i] > 0x7F)
- inasciirange = 0;
- /* copy string to output buffer if we are about to skip to step8 */
- if (i < IDNA_LABEL_MAX_LENGTH)
- out[i] = src[i];
- }
- if (i < IDNA_LABEL_MAX_LENGTH)
- out[i] = '\0';
- else
- {
- out[IDNA_LABEL_MAX_LENGTH] = 0;
- free (src);
- return IDNA_INVALID_LENGTH;
- }
- if (inasciirange)
- goto step8;
- }
- /*
- * 5. Verify that the sequence does NOT begin with the ACE prefix.
- *
- */
- {
- size_t i;
- int match;
- match = 1;
- for (i = 0; match && i < STRLEN (IDNA_ACE_PREFIX); i++)
- if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i])
- match = 0;
- if (match)
- {
- free (src);
- return IDNA_CONTAINS_ACE_PREFIX;
- }
- }
- /*
- * 6. Encode the sequence using the encoding algorithm in [PUNYCODE]
- * and fail if there is an error.
- */
- for (len = 0; src[len]; len++)
- ;
- src[len] = '\0';
- outlen = IDNA_LABEL_MAX_LENGTH - STRLEN (IDNA_ACE_PREFIX);
- rc = punycode_encode (len, src, NULL,
- &outlen, &out[STRLEN (IDNA_ACE_PREFIX)]);
- if (rc != PUNYCODE_SUCCESS)
- {
- free (src);
- return IDNA_PUNYCODE_ERROR;
- }
- out[STRLEN (IDNA_ACE_PREFIX) + outlen] = '\0';
- /*
- * 7. Prepend the ACE prefix.
- */
- memcpy (out, IDNA_ACE_PREFIX, STRLEN (IDNA_ACE_PREFIX));
- /*
- * 8. Verify that the number of code points is in the range 1 to IDNA_LABEL_MAX_LENGTH
- * inclusive (0 is excluded).
- */
- step8:
- free (src);
- if (STRLEN (out) < 1 || STRLEN (out) > IDNA_LABEL_MAX_LENGTH - 1)
- return IDNA_INVALID_LENGTH;
- return IDNA_SUCCESS;
- }
- /* ToUnicode(). May realloc() utf8in. Will free utf8in unconditionally. */
- static int
- idna_to_unicode_internal (char *utf8in,
- uint32_t *out, size_t *outlen, int flags)
- {
- int rc;
- char tmpout[IDNA_LABEL_MAX_LENGTH + 1];
- size_t utf8len = STRLEN (utf8in) + 1;
- size_t addlen = 0, addinc = utf8len / 10 + 1;
- /*
- * ToUnicode consists of the following steps:
- *
- * 1. If the sequence contains any code points outside the ASCII range
- * (0..7F) then proceed to step 2, otherwise skip to step 3.
- */
- {
- size_t i;
- int inasciirange;
- inasciirange = 1;
- for (i = 0; utf8in[i]; i++)
- if (utf8in[i] & ~0x7F)
- inasciirange = 0;
- if (inasciirange)
- goto step3;
- }
- /*
- * 2. Perform the steps specified in [NAMEPREP] and fail if there is an
- * error. (If step 3 of ToASCII is also performed here, it will not
- * affect the overall behavior of ToUnicode, but it is not
- * necessary.) The AllowUnassigned flag is used in [NAMEPREP].
- */
- do
- {
- char *newp = realloc (utf8in, utf8len + addlen);
- if (newp == NULL)
- {
- free (utf8in);
- return IDNA_MALLOC_ERROR;
- }
- utf8in = newp;
- if (flags & IDNA_ALLOW_UNASSIGNED)
- rc = stringprep_nameprep (utf8in, utf8len + addlen);
- else
- rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen);
- addlen += addinc;
- addinc *= 2;
- }
- while (rc == STRINGPREP_TOO_SMALL_BUFFER);
- if (rc != STRINGPREP_OK)
- {
- free (utf8in);
- return IDNA_STRINGPREP_ERROR;
- }
- /* 3. Verify that the sequence begins with the ACE prefix, and save a
- * copy of the sequence.
- * ... The ToASCII and ToUnicode operations MUST recognize the ACE
- prefix in a case-insensitive manner.
- */
- step3:
- if (c_strncasecmp (utf8in, IDNA_ACE_PREFIX, STRLEN (IDNA_ACE_PREFIX)) != 0)
- {
- free (utf8in);
- return IDNA_NO_ACE_PREFIX;
- }
- /* 4. Remove the ACE prefix.
- */
- memmove (utf8in, &utf8in[STRLEN (IDNA_ACE_PREFIX)],
- STRLEN (utf8in) - STRLEN (IDNA_ACE_PREFIX) + 1);
- /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE]
- * and fail if there is an error. Save a copy of the result of
- * this step.
- */
- (*outlen)--; /* reserve one for the zero */
- rc = punycode_decode (STRLEN (utf8in), utf8in, outlen, out, NULL);
- if (rc != PUNYCODE_SUCCESS)
- {
- free (utf8in);
- return IDNA_PUNYCODE_ERROR;
- }
- out[*outlen] = 0; /* add zero */
- /* 6. Apply ToASCII.
- */
- rc = idna_to_ascii_4i (out, *outlen, tmpout, flags);
- if (rc != IDNA_SUCCESS)
- {
- free (utf8in);
- return rc;
- }
- /* 7. Verify that the result of step 6 matches the saved copy from
- * step 3, using a case-insensitive ASCII comparison.
- */
- if (c_strncasecmp (tmpout, IDNA_ACE_PREFIX, STRLEN (IDNA_ACE_PREFIX)) != 0)
- {
- free (utf8in);
- return IDNA_ROUNDTRIP_VERIFY_ERROR;
- }
- if (c_strcasecmp (utf8in, tmpout + STRLEN (IDNA_ACE_PREFIX)) != 0)
- {
- free (utf8in);
- return IDNA_ROUNDTRIP_VERIFY_ERROR;
- }
- /* 8. Return the saved copy from step 5.
- */
- free (utf8in);
- return IDNA_SUCCESS;
- }
- /**
- * idna_to_unicode_44i:
- * @in: input array with unicode code points.
- * @inlen: length of input array with unicode code points.
- * @out: output array with unicode code points.
- * @outlen: on input, maximum size of output array with unicode code points,
- * on exit, actual size of output array with unicode code points.
- * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
- * %IDNA_USE_STD3_ASCII_RULES.
- *
- * The ToUnicode operation takes a sequence of Unicode code points
- * that make up one domain label and returns a sequence of Unicode
- * code points. If the input sequence is a label in ACE form, then the
- * result is an equivalent internationalized label that is not in ACE
- * form, otherwise the original sequence is returned unaltered.
- *
- * ToUnicode never fails. If any step fails, then the original input
- * sequence is returned immediately in that step.
- *
- * The Punycode decoder can never output more code points than it
- * inputs, but Nameprep can, and therefore ToUnicode can. Note that
- * the number of octets needed to represent a sequence of code points
- * depends on the particular character encoding used.
- *
- * The inputs to ToUnicode are a sequence of code points, the
- * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of
- * ToUnicode is always a sequence of Unicode code points.
- *
- * Return value: Returns #Idna_rc error condition, but it must only be
- * used for debugging purposes. The output buffer is always
- * guaranteed to contain the correct data according to the
- * specification (sans malloc induced errors). NB! This means that
- * you normally ignore the return code from this function, as
- * checking it means breaking the standard.
- */
- int
- idna_to_unicode_44i (const uint32_t *in, size_t inlen,
- uint32_t *out, size_t *outlen, int flags)
- {
- int rc;
- size_t outlensave = *outlen;
- char *p;
- p = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL);
- if (p == NULL)
- return IDNA_MALLOC_ERROR;
- rc = idna_to_unicode_internal (p, out, outlen, flags);
- if (rc != IDNA_SUCCESS)
- {
- memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ?
- inlen : outlensave));
- *outlen = inlen;
- }
- /* p is freed in idna_to_unicode_internal. */
- return rc;
- }
- /* Wrappers that handle several labels */
- /**
- * idna_to_ascii_4z:
- * @input: zero terminated input Unicode string.
- * @output: pointer to newly allocated output string.
- * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
- * %IDNA_USE_STD3_ASCII_RULES.
- *
- * Convert UCS-4 domain name to ASCII string. The domain name may
- * contain several labels, separated by dots. The output buffer must
- * be deallocated by the caller.
- *
- * Return value: Returns %IDNA_SUCCESS on success, or error code.
- **/
- int
- idna_to_ascii_4z (const uint32_t *input, char **output, int flags)
- {
- const uint32_t *start = input;
- const uint32_t *end;
- char buf[1<<9];
- char *out = NULL;
- int rc;
- /* 1) Whenever dots are used as label separators, the following
- characters MUST be recognized as dots: U+002E (full stop),
- U+3002 (ideographic full stop), U+FF0E (fullwidth full stop),
- U+FF61 (halfwidth ideographic full stop). */
- if (input[0] == 0)
- {
- /* Handle implicit zero-length root label. */
- *output = malloc (1);
- if (!*output)
- return IDNA_MALLOC_ERROR;
- STRCPY (*output, "");
- return IDNA_SUCCESS;
- }
- if (DOTP (input[0]) && input[1] == 0)
- {
- /* Handle explicit zero-length root label. */
- *output = malloc (2);
- if (!*output)
- return IDNA_MALLOC_ERROR;
- STRCPY (*output, ".");
- return IDNA_SUCCESS;
- }
- *output = NULL;
- do
- {
- end = start;
- for (; *end && !DOTP (*end); end++)
- ;
- if (*end == '\0' && start == end)
- {
- /* Handle explicit zero-length root label. */
- buf[0] = '\0';
- }
- else
- {
- rc = idna_to_ascii_4i (start, (size_t) (end - start), buf, flags);
- if (rc != IDNA_SUCCESS)
- {
- free (out);
- return rc;
- }
- }
- if (out)
- {
- size_t l = STRLEN (out) + 1 + STRLEN (buf) + 1;
- char *newp = realloc (out, l);
- if (!newp)
- {
- free (out);
- return IDNA_MALLOC_ERROR;
- }
- out = newp;
- STRCAT (out, ".");
- STRCAT (out, buf);
- }
- else
- {
- out = strdup (buf);
- if (!out)
- return IDNA_MALLOC_ERROR;
- }
- start = end + 1;
- }
- while (*end);
- *output = out;
- return IDNA_SUCCESS;
- }
- /**
- * idna_to_ascii_8z:
- * @input: zero terminated input UTF-8 string.
- * @output: pointer to newly allocated output string.
- * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
- * %IDNA_USE_STD3_ASCII_RULES.
- *
- * Convert UTF-8 domain name to ASCII string. The domain name may
- * contain several labels, separated by dots. The output buffer must
- * be deallocated by the caller.
- *
- * Return value: Returns %IDNA_SUCCESS on success, or error code.
- **/
- int
- idna_to_ascii_8z (const char *input, char **output, int flags)
- {
- uint32_t *ucs4;
- size_t ucs4len;
- int rc;
- ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
- if (!ucs4)
- return IDNA_ICONV_ERROR;
- rc = idna_to_ascii_4z (ucs4, output, flags);
- free (ucs4);
- return rc;
- }
- /**
- * idna_to_ascii_lz:
- * @input: zero terminated input string encoded in the current locale's
- * character set.
- * @output: pointer to newly allocated output string.
- * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
- * %IDNA_USE_STD3_ASCII_RULES.
- *
- * Convert domain name in the locale's encoding to ASCII string. The
- * domain name may contain several labels, separated by dots. The
- * output buffer must be deallocated by the caller.
- *
- * Return value: Returns %IDNA_SUCCESS on success, or error code.
- **/
- int
- idna_to_ascii_lz (const char *input, char **output, int flags)
- {
- char *utf8;
- int rc;
- utf8 = stringprep_locale_to_utf8 (input);
- if (!utf8)
- return IDNA_ICONV_ERROR;
- rc = idna_to_ascii_8z (utf8, output, flags);
- free (utf8);
- return rc;
- }
- /**
- * idna_to_unicode_4z4z:
- * @input: zero-terminated Unicode string.
- * @output: pointer to newly allocated output Unicode string.
- * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
- * %IDNA_USE_STD3_ASCII_RULES.
- *
- * Convert possibly ACE encoded domain name in UCS-4 format into a
- * UCS-4 string. The domain name may contain several labels,
- * separated by dots. The output buffer must be deallocated by the
- * caller.
- *
- * Return value: Returns %IDNA_SUCCESS on success, or error code.
- **/
- int
- idna_to_unicode_4z4z (const uint32_t *input, uint32_t **output, int flags)
- {
- const uint32_t *start = input;
- const uint32_t *end;
- uint32_t *buf;
- size_t buflen;
- uint32_t *out = NULL;
- size_t outlen = 0;
- *output = NULL;
- do
- {
- end = start;
- for (; *end && !DOTP (*end); end++)
- ;
- buflen = (size_t) (end - start);
- buf = malloc (sizeof (buf[0]) * (buflen + 1));
- if (!buf)
- {
- free (out);
- return IDNA_MALLOC_ERROR;
- }
- /* don't check return code as per specification! */
- idna_to_unicode_44i (start, (size_t) (end - start),
- buf, &buflen, flags);
- if (out)
- {
- uint32_t *newp = realloc (out,
- sizeof (out[0])
- * (outlen + 1 + buflen + 1));
- if (!newp)
- {
- free (buf);
- free (out);
- return IDNA_MALLOC_ERROR;
- }
- out = newp;
- out[outlen++] = 0x002E; /* '.' (full stop) */
- memcpy (out + outlen, buf, sizeof (buf[0]) * buflen);
- outlen += buflen;
- out[outlen] = 0x0;
- free (buf);
- }
- else
- {
- out = buf;
- outlen = buflen;
- out[outlen] = 0x0;
- }
- start = end + 1;
- }
- while (*end);
- *output = out;
- return IDNA_SUCCESS;
- }
- /**
- * idna_to_unicode_8z4z:
- * @input: zero-terminated UTF-8 string.
- * @output: pointer to newly allocated output Unicode string.
- * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
- * %IDNA_USE_STD3_ASCII_RULES.
- *
- * Convert possibly ACE encoded domain name in UTF-8 format into a
- * UCS-4 string. The domain name may contain several labels,
- * separated by dots. The output buffer must be deallocated by the
- * caller.
- *
- * Return value: Returns %IDNA_SUCCESS on success, or error code.
- **/
- int
- idna_to_unicode_8z4z (const char *input, uint32_t **output, int flags)
- {
- uint32_t *ucs4;
- size_t ucs4len;
- int rc;
- ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
- if (!ucs4)
- return IDNA_ICONV_ERROR;
- rc = idna_to_unicode_4z4z (ucs4, output, flags);
- free (ucs4);
- return rc;
- }
- /**
- * idna_to_unicode_8z8z:
- * @input: zero-terminated UTF-8 string.
- * @output: pointer to newly allocated output UTF-8 string.
- * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
- * %IDNA_USE_STD3_ASCII_RULES.
- *
- * Convert possibly ACE encoded domain name in UTF-8 format into a
- * UTF-8 string. The domain name may contain several labels,
- * separated by dots. The output buffer must be deallocated by the
- * caller.
- *
- * Return value: Returns %IDNA_SUCCESS on success, or error code.
- **/
- int
- idna_to_unicode_8z8z (const char *input, char **output, int flags)
- {
- uint32_t *ucs4;
- int rc;
- rc = idna_to_unicode_8z4z (input, &ucs4, flags);
- if (rc != IDNA_SUCCESS)
- return rc;
- *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL);
- free (ucs4);
- if (!*output)
- return IDNA_ICONV_ERROR;
- return IDNA_SUCCESS;
- }
- /**
- * idna_to_unicode_8zlz:
- * @input: zero-terminated UTF-8 string.
- * @output: pointer to newly allocated output string encoded in the
- * current locale's character set.
- * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
- * %IDNA_USE_STD3_ASCII_RULES.
- *
- * Convert possibly ACE encoded domain name in UTF-8 format into a
- * string encoded in the current locale's character set. The domain
- * name may contain several labels, separated by dots. The output
- * buffer must be deallocated by the caller.
- *
- * Return value: Returns %IDNA_SUCCESS on success, or error code.
- **/
- int
- idna_to_unicode_8zlz (const char *input, char **output, int flags)
- {
- char *utf8;
- int rc;
- rc = idna_to_unicode_8z8z (input, &utf8, flags);
- if (rc != IDNA_SUCCESS)
- return rc;
- *output = stringprep_utf8_to_locale (utf8);
- free (utf8);
- if (!*output)
- return IDNA_ICONV_ERROR;
- return IDNA_SUCCESS;
- }
- /**
- * idna_to_unicode_lzlz:
- * @input: zero-terminated string encoded in the current locale's
- * character set.
- * @output: pointer to newly allocated output string encoded in the
- * current locale's character set.
- * @flags: an #Idna_flags value, e.g., %IDNA_ALLOW_UNASSIGNED or
- * %IDNA_USE_STD3_ASCII_RULES.
- *
- * Convert possibly ACE encoded domain name in the locale's character
- * set into a string encoded in the current locale's character set.
- * The domain name may contain several labels, separated by dots. The
- * output buffer must be deallocated by the caller.
- *
- * Return value: Returns %IDNA_SUCCESS on success, or error code.
- **/
- int
- idna_to_unicode_lzlz (const char *input, char **output, int flags)
- {
- char *utf8;
- int rc;
- utf8 = stringprep_locale_to_utf8 (input);
- if (!utf8)
- return IDNA_ICONV_ERROR;
- rc = idna_to_unicode_8zlz (utf8, output, flags);
- free (utf8);
- return rc;
- }
- /**
- * IDNA_ACE_PREFIX
- *
- * The IANA allocated prefix to use for IDNA. "xn--"
- */
- /**
- * Idna_rc:
- * @IDNA_SUCCESS: Successful operation. This value is guaranteed to
- * always be zero, the remaining ones are only guaranteed to hold
- * non-zero values, for logical comparison purposes.
- * @IDNA_STRINGPREP_ERROR: Error during string preparation.
- * @IDNA_PUNYCODE_ERROR: Error during punycode operation.
- * @IDNA_CONTAINS_NON_LDH: For IDNA_USE_STD3_ASCII_RULES, indicate that
- * the string contains non-LDH ASCII characters.
- * @IDNA_CONTAINS_LDH: Same as @IDNA_CONTAINS_NON_LDH, for compatibility
- * with typo in earlier versions.
- * @IDNA_CONTAINS_MINUS: For IDNA_USE_STD3_ASCII_RULES, indicate that
- * the string contains a leading or trailing hyphen-minus (U+002D).
- * @IDNA_INVALID_LENGTH: The final output string is not within the
- * (inclusive) range 1 to IDNA_LABEL_MAX_LENGTH characters.
- * @IDNA_NO_ACE_PREFIX: The string does not contain the ACE prefix
- * (for ToUnicode).
- * @IDNA_ROUNDTRIP_VERIFY_ERROR: The ToASCII operation on output
- * string does not equal the input.
- * @IDNA_CONTAINS_ACE_PREFIX: The input contains the ACE prefix (for
- * ToASCII).
- * @IDNA_ICONV_ERROR: Character encoding conversion error.
- * @IDNA_MALLOC_ERROR: Could not allocate buffer (this is typically a
- * fatal error).
- * @IDNA_DLOPEN_ERROR: Could not dlopen the libcidn DSO (only used
- * internally in libc).
- *
- * Enumerated return codes of idna_to_ascii_4i(),
- * idna_to_unicode_44i() functions (and functions derived from those
- * functions). The value 0 is guaranteed to always correspond to
- * success.
- */
- /**
- * Idna_flags:
- * @IDNA_ALLOW_UNASSIGNED: Don't reject strings containing unassigned
- * Unicode code points.
- * @IDNA_USE_STD3_ASCII_RULES: Validate strings according to STD3
- * rules (i.e., normal host name rules).
- *
- * Flags to pass to idna_to_ascii_4i(), idna_to_unicode_44i() etc.
- */
|