idn.c 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. /***************************************************************************
  2. * _ _ ____ _
  3. * Project ___| | | | _ \| |
  4. * / __| | | | |_) | |
  5. * | (__| |_| | _ <| |___
  6. * \___|\___/|_| \_\_____|
  7. *
  8. * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
  9. *
  10. * This software is licensed as described in the file COPYING, which
  11. * you should have received as part of this distribution. The terms
  12. * are also available at https://curl.se/docs/copyright.html.
  13. *
  14. * You may opt to use, copy, modify, merge, publish, distribute and/or sell
  15. * copies of the Software, and permit persons to whom the Software is
  16. * furnished to do so, under the terms of the COPYING file.
  17. *
  18. * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  19. * KIND, either express or implied.
  20. *
  21. * SPDX-License-Identifier: curl
  22. *
  23. ***************************************************************************/
  24. /*
  25. * IDN conversions
  26. */
  27. #include "curl_setup.h"
  28. #include "urldata.h"
  29. #include "idn.h"
  30. #include "sendf.h"
  31. #include "curl_multibyte.h"
  32. #include "warnless.h"
  33. #ifdef USE_LIBIDN2
  34. #error #include <idn2.h>
  35. #if defined(_WIN32) && defined(UNICODE)
  36. #define IDN2_LOOKUP(name, host, flags) \
  37. idn2_lookup_u8((const uint8_t *)name, (uint8_t **)host, flags)
  38. #else
  39. #define IDN2_LOOKUP(name, host, flags) \
  40. idn2_lookup_ul((const char *)name, (char **)host, flags)
  41. #endif
  42. #endif /* USE_LIBIDN2 */
  43. /* The last 3 #include files should be in this order */
  44. #include "curl_printf.h"
  45. #include "curl_memory.h"
  46. #include "memdebug.h"
  47. #ifdef USE_WIN32_IDN
  48. /* using Windows kernel32 and normaliz libraries. */
/*
 * Declare the IDN conversion functions ourselves when _WIN32_WINNT is not
 * defined or is lower than 0x600.
 * NOTE(review): presumably the Windows headers do not provide these
 * prototypes for such targets - confirm against the SDK in use.
 */
#if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x600
WINBASEAPI int WINAPI IdnToAscii(DWORD dwFlags,
                                 const WCHAR *lpUnicodeCharStr,
                                 int cchUnicodeChar,
                                 WCHAR *lpASCIICharStr,
                                 int cchASCIIChar);
WINBASEAPI int WINAPI IdnToUnicode(DWORD dwFlags,
                                   const WCHAR *lpASCIICharStr,
                                   int cchASCIIChar,
                                   WCHAR *lpUnicodeCharStr,
                                   int cchUnicodeChar);
#endif

/* number of wide characters in the stack buffers that receive the output
   of IdnToAscii() and IdnToUnicode() in the functions below */
#define IDN_MAX_LENGTH 255
  62. static CURLcode win32_idn_to_ascii(const char *in, char **out)
  63. {
  64. wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
  65. *out = NULL;
  66. if(in_w) {
  67. wchar_t punycode[IDN_MAX_LENGTH];
  68. int chars = IdnToAscii(0, in_w, (int)(wcslen(in_w) + 1), punycode,
  69. IDN_MAX_LENGTH);
  70. curlx_unicodefree(in_w);
  71. if(chars) {
  72. char *mstr = curlx_convert_wchar_to_UTF8(punycode);
  73. if(mstr) {
  74. *out = strdup(mstr);
  75. curlx_unicodefree(mstr);
  76. if(!*out)
  77. return CURLE_OUT_OF_MEMORY;
  78. }
  79. else
  80. return CURLE_OUT_OF_MEMORY;
  81. }
  82. else
  83. return CURLE_URL_MALFORMAT;
  84. }
  85. else
  86. return CURLE_URL_MALFORMAT;
  87. return CURLE_OK;
  88. }
  89. static CURLcode win32_ascii_to_idn(const char *in, char **output)
  90. {
  91. char *out = NULL;
  92. wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
  93. if(in_w) {
  94. WCHAR idn[IDN_MAX_LENGTH]; /* stores a UTF-16 string */
  95. int chars = IdnToUnicode(0, in_w, (int)(wcslen(in_w) + 1), idn,
  96. IDN_MAX_LENGTH);
  97. if(chars) {
  98. /* 'chars' is "the number of characters retrieved" */
  99. char *mstr = curlx_convert_wchar_to_UTF8(idn);
  100. if(mstr) {
  101. out = strdup(mstr);
  102. curlx_unicodefree(mstr);
  103. if(!out)
  104. return CURLE_OUT_OF_MEMORY;
  105. }
  106. }
  107. else
  108. return CURLE_URL_MALFORMAT;
  109. }
  110. else
  111. return CURLE_URL_MALFORMAT;
  112. *output = out;
  113. return CURLE_OK;
  114. }
  115. #endif /* USE_WIN32_IDN */
  116. /*
  117. * Helpers for IDNA conversions.
  118. */
  119. bool Curl_is_ASCII_name(const char *hostname)
  120. {
  121. /* get an UNSIGNED local version of the pointer */
  122. const unsigned char *ch = (const unsigned char *)hostname;
  123. if(!hostname) /* bad input, consider it ASCII! */
  124. return TRUE;
  125. while(*ch) {
  126. if(*ch++ & 0x80)
  127. return FALSE;
  128. }
  129. return TRUE;
  130. }
  131. #ifdef USE_IDN
  132. /*
  133. * Curl_idn_decode() returns an allocated IDN decoded string if it was
  134. * possible. NULL on error.
  135. *
  136. * CURLE_URL_MALFORMAT - the host name could not be converted
  137. * CURLE_OUT_OF_MEMORY - memory problem
  138. *
  139. */
  140. static CURLcode idn_decode(const char *input, char **output)
  141. {
  142. char *decoded = NULL;
  143. CURLcode result = CURLE_OK;
  144. #ifdef USE_LIBIDN2
  145. if(idn2_check_version(IDN2_VERSION)) {
  146. int flags = IDN2_NFC_INPUT
  147. #if IDN2_VERSION_NUMBER >= 0x00140000
  148. /* IDN2_NFC_INPUT: Normalize input string using normalization form C.
  149. IDN2_NONTRANSITIONAL: Perform Unicode TR46 non-transitional
  150. processing. */
  151. | IDN2_NONTRANSITIONAL
  152. #endif
  153. ;
  154. int rc = IDN2_LOOKUP(input, &decoded, flags);
  155. if(rc != IDN2_OK)
  156. /* fallback to TR46 Transitional mode for better IDNA2003
  157. compatibility */
  158. rc = IDN2_LOOKUP(input, &decoded, IDN2_TRANSITIONAL);
  159. if(rc != IDN2_OK)
  160. result = CURLE_URL_MALFORMAT;
  161. }
  162. else
  163. /* a too old libidn2 version */
  164. result = CURLE_NOT_BUILT_IN;
  165. #elif defined(USE_WIN32_IDN)
  166. result = win32_idn_to_ascii(input, &decoded);
  167. #endif
  168. if(!result)
  169. *output = decoded;
  170. return result;
  171. }
  172. static CURLcode idn_encode(const char *puny, char **output)
  173. {
  174. char *enc = NULL;
  175. #ifdef USE_LIBIDN2
  176. int rc = idn2_to_unicode_8z8z(puny, &enc, 0);
  177. if(rc != IDNA_SUCCESS)
  178. return rc == IDNA_MALLOC_ERROR ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT;
  179. #elif defined(USE_WIN32_IDN)
  180. CURLcode result = win32_ascii_to_idn(puny, &enc);
  181. if(result)
  182. return result;
  183. #endif
  184. *output = enc;
  185. return CURLE_OK;
  186. }
  187. CURLcode Curl_idn_decode(const char *input, char **output)
  188. {
  189. char *d = NULL;
  190. CURLcode result = idn_decode(input, &d);
  191. #ifdef USE_LIBIDN2
  192. if(!result) {
  193. char *c = strdup(d);
  194. idn2_free(d);
  195. if(c)
  196. d = c;
  197. else
  198. result = CURLE_OUT_OF_MEMORY;
  199. }
  200. #endif
  201. if(!result)
  202. *output = d;
  203. return result;
  204. }
  205. CURLcode Curl_idn_encode(const char *puny, char **output)
  206. {
  207. char *d = NULL;
  208. CURLcode result = idn_encode(puny, &d);
  209. #ifdef USE_LIBIDN2
  210. if(!result) {
  211. char *c = strdup(d);
  212. idn2_free(d);
  213. if(c)
  214. d = c;
  215. else
  216. result = CURLE_OUT_OF_MEMORY;
  217. }
  218. #endif
  219. if(!result)
  220. *output = d;
  221. return result;
  222. }
  223. /*
  224. * Frees data allocated by idnconvert_hostname()
  225. */
  226. void Curl_free_idnconverted_hostname(struct hostname *host)
  227. {
  228. if(host->encalloc) {
  229. /* must be freed with idn2_free() if allocated by libidn */
  230. Curl_idn_free(host->encalloc);
  231. host->encalloc = NULL;
  232. }
  233. }
  234. #endif /* USE_IDN */
  235. /*
  236. * Perform any necessary IDN conversion of hostname
  237. */
  238. CURLcode Curl_idnconvert_hostname(struct hostname *host)
  239. {
  240. /* set the name we use to display the host name */
  241. host->dispname = host->name;
  242. #ifdef USE_IDN
  243. /* Check name for non-ASCII and convert hostname if we can */
  244. if(!Curl_is_ASCII_name(host->name)) {
  245. char *decoded;
  246. CURLcode result = idn_decode(host->name, &decoded);
  247. if(!result) {
  248. if(!*decoded) {
  249. /* zero length is a bad host name */
  250. Curl_idn_free(decoded);
  251. return CURLE_URL_MALFORMAT;
  252. }
  253. /* successful */
  254. host->encalloc = decoded;
  255. /* change the name pointer to point to the encoded hostname */
  256. host->name = host->encalloc;
  257. }
  258. else
  259. return result;
  260. }
  261. #endif
  262. return CURLE_OK;
  263. }