converters.h 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. /*
  2. * Copyright (C) 1999-2002, 2004-2009 Free Software Foundation, Inc.
  3. * This file is part of the GNU LIBICONV Library.
  4. *
  5. * The GNU LIBICONV Library is free software; you can redistribute it
  6. * and/or modify it under the terms of the GNU Library General Public
  7. * License as published by the Free Software Foundation; either version 2
  8. * of the License, or (at your option) any later version.
  9. *
  10. * The GNU LIBICONV Library is distributed in the hope that it will be
  11. * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Library General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Library General Public
  16. * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
  17. * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
  18. * Fifth Floor, Boston, MA 02110-1301, USA.
  19. */
  20. /* This file defines all the converters. */
  /* Our own notion of wide character: a UCS-4 value, according to
     ISO-10646-1. */
  typedef unsigned int ucs4_t;

  /* State used by a conversion.  0 denotes the initial state. */
  typedef unsigned int state_t;

  /* iconv_t is an opaque type.  conv_t is the real type behind it:
     a pointer to a struct conv_struct. */
  typedef struct conv_struct * conv_t;
  27. /*
  28. * Data type for conversion multibyte -> unicode
  29. */
  30. struct mbtowc_funcs {
  31. int (*xxx_mbtowc) (conv_t conv, ucs4_t *pwc, unsigned char const *s, int n);
  32. /*
  33. * int xxx_mbtowc (conv_t conv, ucs4_t *pwc, unsigned char const *s, int n)
  34. * converts the byte sequence starting at s to a wide character. Up to n bytes
  35. * are available at s. n is >= 1.
  36. * Result is number of bytes consumed (if a wide character was read),
  37. * or -1 if invalid, or -2 if n too small, or -2-(number of bytes consumed)
  38. * if only a shift sequence was read.
  39. */
  40. int (*xxx_flushwc) (conv_t conv, ucs4_t *pwc);
  41. /*
  42. * int xxx_flushwc (conv_t conv, ucs4_t *pwc)
  43. * returns to the initial state and stores the pending wide character, if any.
  44. * Result is 1 (if a wide character was read) or 0 if none was pending.
  45. */
  46. };
  /*
   * Negative return codes of xxx_mbtowc.  The number n of shift-sequence
   * bytes already read is folded into the code: odd values (-1, -3, ...)
   * mean "invalid input", even values (-2, -4, ...) mean "too few bytes".
   */

  /* Return code if invalid input after a shift sequence of n bytes was read.
     (xxx_mbtowc) */
  #define RET_SHIFT_ILSEQ(n) (-1-2*(n))

  /* Return code if invalid. (xxx_mbtowc) */
  #define RET_ILSEQ RET_SHIFT_ILSEQ(0)

  /* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc) */
  #define RET_TOOFEW(n) (-2-2*(n))

  /* Retrieve the n from the encoded RET_... value.  The result has type
     unsigned int. */
  #define DECODE_SHIFT_ILSEQ(r) ((unsigned int)(RET_SHIFT_ILSEQ(0) - (r)) / 2)
  #define DECODE_TOOFEW(r) ((unsigned int)(RET_TOOFEW(0) - (r)) / 2)
  57. /*
  58. * Data type for conversion unicode -> multibyte
  59. */
  60. struct wctomb_funcs {
  61. int (*xxx_wctomb) (conv_t conv, unsigned char *r, ucs4_t wc, int n);
  62. /*
  63. * int xxx_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
  64. * converts the wide character wc to the character set xxx, and stores the
  65. * result beginning at r. Up to n bytes may be written at r. n is >= 1.
  66. * Result is number of bytes written, or -1 if invalid, or -2 if n too small.
  67. */
  68. int (*xxx_reset) (conv_t conv, unsigned char *r, int n);
  69. /*
  70. * int xxx_reset (conv_t conv, unsigned char *r, int n)
  71. * stores a shift sequences returning to the initial state beginning at r.
  72. * Up to n bytes may be written at r. n is >= 0.
  73. * Result is number of bytes written, or -2 if n too small.
  74. */
  75. };
  /* Return code if invalid. (xxx_wctomb)
     Parenthesized so the value stays a single operand wherever it expands. */
  #define RET_ILUNI (-1)

  /* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
  #define RET_TOOSMALL (-2)
  80. /*
  81. * Contents of a conversion descriptor.
  82. */
  83. struct conv_struct {
  84. struct loop_funcs lfuncs;
  85. /* Input (conversion multibyte -> unicode) */
  86. int iindex;
  87. struct mbtowc_funcs ifuncs;
  88. state_t istate;
  89. /* Output (conversion unicode -> multibyte) */
  90. int oindex;
  91. struct wctomb_funcs ofuncs;
  92. int oflags;
  93. state_t ostate;
  94. /* Operation flags */
  95. int transliterate;
  96. int discard_ilseq;
  97. #ifndef LIBICONV_PLUG
  98. struct iconv_fallbacks fallbacks;
  99. struct iconv_hooks hooks;
  100. #endif
  101. };
  102. /*
  103. * Include all the converters.
  104. */
  105. #include "ascii.h"
  106. /* General multi-byte encodings */
  107. #include "utf8.h"
  108. #include "ucs2.h"
  109. #include "ucs2be.h"
  110. #include "ucs2le.h"
  111. #include "ucs4.h"
  112. #include "ucs4be.h"
  113. #include "ucs4le.h"
  114. #include "utf16.h"
  115. #include "utf16be.h"
  116. #include "utf16le.h"
  117. #include "utf32.h"
  118. #include "utf32be.h"
  119. #include "utf32le.h"
  120. #include "utf7.h"
  121. #include "ucs2internal.h"
  122. #include "ucs2swapped.h"
  123. #include "ucs4internal.h"
  124. #include "ucs4swapped.h"
  125. #include "c99.h"
  126. #include "java.h"
  127. /* 8-bit encodings */
  128. #include "iso8859_1.h"
  129. #include "iso8859_2.h"
  130. #include "iso8859_3.h"
  131. #include "iso8859_4.h"
  132. #include "iso8859_5.h"
  133. #include "iso8859_6.h"
  134. #include "iso8859_7.h"
  135. #include "iso8859_8.h"
  136. #include "iso8859_9.h"
  137. #include "iso8859_10.h"
  138. #include "iso8859_11.h"
  139. #include "iso8859_13.h"
  140. #include "iso8859_14.h"
  141. #include "iso8859_15.h"
  142. #include "iso8859_16.h"
  143. #include "koi8_r.h"
  144. #include "koi8_u.h"
  145. #include "koi8_ru.h"
  146. #include "cp1250.h"
  147. #include "cp1251.h"
  148. #include "cp1252.h"
  149. #include "cp1253.h"
  150. #include "cp1254.h"
  151. #include "cp1255.h"
  152. #include "cp1256.h"
  153. #include "cp1257.h"
  154. #include "cp1258.h"
  155. #include "cp850.h"
  156. #include "cp862.h"
  157. #include "cp866.h"
  158. #include "cp1131.h"
  159. #include "mac_roman.h"
  160. #include "mac_centraleurope.h"
  161. #include "mac_iceland.h"
  162. #include "mac_croatian.h"
  163. #include "mac_romania.h"
  164. #include "mac_cyrillic.h"
  165. #include "mac_ukraine.h"
  166. #include "mac_greek.h"
  167. #include "mac_turkish.h"
  168. #include "mac_hebrew.h"
  169. #include "mac_arabic.h"
  170. #include "mac_thai.h"
  171. #include "hp_roman8.h"
  172. #include "nextstep.h"
  173. #include "armscii_8.h"
  174. #include "georgian_academy.h"
  175. #include "georgian_ps.h"
  176. #include "koi8_t.h"
  177. #include "pt154.h"
  178. #include "rk1048.h"
  179. #include "mulelao.h"
  180. #include "cp1133.h"
  181. #include "tis620.h"
  182. #include "cp874.h"
  183. #include "viscii.h"
  184. #include "tcvn.h"
  185. /* CJK character sets [CCS = coded character set] [CJKV.INF chapter 3] */
  186. #ifndef ARCADIA_ICONV_NOCJK
  /* Summary entry shared by the CJK tables included below: an index into
     the big table plus a 16-bit mask of the entries in use. */
  typedef struct {
    unsigned short indx; /* index into big table */
    unsigned short used; /* bitmask of used entries */
  } Summary16;
  191. #include "iso646_jp.h"
  192. #include "jisx0201.h"
  193. #include "jisx0208.h"
  194. #include "jisx0212.h"
  195. #include "iso646_cn.h"
  196. #include "gb2312.h"
  197. #include "isoir165.h"
  198. /*#include "gb12345.h"*/
  199. #include "gbk.h"
  200. #include "cns11643.h"
  201. #include "big5.h"
  202. #include "ksc5601.h"
  203. #endif
  204. #include "johab_hangul.h"
  205. /* CJK encodings [CES = character encoding scheme] [CJKV.INF chapter 4] */
  206. #ifndef ARCADIA_ICONV_NOCJK
  207. #include "euc_jp.h"
  208. #include "sjis.h"
  209. #include "cp932.h"
  210. #include "iso2022_jp.h"
  211. #include "iso2022_jp1.h"
  212. #include "iso2022_jp2.h"
  213. #include "euc_cn.h"
  214. #include "ces_gbk.h"
  215. #include "cp936.h"
  216. #include "gb18030.h"
  217. #include "iso2022_cn.h"
  218. #include "iso2022_cnext.h"
  219. #include "hz.h"
  220. #include "euc_tw.h"
  221. #include "ces_big5.h"
  222. #include "cp950.h"
  223. #include "big5hkscs1999.h"
  224. #include "big5hkscs2001.h"
  225. #include "big5hkscs2004.h"
  226. #include "euc_kr.h"
  227. #include "cp949.h"
  228. #include "johab.h"
  229. #include "iso2022_kr.h"
  230. #endif
  231. /* Encodings used by system dependent locales. */
  232. #ifdef USE_AIX
  233. #include "cp856.h"
  234. #include "cp922.h"
  235. #include "cp943.h"
  236. #include "cp1046.h"
  237. #include "cp1124.h"
  238. #include "cp1129.h"
  239. #include "cp1161.h"
  240. #include "cp1162.h"
  241. #include "cp1163.h"
  242. #endif
  243. #ifdef USE_OSF1
  244. #include "dec_kanji.h"
  245. #include "dec_hanyu.h"
  246. #endif
  247. #ifdef USE_DOS
  248. #include "cp437.h"
  249. #include "cp737.h"
  250. #include "cp775.h"
  251. #include "cp852.h"
  252. #include "cp853.h"
  253. #include "cp855.h"
  254. #include "cp857.h"
  255. #include "cp858.h"
  256. #include "cp860.h"
  257. #include "cp861.h"
  258. #include "cp863.h"
  259. #include "cp864.h"
  260. #include "cp865.h"
  261. #include "cp869.h"
  262. #include "cp1125.h"
  263. #endif
  264. #ifdef USE_EXTRA
  265. #include "euc_jisx0213.h"
  266. #include "shift_jisx0213.h"
  267. #include "iso2022_jp3.h"
  268. #include "big5_2003.h"
  269. #include "tds565.h"
  270. #include "atarist.h"
  271. #include "riscos1.h"
  272. #endif