gb18030ext.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300
  1. /*
  2. * Copyright (C) 1999-2001, 2005 Free Software Foundation, Inc.
  3. * This file is part of the GNU LIBICONV Library.
  4. *
  5. * The GNU LIBICONV Library is free software; you can redistribute it
  6. * and/or modify it under the terms of the GNU Library General Public
  7. * License as published by the Free Software Foundation; either version 2
  8. * of the License, or (at your option) any later version.
  9. *
  10. * The GNU LIBICONV Library is distributed in the hope that it will be
  11. * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Library General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Library General Public
  16. * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
  17. * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
  18. * Fifth Floor, Boston, MA 02110-1301, USA.
  19. */
  20. /*
  21. * GB18030 two-byte extension
  22. */
  23. static const unsigned short gb18030ext_2uni_pagea9[13] = {
  24. /* 0xa9 */
  25. 0x303e, 0x2ff0, 0x2ff1, 0x2ff2, 0x2ff3, 0x2ff4, 0x2ff5, 0x2ff6,
  26. 0x2ff7, 0x2ff8, 0x2ff9, 0x2ffa, 0x2ffb,
  27. };
  28. static const unsigned short gb18030ext_2uni_pagefe[96] = {
  29. /* 0xfe */
  30. 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  31. 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  32. 0x2e81, 0xe816, 0xe817, 0xe818, 0x2e84, 0x3473, 0x3447, 0x2e88,
  33. 0x2e8b, 0xe81e, 0x359e, 0x361a, 0x360e, 0x2e8c, 0x2e97, 0x396e,
  34. 0x3918, 0xe826, 0x39cf, 0x39df, 0x3a73, 0x39d0, 0xe82b, 0xe82c,
  35. 0x3b4e, 0x3c6e, 0x3ce0, 0x2ea7, 0xe831, 0xe832, 0x2eaa, 0x4056,
  36. 0x415f, 0x2eae, 0x4337, 0x2eb3, 0x2eb6, 0x2eb7, 0xe83b, 0x43b1,
  37. 0x43ac, 0x2ebb, 0x43dd, 0x44d6, 0x4661, 0x464c, 0xe843, 0x4723,
  38. 0x4729, 0x477c, 0x478d, 0x2eca, 0x4947, 0x497a, 0x497d, 0x4982,
  39. 0x4983, 0x4985, 0x4986, 0x499f, 0x499b, 0x49b7, 0x49b6, 0xe854,
  40. 0xe855, 0x4ca3, 0x4c9f, 0x4ca0, 0x4ca1, 0x4c77, 0x4ca2, 0x4d13,
  41. 0x4d14, 0x4d15, 0x4d16, 0x4d17, 0x4d18, 0x4d19, 0x4dae, 0xe864,
  42. };
  43. static int
  44. gb18030ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
  45. {
  46. unsigned char c1 = s[0];
  47. if ((c1 == 0xa2) || (c1 >= 0xa4 && c1 <= 0xa9) || (c1 == 0xd7) || (c1 == 0xfe)) {
  48. if (n >= 2) {
  49. unsigned char c2 = s[1];
  50. if ((c2 >= 0x40 && c2 < 0x7f) || (c2 >= 0x80 && c2 < 0xff)) {
  51. unsigned int i = 190 * (c1 - 0x81) + (c2 - (c2 >= 0x80 ? 0x41 : 0x40));
  52. unsigned short wc = 0xfffd;
  53. switch (c1) {
  54. case 0xa2:
  55. if (i >= 6376 && i <= 6381) /* 0xA2AB..0xA2B0 */
  56. wc = 0xe766 + (i - 6376);
  57. else if (i == 6432) /* 0xA2E3 */
  58. wc = 0x20ac;
  59. else if (i == 6433) /* 0xA2E4 */
  60. wc = 0xe76d;
  61. else if (i >= 6444 && i <= 6445) /* 0xA2EF..0xA2F0 */
  62. wc = 0xe76e + (i - 6444);
  63. else if (i >= 6458 && i <= 6459) /* 0xA2FD..0xA2FE */
  64. wc = 0xe770 + (i - 6458);
  65. break;
  66. case 0xa4:
  67. if (i >= 6829 && i <= 6839) /* 0xA4F4..0xA4FE */
  68. wc = 0xe772 + (i - 6829);
  69. break;
  70. case 0xa5:
  71. if (i >= 7022 && i <= 7029) /* 0xA5F7..0xA5FE */
  72. wc = 0xe77d + (i - 7022);
  73. break;
  74. case 0xa6:
  75. if (i >= 7150 && i <= 7157) /* 0xA6B9..0xA6C0 */
  76. wc = 0xe785 + (i - 7150);
  77. else if (i >= 7182 && i <= 7190) /* 0xA6D9..0xA6DF */
  78. wc = 0xe78d + (i - 7182);
  79. else if (i >= 7201 && i <= 7202) /* 0xA6EC..0xA6ED */
  80. wc = 0xe794 + (i - 7201);
  81. else if (i == 7208) /* 0xA6F3 */
  82. wc = 0xe796;
  83. else if (i >= 7211 && i <= 7219) /* 0xA6F6..0xA6FE */
  84. wc = 0xe797 + (i - 7211);
  85. break;
  86. case 0xa7:
  87. if (i >= 7349 && i <= 7363) /* 0xA7C2..0xA7D0 */
  88. wc = 0xe7a0 + (i - 7349);
  89. else if (i >= 7397 && i <= 7409) /* 0xA7F2..0xA7FE */
  90. wc = 0xe7af + (i - 7397);
  91. break;
  92. case 0xa8:
  93. if (i >= 7495 && i <= 7505) /* 0xA896..0xA8A0 */
  94. wc = 0xe7bc + (i - 7495);
  95. else if (i == 7533) /* 0xA8BC */
  96. wc = 0xe7c7;
  97. else if (i == 7536) /* 0xA8BF */
  98. wc = 0x01f9;
  99. else if (i >= 7538 && i <= 7541) /* 0xA8C1..0xA8C4 */
  100. wc = 0xe7c9 + (i - 7538);
  101. else if (i >= 7579 && i <= 7599) /* 0xA8EA..0xA8FE */
  102. wc = 0xe7cd + (i - 7579);
  103. break;
  104. case 0xa9:
  105. if (i == 7624) /* 0xA958 */
  106. wc = 0xe7e2;
  107. else if (i == 7627) /* 0xA95B */
  108. wc = 0xe7e3;
  109. else if (i >= 7629 && i <= 7631) /* 0xA95D..0xA95F */
  110. wc = 0xe7e4 + (i - 7629);
  111. else if (i >= 7672 && i < 7685) /* 0xA989..0xA995 */
  112. wc = gb18030ext_2uni_pagea9[i-7672];
  113. else if (i >= 7686 && i <= 7698) /* 0xA997..0xA9A3 */
  114. wc = 0xe7f4 + (i - 7686);
  115. else if (i >= 7775 && i <= 7789) /* 0xA9F0..0xA9FE */
  116. wc = 0xe801 + (i - 7775);
  117. break;
  118. case 0xd7:
  119. if (i >= 16525 && i <= 16529) /* 0xD7FA..0xD7FE */
  120. wc = 0xe810 + (i - 16525);
  121. break;
  122. case 0xfe:
  123. if (i < 23846)
  124. wc = gb18030ext_2uni_pagefe[i-23750];
  125. break;
  126. default:
  127. break;
  128. }
  129. if (wc != 0xfffd) {
  130. *pwc = (ucs4_t) wc;
  131. return 2;
  132. }
  133. }
  134. return RET_ILSEQ;
  135. }
  136. return RET_TOOFEW(0);
  137. }
  138. return RET_ILSEQ;
  139. }
  140. static const unsigned short gb18030ext_page2e[80] = {
  141. 0x0000, 0xfe50, 0x0000, 0x0000, 0xfe54, 0x0000, 0x0000, 0x0000, /*0x80-0x87*/
  142. 0xfe57, 0x0000, 0x0000, 0xfe58, 0xfe5d, 0x0000, 0x0000, 0x0000, /*0x88-0x8f*/
  143. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe5e, /*0x90-0x97*/
  144. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x98-0x9f*/
  145. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe6b, /*0xa0-0xa7*/
  146. 0x0000, 0x0000, 0xfe6e, 0x0000, 0x0000, 0x0000, 0xfe71, 0x0000, /*0xa8-0xaf*/
  147. 0x0000, 0x0000, 0x0000, 0xfe73, 0x0000, 0x0000, 0xfe74, 0xfe75, /*0xb0-0xb7*/
  148. 0x0000, 0x0000, 0x0000, 0xfe79, 0x0000, 0x0000, 0x0000, 0x0000, /*0xb8-0xbf*/
  149. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xc0-0xc7*/
  150. 0x0000, 0x0000, 0xfe84, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xc8-0xcf*/
  151. };
  152. static const unsigned short gb18030ext_page2f[16] = {
  153. 0xa98a, 0xa98b, 0xa98c, 0xa98d, 0xa98e, 0xa98f, 0xa990, 0xa991, /*0xf0-0xf7*/
  154. 0xa992, 0xa993, 0xa994, 0xa995, 0x0000, 0x0000, 0x0000, 0x0000, /*0xf8-0xff*/
  155. };
  156. static const unsigned short gb18030ext_page34[56] = {
  157. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe56, /*0x40-0x47*/
  158. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x48-0x4f*/
  159. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x50-0x57*/
  160. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x58-0x5f*/
  161. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x60-0x67*/
  162. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x68-0x6f*/
  163. 0x0000, 0x0000, 0x0000, 0xfe55, 0x0000, 0x0000, 0x0000, 0x0000, /*0x70-0x77*/
  164. };
  165. static const unsigned short gb18030ext_page36[24] = {
  166. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe5c, 0x0000, /*0x08-0x0f*/
  167. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x10-0x17*/
  168. 0x0000, 0x0000, 0xfe5b, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x18-0x1f*/
  169. };
  170. static const unsigned short gb18030ext_page39[24] = {
  171. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe62, /*0xc8-0xcf*/
  172. 0xfe65, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xd0-0xd7*/
  173. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe63, /*0xd8-0xdf*/
  174. };
  175. static const unsigned short gb18030ext_page43[56] = {
  176. 0x0000, 0x0000, 0x0000, 0x0000, 0xfe78, 0x0000, 0x0000, 0x0000, /*0xa8-0xaf*/
  177. 0x0000, 0xfe77, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xb0-0xb7*/
  178. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xb8-0xbf*/
  179. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xc0-0xc7*/
  180. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xc8-0xcf*/
  181. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xd0-0xd7*/
  182. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe7a, 0x0000, 0x0000, /*0xd8-0xdf*/
  183. };
  184. static const unsigned short gb18030ext_page46[32] = {
  185. 0x0000, 0x0000, 0x0000, 0x0000, 0xfe7d, 0x0000, 0x0000, 0x0000, /*0x48-0x4f*/
  186. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x50-0x57*/
  187. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x58-0x5f*/
  188. 0x0000, 0xfe7c, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x60-0x67*/
  189. };
  190. static const unsigned short gb18030ext_page47_1[16] = {
  191. 0x0000, 0x0000, 0x0000, 0xfe80, 0x0000, 0x0000, 0x0000, 0x0000, /*0x20-0x27*/
  192. 0x0000, 0xfe81, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x28-0x2f*/
  193. };
  194. static const unsigned short gb18030ext_page47_2[24] = {
  195. 0x0000, 0x0000, 0x0000, 0x0000, 0xfe82, 0x0000, 0x0000, 0x0000, /*0x78-0x7f*/
  196. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x80-0x87*/
  197. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe83, 0x0000, 0x0000, /*0x88-0x8f*/
  198. };
  199. static const unsigned short gb18030ext_page49[120] = {
  200. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe85, /*0x40-0x47*/
  201. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x48-0x4f*/
  202. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x50-0x57*/
  203. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x58-0x5f*/
  204. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x60-0x67*/
  205. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x68-0x6f*/
  206. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x70-0x77*/
  207. 0x0000, 0x0000, 0xfe86, 0x0000, 0x0000, 0xfe87, 0x0000, 0x0000, /*0x78-0x7f*/
  208. 0x0000, 0x0000, 0xfe88, 0xfe89, 0x0000, 0xfe8a, 0xfe8b, 0x0000, /*0x80-0x87*/
  209. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x88-0x8f*/
  210. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x90-0x97*/
  211. 0x0000, 0x0000, 0x0000, 0xfe8d, 0x0000, 0x0000, 0x0000, 0xfe8c, /*0x98-0x9f*/
  212. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xa0-0xa7*/
  213. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xa8-0xaf*/
  214. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe8f, 0xfe8e, /*0xb0-0xb7*/
  215. };
  216. static const unsigned short gb18030ext_page4c[56] = {
  217. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe96, /*0x70-0x77*/
  218. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x78-0x7f*/
  219. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x80-0x87*/
  220. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x88-0x8f*/
  221. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x90-0x97*/
  222. 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe93, /*0x98-0x9f*/
  223. 0xfe94, 0xfe95, 0xfe97, 0xfe92, 0x0000, 0x0000, 0x0000, 0x0000, /*0xa0-0xa7*/
  224. };
  225. static const unsigned short gb18030ext_page4d[16] = {
  226. 0x0000, 0x0000, 0x0000, 0xfe98, 0xfe99, 0xfe9a, 0xfe9b, 0xfe9c, /*0x10-0x17*/
  227. 0xfe9d, 0xfe9e, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x18-0x1f*/
  228. };
  229. static int
  230. gb18030ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
  231. {
  232. if (n >= 2) {
  233. unsigned short c = 0;
  234. if (wc == 0x01f9)
  235. c = 0xa8bf;
  236. else if (wc == 0x20ac)
  237. c = 0xa2e3;
  238. else if (wc >= 0x2e80 && wc < 0x2ed0)
  239. c = gb18030ext_page2e[wc-0x2e80];
  240. else if (wc >= 0x2ff0 && wc < 0x3000)
  241. c = gb18030ext_page2f[wc-0x2ff0];
  242. else if (wc == 0x303e)
  243. c = 0xa989;
  244. else if (wc >= 0x3440 && wc < 0x3478)
  245. c = gb18030ext_page34[wc-0x3440];
  246. else if (wc == 0x359e)
  247. c = 0xfe5a;
  248. else if (wc >= 0x3608 && wc < 0x3620)
  249. c = gb18030ext_page36[wc-0x3608];
  250. else if (wc == 0x3918)
  251. c = 0xfe60;
  252. else if (wc == 0x396e)
  253. c = 0xfe5f;
  254. else if (wc >= 0x39c8 && wc < 0x39e0)
  255. c = gb18030ext_page39[wc-0x39c8];
  256. else if (wc == 0x3a73)
  257. c = 0xfe64;
  258. else if (wc == 0x3b4e)
  259. c = 0xfe68;
  260. else if (wc == 0x3c6e)
  261. c = 0xfe69;
  262. else if (wc == 0x3ce0)
  263. c = 0xfe6a;
  264. else if (wc == 0x4056)
  265. c = 0xfe6f;
  266. else if (wc == 0x415f)
  267. c = 0xfe70;
  268. else if (wc == 0x4337)
  269. c = 0xfe72;
  270. else if (wc >= 0x43a8 && wc < 0x43e0)
  271. c = gb18030ext_page43[wc-0x43a8];
  272. else if (wc == 0x44d6)
  273. c = 0xfe7b;
  274. else if (wc >= 0x4648 && wc < 0x4668)
  275. c = gb18030ext_page46[wc-0x4648];
  276. else if (wc >= 0x4720 && wc < 0x4730)
  277. c = gb18030ext_page47_1[wc-0x4720];
  278. else if (wc >= 0x4778 && wc < 0x4790)
  279. c = gb18030ext_page47_2[wc-0x4778];
  280. else if (wc >= 0x4940 && wc < 0x49b8)
  281. c = gb18030ext_page49[wc-0x4940];
  282. else if (wc >= 0x4c70 && wc < 0x4ca8)
  283. c = gb18030ext_page4c[wc-0x4c70];
  284. else if (wc >= 0x4d10 && wc < 0x4d20)
  285. c = gb18030ext_page4d[wc-0x4d10];
  286. else if (wc == 0x4dae)
  287. c = 0xfe9f;
  288. if (c != 0) {
  289. r[0] = (c >> 8); r[1] = (c & 0xff);
  290. return 2;
  291. }
  292. return RET_ILUNI;
  293. }
  294. return RET_TOOSMALL;
  295. }