kanji.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235
  1. /*
  2. Unix SMB/Netbios implementation.
  3. Version 1.9.
  4. Kanji Extensions
  5. Copyright (C) Andrew Tridgell 1992-1998
  6. This program is free software; you can redistribute it and/or modify
  7. it under the terms of the GNU General Public License as published by
  8. the Free Software Foundation; either version 2 of the License, or
  9. (at your option) any later version.
  10. This program is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. GNU General Public License for more details.
  14. You should have received a copy of the GNU General Public License
  15. along with this program; if not, write to the Free Software
  16. Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  17. Adding for Japanese language by <fujita@ainix.isac.co.jp> 1994.9.5
  18. and extend coding system to EUC/SJIS/JIS/HEX at 1994.10.11
  19. and add all jis codes sequence type at 1995.8.16
  20. Notes: Hexadecimal code by <ohki@gssm.otuka.tsukuba.ac.jp>
  21. */
  22. #define _KANJI_C_
  23. #include "includes.h"
  /*
   * Multi-byte aware string search hooks.
   * Each pointer starts out aimed at the plain C library routine (cast
   * to the prototype used by callers) and gets overridden when a
   * multi-byte code page is loaded.
   */
  const char *(*multibyte_strchr)(const char *, int) =
      (const char *(*)(const char *, int)) strchr;

  const char *(*multibyte_strrchr)(const char *, int) =
      (const char *(*)(const char *, int)) strrchr;

  const char *(*multibyte_strstr)(const char *, const char *) =
      (const char *(*)(const char *, const char *)) strstr;

  char *(*multibyte_strtok)(char *, const char *) =
      (char *(*)(char *, const char *)) strtok;
  32. /*
  33. * Kanji is treated differently here due to historical accident of
  34. * it being the first non-English codepage added to Samba.
  35. * The define 'KANJI' is being overloaded to mean 'use kanji codepage
  36. * by default' and also 'this is the filename-to-disk conversion
  37. * method to use'. This really should be removed and all control
  38. * over this left in the smb.conf parameters 'client codepage'
  39. * and 'coding system'.
  40. */
  41. #ifndef KANJI
  42. /*
  43. * Set the default conversion to be the functions in
  44. * charcnv.c.
  45. */
  46. static size_t skip_non_multibyte_char(char);
  47. static BOOL not_multibyte_char_1(char);
  48. char *(*_dos_to_unix)(char *, BOOL) = dos2unix_format;
  49. char *(*_unix_to_dos)(char *, BOOL) = unix2dos_format;
  50. size_t (*_skip_multibyte_char)(char) = skip_non_multibyte_char;
  51. BOOL (*is_multibyte_char_1)(char) = not_multibyte_char_1;
  52. #else /* KANJI */
  53. /*
  54. * Set the default conversion to be the function
  55. * sj_to_sj in this file.
  56. */
  57. static char *sj_to_sj(char *from, BOOL overwrite);
  58. static size_t skip_kanji_multibyte_char(char);
  59. static BOOL is_kanji_multibyte_char_1(char);
  60. char *(*_dos_to_unix)(char *, BOOL) = sj_to_sj;
  61. char *(*_unix_to_dos)(char *, BOOL) = sj_to_sj;
  62. size_t (*_skip_multibyte_char)(char) = skip_kanji_multibyte_char;
  63. int (*is_multibyte_char_1)(char) = is_kanji_multibyte_char_1;
  64. #endif /* KANJI */
  65. /* jis si/so sequence */
  66. static char jis_kso = JIS_KSO;
  67. static char jis_ksi = JIS_KSI;
  68. static char hex_tag = HEXTAG;
  69. /*******************************************************************
  70. SHIFT JIS functions
  71. ********************************************************************/
  /*******************************************************************
   Split S1 into tokens separated by any char of S2, strtok() fashion:
   pass the string on the first call and NULL on the following calls.
   S1 contains SHIFT JIS chars; both bytes of a two-byte character and
   single-byte kana are never taken for separators.
   S1 is modified in place and the scan position is kept in a static
   variable, so the function is not reentrant.
   Returns the next token, or NULL when none is left.
  ********************************************************************/
  static char *sj_strtok(char *s1, const char *s2)
  {
      static char *resume = NULL;   /* where the next call continues */
      char *token;
      char *cur;

      if (s1 == NULL) {
          if (resume == NULL) {
              return NULL;
          }
          s1 = resume;
      }

      token = s1;
      cur = s1;
      while (*cur != '\0') {
          if (is_shift_jis (*cur)) {
              /* A lead byte that is the very last byte of the string
                 must not carry the scan across the terminator. */
              cur += (cur[1] != '\0') ? 2 : 1;
              continue;
          }
          if (is_kana (*cur)) {
              cur++;
              continue;
          }
          if (strchr (s2, *cur) != NULL) {
              if (cur != token) {
                  /* separator closes a non-empty token */
                  resume = cur + 1;
                  *cur = '\0';
                  return token;
              }
              /* leading separator: the token starts after it */
              token = cur + 1;
          }
          cur++;
      }

      resume = NULL;
      return (*token != '\0') ? token : NULL;
  }
  /*******************************************************************
   Search string S2 in S1.
   S1 contains SHIFT JIS chars; a match is only tried at character
   boundaries, so the second byte of a two-byte character can never
   start a match.
   Returns the first match, S1 itself when S2 is empty, else NULL.
  ********************************************************************/
  static const char *sj_strstr(const char *s1, const char *s2)
  {
      size_t len = strlen (s2);

      if (len == 0) {
          return s1;
      }

      while (*s1 != '\0') {
          if (*s1 == *s2 && strncmp (s1, s2, len) == 0) {
              return s1;
          }
          /* Skip two bytes only when the second one really exists, so
             a truncated character cannot push us past the terminator. */
          if (is_shift_jis (*s1) && s1[1] != '\0') {
              s1 += 2;
          } else {
              s1++;
          }
      }
      return NULL;
  }
  /*******************************************************************
   Search char C from beginning of S.
   S contains SHIFT JIS chars; only the first byte of each character is
   compared, so the second byte of a two-byte character never matches.
   Returns the first occurrence or NULL. Unlike strchr(), searching
   for '\0' returns NULL.
  ********************************************************************/
  static const char *sj_strchr (const char *s, int c)
  {
      /* Compare as char, the way strchr() does, so that bytes >= 0x80
         match whether the caller passed them signed or unsigned. */
      const char wanted = (char) c;

      while (*s != '\0') {
          if (*s == wanted) {
              return s;
          }
          /* never step over the terminator on a truncated character */
          s += (is_shift_jis (*s) && s[1] != '\0') ? 2 : 1;
      }
      return NULL;
  }
  /*******************************************************************
   Search char C from the end of S.
   S contains SHIFT JIS chars; the string is walked forwards character
   by character and the last first-byte match is remembered.
   Returns the last occurrence or NULL.
  ********************************************************************/
  static const char *sj_strrchr(const char *s, int c)
  {
      /* Compare as char, the way strrchr() does, so that bytes >= 0x80
         match whether the caller passed them signed or unsigned. */
      const char wanted = (char) c;
      const char *last = NULL;

      while (*s != '\0') {
          if (*s == wanted) {
              last = s;
          }
          /* never step over the terminator on a truncated character */
          s += (is_shift_jis (*s) && s[1] != '\0') ? 2 : 1;
      }
      return last;
  }
  /*******************************************************************
   Kanji multibyte char skip function.
   Returns 2 for a SHIFT JIS lead byte, 1 for a kana byte and 0 for
   anything else.
  *******************************************************************/
  static size_t skip_kanji_multibyte_char(char c)
  {
      if (is_shift_jis(c)) {
          return 2;
      }
      return is_kana(c) ? 1 : 0;
  }
  180. /*******************************************************************
  181. Kanji multibyte char identification.
  182. *******************************************************************/
  183. static BOOL is_kanji_multibyte_char_1(char c)
  184. {
  185. return is_shift_jis(c);
  186. }
  187. /*******************************************************************
  188. The following functions are the only ones needed to do multibyte
  189. support for Hangul, Big5 and Simplified Chinese. Most of the
  190. real work for these codepages is done in the generic multibyte
  191. functions. The only reason these functions are needed at all
  192. is that the is_xxx(c) calls are really preprocessor macros.
  193. ********************************************************************/
  194. /*******************************************************************
  195. Hangul (Korean - code page 949) function.
  196. ********************************************************************/
  197. static BOOL hangul_is_multibyte_char_1(char c)
  198. {
  199. return is_hangul(c);
  200. }
  201. /*******************************************************************
  202. Big5 Traditional Chinese (code page 950) function.
  203. ********************************************************************/
  204. static BOOL big5_is_multibyte_char_1(char c)
  205. {
  206. return is_big5_c1(c);
  207. }
  208. /*******************************************************************
  209. Simplified Chinese (code page 936) function.
  210. ********************************************************************/
  211. static BOOL simpch_is_multibyte_char_1(char c)
  212. {
  213. return is_simpch_c1(c);
  214. }
  215. /*******************************************************************
  216. Generic multibyte functions - used by Hangul, Big5 and Simplified
  217. Chinese codepages.
  218. ********************************************************************/
  219. /*******************************************************************
  220. search token from S1 separated any char of S2
  221. S1 contains generic multibyte chars.
  222. ********************************************************************/
  223. static char *generic_multibyte_strtok(char *s1, const char *s2)
  224. {
  225. static char *s = NULL;
  226. char *q;
  227. if (!s1) {
  228. if (!s) {
  229. return NULL;
  230. }
  231. s1 = s;
  232. }
  233. for (q = s1; *s1; ) {
  234. if ((*is_multibyte_char_1)(*s1)) {
  235. s1 += 2;
  236. } else {
  237. char *p = strchr (s2, *s1);
  238. if (p) {
  239. if (s1 != q) {
  240. s = s1 + 1;
  241. *s1 = '\0';
  242. return q;
  243. }
  244. q = s1 + 1;
  245. }
  246. s1++;
  247. }
  248. }
  249. s = NULL;
  250. if (*q) {
  251. return q;
  252. }
  253. return NULL;
  254. }
  255. /*******************************************************************
  256. search string S2 from S1
  257. S1 contains generic multibyte chars.
  258. ********************************************************************/
  259. static const char *generic_multibyte_strstr(const char *s1, const char *s2)
  260. {
  261. size_t len = strlen (s2);
  262. if (!*s2)
  263. return (const char *) s1;
  264. for (;*s1;) {
  265. if (*s1 == *s2) {
  266. if (strncmp (s1, s2, len) == 0)
  267. return (const char *) s1;
  268. }
  269. if ((*is_multibyte_char_1)(*s1)) {
  270. s1 += 2;
  271. } else {
  272. s1++;
  273. }
  274. }
  275. return NULL;
  276. }
  277. /*******************************************************************
  278. Search char C from beginning of S.
  279. S contains generic multibyte chars.
  280. ********************************************************************/
  281. static const char *generic_multibyte_strchr(const char *s, int c)
  282. {
  283. for (; *s; ) {
  284. if (*s == c)
  285. return (const char *) s;
  286. if ((*is_multibyte_char_1)(*s)) {
  287. s += 2;
  288. } else {
  289. s++;
  290. }
  291. }
  292. return NULL;
  293. }
  294. /*******************************************************************
  295. Search char C end of S.
  296. S contains generic multibyte chars.
  297. ********************************************************************/
  298. static const char *generic_multibyte_strrchr(const char *s, int c)
  299. {
  300. const char *q;
  301. for (q = 0; *s; ) {
  302. if (*s == c) {
  303. q = (const char *) s;
  304. }
  305. if ((*is_multibyte_char_1)(*s)) {
  306. s += 2;
  307. } else {
  308. s++;
  309. }
  310. }
  311. return q;
  312. }
  313. /*******************************************************************
  314. Generic multibyte char skip function.
  315. *******************************************************************/
  316. static size_t skip_generic_multibyte_char(char c)
  317. {
  318. if( (*is_multibyte_char_1)(c)) {
  319. return 2;
  320. }
  321. return 0;
  322. }
  323. /*******************************************************************
  324. Code conversion
  325. ********************************************************************/
  /* conversion buffer: scratch area shared by the converters below */
  enum { CVTBUF_SIZE = 1024 };
  static char cvtbuf[CVTBUF_SIZE];
  328. /*******************************************************************
  329. EUC <-> SJIS
  330. ********************************************************************/
  /* Convert one EUC two-byte code (HI, LO) to its SHIFT JIS code,
     returned as (first byte << 8) | second byte. */
  static int euc2sjis (int hi, int lo)
  {
      const int low_half = (hi < 0xdf);
      int sj_hi;
      int sj_lo;

      if ((hi & 1) == 0) {
          sj_hi = hi / 2 + (low_half ? 0x30 : 0x70);
          sj_lo = lo - 2;
      } else {
          sj_hi = hi / 2 + (low_half ? 0x31 : 0x71);
          sj_lo = lo - ((lo >= 0xe0) ? 0x60 : 0x61);
      }
      return (sj_hi << 8) | sj_lo;
  }
  /* Convert one SHIFT JIS two-byte code (HI, LO) to its EUC code,
     returned as (first byte << 8) | second byte. */
  static int sjis2euc (int hi, int lo)
  {
      const int upper_block = (hi >= 0xe0);
      int euc_hi;
      int euc_lo;

      if (lo < 0x9f) {
          euc_hi = hi * 2 - (upper_block ? 0xe1 : 0x61);
          euc_lo = lo + ((lo >= 0x7f) ? 0x60 : 0x61);
      } else {
          euc_hi = hi * 2 - (upper_block ? 0xe0 : 0x60);
          euc_lo = lo + 2;
      }
      return (euc_hi << 8) | euc_lo;
  }
  347. /*******************************************************************
  348. Convert FROM contain SHIFT JIS codes to EUC codes
  349. return converted buffer
  350. ********************************************************************/
  351. static char *sj_to_euc(char *from, BOOL overwrite)
  352. {
  353. char *out;
  354. char *save;
  355. save = (char *) from;
  356. for (out = cvtbuf; *from;) {
  357. if (is_shift_jis (*from)) {
  358. int code = sjis2euc ((int) from[0] & 0xff, (int) from[1] & 0xff);
  359. *out++ = (code >> 8) & 0xff;
  360. *out++ = code;
  361. from += 2;
  362. } else if (is_kana (*from)) {
  363. *out++ = (char)euc_kana;
  364. *out++ = *from++;
  365. } else {
  366. *out++ = *from++;
  367. }
  368. }
  369. *out = 0;
  370. if (overwrite) {
  371. pstrcpy((char *) save, (char *) cvtbuf);
  372. return (char *) save;
  373. } else {
  374. return cvtbuf;
  375. }
  376. }
  377. /*******************************************************************
  378. Convert FROM contain EUC codes to SHIFT JIS codes
  379. return converted buffer
  380. ********************************************************************/
  381. static char *euc_to_sj(char *from, BOOL overwrite)
  382. {
  383. char *out;
  384. char *save;
  385. save = (char *) from;
  386. for (out = cvtbuf; *from; ) {
  387. if (is_euc (*from)) {
  388. int code = euc2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
  389. *out++ = (code >> 8) & 0xff;
  390. *out++ = code;
  391. from += 2;
  392. } else if (is_euc_kana (*from)) {
  393. *out++ = from[1];
  394. from += 2;
  395. } else {
  396. *out++ = *from++;
  397. }
  398. }
  399. *out = 0;
  400. if (overwrite) {
  401. pstrcpy(save, (char *) cvtbuf);
  402. return save;
  403. } else {
  404. return cvtbuf;
  405. }
  406. }
  407. /*******************************************************************
  408. JIS7,JIS8,JUNET <-> SJIS
  409. ********************************************************************/
  /* Convert one SHIFT JIS two-byte code (HI, LO) to its JIS code,
     returned as (first byte << 8) | second byte. */
  static int sjis2jis(int hi, int lo)
  {
      const int upper_block = (hi >= 0xe0);
      int jis_hi;
      int jis_lo;

      if (lo < 0x9f) {
          jis_hi = hi * 2 - (upper_block ? 0x161 : 0xe1);
          jis_lo = lo - ((lo >= 0x7f) ? 0x20 : 0x1f);
      } else {
          jis_hi = hi * 2 - (upper_block ? 0x160 : 0xe0);
          jis_lo = lo - 0x7e;
      }
      return (jis_hi << 8) | jis_lo;
  }
  /* Convert one JIS two-byte code (HI, LO) to its SHIFT JIS code,
     returned as (first byte << 8) | second byte. */
  static int jis2sjis(int hi, int lo)
  {
      const int low_half = (hi < 0x5f);
      int sj_hi;
      int sj_lo;

      if ((hi & 1) == 0) {
          sj_hi = hi / 2 + (low_half ? 0x70 : 0xb0);
          sj_lo = lo + 0x7e;
      } else {
          sj_hi = hi / 2 + (low_half ? 0x71 : 0xb1);
          sj_lo = lo + ((lo >= 0x60) ? 0x20 : 0x1f);
      }
      return (sj_hi << 8) | sj_lo;
  }
  426. /*******************************************************************
  427. Convert FROM contain JIS codes to SHIFT JIS codes
  428. return converted buffer
  429. ********************************************************************/
  430. static char *jis8_to_sj(char *from, BOOL overwrite)
  431. {
  432. char *out;
  433. int shifted;
  434. char *save;
  435. shifted = _KJ_ROMAN;
  436. save = (char *) from;
  437. for (out = cvtbuf; *from;) {
  438. if (is_esc (*from)) {
  439. if (is_so1 (from[1]) && is_so2 (from[2])) {
  440. shifted = _KJ_KANJI;
  441. from += 3;
  442. } else if (is_si1 (from[1]) && is_si2 (from[2])) {
  443. shifted = _KJ_ROMAN;
  444. from += 3;
  445. } else { /* sequence error */
  446. goto normal;
  447. }
  448. } else {
  449. normal:
  450. switch (shifted) {
  451. default:
  452. case _KJ_ROMAN:
  453. *out++ = *from++;
  454. break;
  455. case _KJ_KANJI:
  456. {
  457. int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
  458. *out++ = (code >> 8) & 0xff;
  459. *out++ = code;
  460. from += 2;
  461. }
  462. break;
  463. }
  464. }
  465. }
  466. *out = 0;
  467. if (overwrite) {
  468. pstrcpy (save, (char *) cvtbuf);
  469. return save;
  470. } else {
  471. return cvtbuf;
  472. }
  473. }
  474. /*******************************************************************
  475. Convert FROM contain SHIFT JIS codes to JIS codes
  476. return converted buffer
  477. ********************************************************************/
  478. static char *sj_to_jis8(char *from, BOOL overwrite)
  479. {
  480. char *out;
  481. int shifted;
  482. char *save;
  483. shifted = _KJ_ROMAN;
  484. save = (char *) from;
  485. for (out = cvtbuf; *from; ) {
  486. if (is_shift_jis (*from)) {
  487. int code;
  488. switch (shifted) {
  489. case _KJ_ROMAN: /* to KANJI */
  490. *out++ = jis_esc;
  491. *out++ = jis_so1;
  492. *out++ = jis_kso;
  493. shifted = _KJ_KANJI;
  494. break;
  495. }
  496. code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff);
  497. *out++ = (code >> 8) & 0xff;
  498. *out++ = code;
  499. from += 2;
  500. } else {
  501. switch (shifted) {
  502. case _KJ_KANJI: /* to ROMAN/KANA */
  503. *out++ = jis_esc;
  504. *out++ = jis_si1;
  505. *out++ = jis_ksi;
  506. shifted = _KJ_ROMAN;
  507. break;
  508. }
  509. *out++ = *from++;
  510. }
  511. }
  512. switch (shifted) {
  513. case _KJ_KANJI: /* to ROMAN/KANA */
  514. *out++ = jis_esc;
  515. *out++ = jis_si1;
  516. *out++ = jis_ksi;
  517. shifted = _KJ_ROMAN;
  518. break;
  519. }
  520. *out = 0;
  521. if (overwrite) {
  522. pstrcpy (save, (char *) cvtbuf);
  523. return save;
  524. } else {
  525. return cvtbuf;
  526. }
  527. }
  528. /*******************************************************************
  529. Convert FROM contain 7 bits JIS codes to SHIFT JIS codes
  530. return converted buffer
  531. ********************************************************************/
  532. static char *jis7_to_sj(char *from, BOOL overwrite)
  533. {
  534. char *out;
  535. int shifted;
  536. char *save;
  537. shifted = _KJ_ROMAN;
  538. save = (char *) from;
  539. for (out = cvtbuf; *from;) {
  540. if (is_esc (*from)) {
  541. if (is_so1 (from[1]) && is_so2 (from[2])) {
  542. shifted = _KJ_KANJI;
  543. from += 3;
  544. } else if (is_si1 (from[1]) && is_si2 (from[2])) {
  545. shifted = _KJ_ROMAN;
  546. from += 3;
  547. } else { /* sequence error */
  548. goto normal;
  549. }
  550. } else if (is_so (*from)) {
  551. shifted = _KJ_KANA; /* to KANA */
  552. from++;
  553. } else if (is_si (*from)) {
  554. shifted = _KJ_ROMAN; /* to ROMAN */
  555. from++;
  556. } else {
  557. normal:
  558. switch (shifted) {
  559. default:
  560. case _KJ_ROMAN:
  561. *out++ = *from++;
  562. break;
  563. case _KJ_KANJI:
  564. {
  565. int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
  566. *out++ = (code >> 8) & 0xff;
  567. *out++ = code;
  568. from += 2;
  569. }
  570. break;
  571. case _KJ_KANA:
  572. *out++ = ((int) from[0]) + 0x80;
  573. break;
  574. }
  575. }
  576. }
  577. *out = 0;
  578. if (overwrite) {
  579. pstrcpy (save, (char *) cvtbuf);
  580. return save;
  581. } else {
  582. return cvtbuf;
  583. }
  584. }
  585. /*******************************************************************
  586. Convert FROM contain SHIFT JIS codes to 7 bits JIS codes
  587. return converted buffer
  588. ********************************************************************/
  589. static char *sj_to_jis7(char *from, BOOL overwrite)
  590. {
  591. char *out;
  592. int shifted;
  593. char *save;
  594. shifted = _KJ_ROMAN;
  595. save = (char *) from;
  596. for (out = cvtbuf; *from; ) {
  597. if (is_shift_jis (*from)) {
  598. int code;
  599. switch (shifted) {
  600. case _KJ_KANA:
  601. *out++ = jis_si; /* to ROMAN and through down */
  602. case _KJ_ROMAN: /* to KANJI */
  603. *out++ = jis_esc;
  604. *out++ = jis_so1;
  605. *out++ = jis_kso;
  606. shifted = _KJ_KANJI;
  607. break;
  608. }
  609. code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff);
  610. *out++ = (code >> 8) & 0xff;
  611. *out++ = code;
  612. from += 2;
  613. } else if (is_kana (from[0])) {
  614. switch (shifted) {
  615. case _KJ_KANJI: /* to ROMAN */
  616. *out++ = jis_esc;
  617. *out++ = jis_si1;
  618. *out++ = jis_ksi;
  619. case _KJ_ROMAN: /* to KANA */
  620. *out++ = jis_so;
  621. shifted = _KJ_KANA;
  622. break;
  623. }
  624. *out++ = ((int) *from++) - 0x80;
  625. } else {
  626. switch (shifted) {
  627. case _KJ_KANA:
  628. *out++ = jis_si; /* to ROMAN */
  629. shifted = _KJ_ROMAN;
  630. break;
  631. case _KJ_KANJI: /* to ROMAN */
  632. *out++ = jis_esc;
  633. *out++ = jis_si1;
  634. *out++ = jis_ksi;
  635. shifted = _KJ_ROMAN;
  636. break;
  637. }
  638. *out++ = *from++;
  639. }
  640. }
  641. switch (shifted) {
  642. case _KJ_KANA:
  643. *out++ = jis_si; /* to ROMAN */
  644. break;
  645. case _KJ_KANJI: /* to ROMAN */
  646. *out++ = jis_esc;
  647. *out++ = jis_si1;
  648. *out++ = jis_ksi;
  649. break;
  650. }
  651. *out = 0;
  652. if (overwrite) {
  653. pstrcpy (save, (char *) cvtbuf);
  654. return save;
  655. } else {
  656. return cvtbuf;
  657. }
  658. }
  659. /*******************************************************************
  660. Convert FROM contain 7 bits JIS(junet) codes to SHIFT JIS codes
  661. return converted buffer
  662. ********************************************************************/
  663. static char *junet_to_sj(char *from, BOOL overwrite)
  664. {
  665. char *out;
  666. int shifted;
  667. char *save;
  668. shifted = _KJ_ROMAN;
  669. save = (char *) from;
  670. for (out = cvtbuf; *from;) {
  671. if (is_esc (*from)) {
  672. if (is_so1 (from[1]) && is_so2 (from[2])) {
  673. shifted = _KJ_KANJI;
  674. from += 3;
  675. } else if (is_si1 (from[1]) && is_si2 (from[2])) {
  676. shifted = _KJ_ROMAN;
  677. from += 3;
  678. } else if (is_juk1(from[1]) && is_juk2 (from[2])) {
  679. shifted = _KJ_KANA;
  680. from += 3;
  681. } else { /* sequence error */
  682. goto normal;
  683. }
  684. } else {
  685. normal:
  686. switch (shifted) {
  687. default:
  688. case _KJ_ROMAN:
  689. *out++ = *from++;
  690. break;
  691. case _KJ_KANJI:
  692. {
  693. int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
  694. *out++ = (code >> 8) & 0xff;
  695. *out++ = code;
  696. from += 2;
  697. }
  698. break;
  699. case _KJ_KANA:
  700. *out++ = ((int) from[0]) + 0x80;
  701. break;
  702. }
  703. }
  704. }
  705. *out = 0;
  706. if (overwrite) {
  707. pstrcpy (save, (char *) cvtbuf);
  708. return save;
  709. } else {
  710. return cvtbuf;
  711. }
  712. }
  713. /*******************************************************************
  714. Convert FROM contain SHIFT JIS codes to 7 bits JIS(junet) codes
  715. return converted buffer
  716. ********************************************************************/
  717. static char *sj_to_junet(char *from, BOOL overwrite)
  718. {
  719. char *out;
  720. int shifted;
  721. char *save;
  722. shifted = _KJ_ROMAN;
  723. save = (char *) from;
  724. for (out = cvtbuf; *from; ) {
  725. if (is_shift_jis (*from)) {
  726. int code;
  727. switch (shifted) {
  728. case _KJ_KANA:
  729. case _KJ_ROMAN: /* to KANJI */
  730. *out++ = jis_esc;
  731. *out++ = jis_so1;
  732. *out++ = jis_so2;
  733. shifted = _KJ_KANJI;
  734. break;
  735. }
  736. code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff);
  737. *out++ = (code >> 8) & 0xff;
  738. *out++ = code;
  739. from += 2;
  740. } else if (is_kana (from[0])) {
  741. switch (shifted) {
  742. case _KJ_KANJI: /* to ROMAN */
  743. case _KJ_ROMAN: /* to KANA */
  744. *out++ = jis_esc;
  745. *out++ = junet_kana1;
  746. *out++ = junet_kana2;
  747. shifted = _KJ_KANA;
  748. break;
  749. }
  750. *out++ = ((int) *from++) - 0x80;
  751. } else {
  752. switch (shifted) {
  753. case _KJ_KANA:
  754. case _KJ_KANJI: /* to ROMAN */
  755. *out++ = jis_esc;
  756. *out++ = jis_si1;
  757. *out++ = jis_si2;
  758. shifted = _KJ_ROMAN;
  759. break;
  760. }
  761. *out++ = *from++;
  762. }
  763. }
  764. switch (shifted) {
  765. case _KJ_KANA:
  766. case _KJ_KANJI: /* to ROMAN */
  767. *out++ = jis_esc;
  768. *out++ = jis_si1;
  769. *out++ = jis_si2;
  770. break;
  771. }
  772. *out = 0;
  773. if (overwrite) {
  774. pstrcpy (save, (char *) cvtbuf);
  775. return save;
  776. } else {
  777. return cvtbuf;
  778. }
  779. }
  780. /*******************************************************************
  781. HEX <-> SJIS
  782. ********************************************************************/
  783. /* ":xx" -> a byte */
  784. static char *hex_to_sj(char *from, BOOL overwrite)
  785. {
  786. char *sp, *dp;
  787. sp = (char *) from;
  788. dp = cvtbuf;
  789. while (*sp) {
  790. if (*sp == hex_tag && isxdigit((int)sp[1]) && isxdigit((int)sp[2])) {
  791. *dp++ = (hex2bin (sp[1])<<4) | (hex2bin (sp[2]));
  792. sp += 3;
  793. } else
  794. *dp++ = *sp++;
  795. }
  796. *dp = '\0';
  797. if (overwrite) {
  798. pstrcpy ((char *) from, (char *) cvtbuf);
  799. return (char *) from;
  800. } else {
  801. return cvtbuf;
  802. }
  803. }
  804. /*******************************************************************
  805. kanji/kana -> ":xx"
  806. ********************************************************************/
  807. static char *sj_to_hex(char *from, BOOL overwrite)
  808. {
  809. unsigned char *sp, *dp;
  810. sp = (unsigned char*) from;
  811. dp = (unsigned char*) cvtbuf;
  812. while (*sp) {
  813. if (is_kana(*sp)) {
  814. *dp++ = hex_tag;
  815. *dp++ = bin2hex (((*sp)>>4)&0x0f);
  816. *dp++ = bin2hex ((*sp)&0x0f);
  817. sp++;
  818. } else if (is_shift_jis (*sp) && is_shift_jis2 (sp[1])) {
  819. *dp++ = hex_tag;
  820. *dp++ = bin2hex (((*sp)>>4)&0x0f);
  821. *dp++ = bin2hex ((*sp)&0x0f);
  822. sp++;
  823. *dp++ = hex_tag;
  824. *dp++ = bin2hex (((*sp)>>4)&0x0f);
  825. *dp++ = bin2hex ((*sp)&0x0f);
  826. sp++;
  827. } else
  828. *dp++ = *sp++;
  829. }
  830. *dp = '\0';
  831. if (overwrite) {
  832. pstrcpy ((char *) from, (char *) cvtbuf);
  833. return (char *) from;
  834. } else {
  835. return cvtbuf;
  836. }
  837. }
  838. /*******************************************************************
  839. CAP <-> SJIS
  840. ********************************************************************/
  841. /* ":xx" CAP -> a byte */
  842. static char *cap_to_sj(char *from, BOOL overwrite)
  843. {
  844. char *sp, *dp;
  845. sp = (char *) from;
  846. dp = cvtbuf;
  847. while (*sp) {
  848. /*
  849. * The only change between this and hex_to_sj is here. sj_to_cap only
  850. * translates characters greater or equal to 0x80 - make sure that here
  851. * we only do the reverse (that's why the strchr is used rather than
  852. * isxdigit. Based on fix from ado@elsie.nci.nih.gov (Arthur David Olson).
  853. */
  854. if (*sp == hex_tag && (strchr ("89abcdefABCDEF", sp[1]) != NULL) && isxdigit((int)sp[2])) {
  855. *dp++ = (hex2bin (sp[1])<<4) | (hex2bin (sp[2]));
  856. sp += 3;
  857. } else
  858. *dp++ = *sp++;
  859. }
  860. *dp = '\0';
  861. if (overwrite) {
  862. pstrcpy ((char *) from, (char *) cvtbuf);
  863. return (char *) from;
  864. } else {
  865. return cvtbuf;
  866. }
  867. }
  868. /*******************************************************************
  869. kanji/kana -> ":xx" - CAP format.
  870. ********************************************************************/
  871. static char *sj_to_cap(char *from, BOOL overwrite)
  872. {
  873. unsigned char *sp, *dp;
  874. sp = (unsigned char*) from;
  875. dp = (unsigned char*) cvtbuf;
  876. while (*sp) {
  877. if (*sp >= 0x80) {
  878. *dp++ = hex_tag;
  879. *dp++ = bin2hex (((*sp)>>4)&0x0f);
  880. *dp++ = bin2hex ((*sp)&0x0f);
  881. sp++;
  882. } else {
  883. *dp++ = *sp++;
  884. }
  885. }
  886. *dp = '\0';
  887. if (overwrite) {
  888. pstrcpy ((char *) from, (char *) cvtbuf);
  889. return (char *) from;
  890. } else {
  891. return cvtbuf;
  892. }
  893. }
  894. /*******************************************************************
  895. sj to sj
  896. ********************************************************************/
  897. static char *sj_to_sj(char *from, BOOL overwrite)
  898. {
  899. if (!overwrite) {
  900. pstrcpy (cvtbuf, (char *) from);
  901. return cvtbuf;
  902. } else {
  903. return (char *) from;
  904. }
  905. }
  906. /************************************************************************
  907. conversion:
  908. _dos_to_unix _unix_to_dos
  909. ************************************************************************/
  910. static void setup_string_function(int codes)
  911. {
  912. switch (codes) {
  913. default:
  914. _dos_to_unix = dos2unix_format;
  915. _unix_to_dos = unix2dos_format;
  916. break;
  917. case SJIS_CODE:
  918. _dos_to_unix = sj_to_sj;
  919. _unix_to_dos = sj_to_sj;
  920. break;
  921. case EUC_CODE:
  922. _dos_to_unix = sj_to_euc;
  923. _unix_to_dos = euc_to_sj;
  924. break;
  925. case JIS7_CODE:
  926. _dos_to_unix = sj_to_jis7;
  927. _unix_to_dos = jis7_to_sj;
  928. break;
  929. case JIS8_CODE:
  930. _dos_to_unix = sj_to_jis8;
  931. _unix_to_dos = jis8_to_sj;
  932. break;
  933. case JUNET_CODE:
  934. _dos_to_unix = sj_to_junet;
  935. _unix_to_dos = junet_to_sj;
  936. break;
  937. case HEX_CODE:
  938. _dos_to_unix = sj_to_hex;
  939. _unix_to_dos = hex_to_sj;
  940. break;
  941. case CAP_CODE:
  942. _dos_to_unix = sj_to_cap;
  943. _unix_to_dos = cap_to_sj;
  944. break;
  945. }
  946. }
  947. /************************************************************************
  948. Interpret coding system.
  949. ************************************************************************/
  950. void interpret_coding_system(const char *str)
  951. {
  952. int codes = UNKNOWN_CODE;
  953. if (strequal (str, "sjis")) {
  954. codes = SJIS_CODE;
  955. } else if (strequal (str, "euc")) {
  956. codes = EUC_CODE;
  957. } else if (strequal (str, "cap")) {
  958. codes = CAP_CODE;
  959. hex_tag = HEXTAG;
  960. } else if (strequal (str, "hex")) {
  961. codes = HEX_CODE;
  962. hex_tag = HEXTAG;
  963. } else if (!strncasecmp (str, "hex", 3)) {
  964. codes = HEX_CODE;
  965. hex_tag = (str[3] ? str[3] : HEXTAG);
  966. } else if (strequal (str, "j8bb")) {
  967. codes = JIS8_CODE;
  968. jis_kso = 'B';
  969. jis_ksi = 'B';
  970. } else if (strequal (str, "j8bj") || strequal (str, "jis8")) {
  971. codes = JIS8_CODE;
  972. jis_kso = 'B';
  973. jis_ksi = 'J';
  974. } else if (strequal (str, "j8bh")) {
  975. codes = JIS8_CODE;
  976. jis_kso = 'B';
  977. jis_ksi = 'H';
  978. } else if (strequal (str, "j8@b")) {
  979. codes = JIS8_CODE;
  980. jis_kso = '@';
  981. jis_ksi = 'B';
  982. } else if (strequal (str, "j8@j")) {
  983. codes = JIS8_CODE;
  984. jis_kso = '@';
  985. jis_ksi = 'J';
  986. } else if (strequal (str, "j8@h")) {
  987. codes = JIS8_CODE;
  988. jis_kso = '@';
  989. jis_ksi = 'H';
  990. } else if (strequal (str, "j7bb")) {
  991. codes = JIS7_CODE;
  992. jis_kso = 'B';
  993. jis_ksi = 'B';
  994. } else if (strequal (str, "j7bj") || strequal (str, "jis7")) {
  995. codes = JIS7_CODE;
  996. jis_kso = 'B';
  997. jis_ksi = 'J';
  998. } else if (strequal (str, "j7bh")) {
  999. codes = JIS7_CODE;
  1000. jis_kso = 'B';
  1001. jis_ksi = 'H';
  1002. } else if (strequal (str, "j7@b")) {
  1003. codes = JIS7_CODE;
  1004. jis_kso = '@';
  1005. jis_ksi = 'B';
  1006. } else if (strequal (str, "j7@j")) {
  1007. codes = JIS7_CODE;
  1008. jis_kso = '@';
  1009. jis_ksi = 'J';
  1010. } else if (strequal (str, "j7@h")) {
  1011. codes = JIS7_CODE;
  1012. jis_kso = '@';
  1013. jis_ksi = 'H';
  1014. } else if (strequal (str, "jubb")) {
  1015. codes = JUNET_CODE;
  1016. jis_kso = 'B';
  1017. jis_ksi = 'B';
  1018. } else if (strequal (str, "jubj") || strequal (str, "junet")) {
  1019. codes = JUNET_CODE;
  1020. jis_kso = 'B';
  1021. jis_ksi = 'J';
  1022. } else if (strequal (str, "jubh")) {
  1023. codes = JUNET_CODE;
  1024. jis_kso = 'B';
  1025. jis_ksi = 'H';
  1026. } else if (strequal (str, "ju@b")) {
  1027. codes = JUNET_CODE;
  1028. jis_kso = '@';
  1029. jis_ksi = 'B';
  1030. } else if (strequal (str, "ju@j")) {
  1031. codes = JUNET_CODE;
  1032. jis_kso = '@';
  1033. jis_ksi = 'J';
  1034. } else if (strequal (str, "ju@h")) {
  1035. codes = JUNET_CODE;
  1036. jis_kso = '@';
  1037. jis_ksi = 'H';
  1038. }
  1039. setup_string_function (codes);
  1040. }
  1041. /*******************************************************************
  1042. Non multibyte char function.
  1043. *******************************************************************/
  1044. static size_t skip_non_multibyte_char(char c)
  1045. {
  1046. (void) c;
  1047. return 0;
  1048. }
  1049. /*******************************************************************
  1050. Function that always says a character isn't multibyte.
  1051. *******************************************************************/
  1052. static BOOL not_multibyte_char_1(char c)
  1053. {
  1054. (void) c;
  1055. return False;
  1056. }
  1057. /*******************************************************************
  1058. Function to determine if we are in a multibyte code page.
  1059. *******************************************************************/
  1060. static BOOL is_multibyte_codepage_val = False;
  1061. BOOL is_multibyte_codepage(void)
  1062. {
  1063. return is_multibyte_codepage_val;
  1064. }
  1065. /*******************************************************************
  1066. Setup the function pointers for the functions that are replaced
  1067. when multi-byte codepages are used.
  1068. The dos_to_unix and unix_to_dos function pointers are only
  1069. replaced by setup_string_function called by interpret_coding_system
  1070. above.
  1071. *******************************************************************/
  1072. void initialize_multibyte_vectors( int client_codepage)
  1073. {
  1074. switch( client_codepage )
  1075. {
  1076. case KANJI_CODEPAGE:
  1077. multibyte_strchr = sj_strchr;
  1078. multibyte_strrchr = sj_strrchr;
  1079. multibyte_strstr = sj_strstr;
  1080. multibyte_strtok = sj_strtok;
  1081. _skip_multibyte_char = skip_kanji_multibyte_char;
  1082. is_multibyte_char_1 = is_kanji_multibyte_char_1;
  1083. is_multibyte_codepage_val = True;
  1084. break;
  1085. case HANGUL_CODEPAGE:
  1086. multibyte_strchr = generic_multibyte_strchr;
  1087. multibyte_strrchr = generic_multibyte_strrchr;
  1088. multibyte_strstr = generic_multibyte_strstr;
  1089. multibyte_strtok = generic_multibyte_strtok;
  1090. _skip_multibyte_char = skip_generic_multibyte_char;
  1091. is_multibyte_char_1 = hangul_is_multibyte_char_1;
  1092. is_multibyte_codepage_val = True;
  1093. break;
  1094. case BIG5_CODEPAGE:
  1095. multibyte_strchr = generic_multibyte_strchr;
  1096. multibyte_strrchr = generic_multibyte_strrchr;
  1097. multibyte_strstr = generic_multibyte_strstr;
  1098. multibyte_strtok = generic_multibyte_strtok;
  1099. _skip_multibyte_char = skip_generic_multibyte_char;
  1100. is_multibyte_char_1 = big5_is_multibyte_char_1;
  1101. is_multibyte_codepage_val = True;
  1102. break;
  1103. case SIMPLIFIED_CHINESE_CODEPAGE:
  1104. multibyte_strchr = generic_multibyte_strchr;
  1105. multibyte_strrchr = generic_multibyte_strrchr;
  1106. multibyte_strstr = generic_multibyte_strstr;
  1107. multibyte_strtok = generic_multibyte_strtok;
  1108. _skip_multibyte_char = skip_generic_multibyte_char;
  1109. is_multibyte_char_1 = simpch_is_multibyte_char_1;
  1110. is_multibyte_codepage_val = True;
  1111. break;
  1112. /*
  1113. * Single char size code page.
  1114. */
  1115. default:
  1116. multibyte_strchr = (const char *(*)(const char *, int )) strchr;
  1117. multibyte_strrchr = (const char *(*)(const char *, int )) strrchr;
  1118. multibyte_strstr = (const char *(*)(const char *, const char *)) strstr;
  1119. multibyte_strtok = (char *(*)(char *, const char *)) strtok;
  1120. _skip_multibyte_char = skip_non_multibyte_char;
  1121. is_multibyte_char_1 = not_multibyte_char_1;
  1122. is_multibyte_codepage_val = False;
  1123. break;
  1124. }
  1125. }