strutil.c 15 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780
  1. /* common strings utilities
  2. Copyright (C) 2007 Free Software Foundation, Inc.
  3. Written 2007 by:
  4. Rostislav Benes
  5. The file_date routine is mostly from GNU's fileutils package,
  6. written by Richard Stallman and David MacKenzie.
  7. This program is free software; you can redistribute it and/or modify
  8. it under the terms of the GNU General Public License as published by
  9. the Free Software Foundation; either version 2 of the License, or
  10. (at your option) any later version.
  11. This program is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. GNU General Public License for more details.
  15. You should have received a copy of the GNU General Public License
  16. along with this program; if not, write to the Free Software
  17. Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. */
  19. #include <config.h>
  20. #include <stdlib.h>
  21. #include <stdio.h>
  22. #include <langinfo.h>
  23. #include <string.h>
  24. #include <errno.h>
  25. #include <stdarg.h>
  26. #include "lib/global.h"
  27. #include "lib/strutil.h"
  /* Codeset names that select the UTF-8 string class.  The list is
     NULL-terminated. */
  static const char *str_utf8_encodings[] = {
      "utf-8", "utf8",
      NULL
  };
  /* Standard 8-bit encodings: no wide and no multibyte characters.
     Each code page is listed with and without the dash; the list is
     NULL-terminated. */
  static const char *str_8bit_encodings[] = {
      "cp-1251", "cp1251",
      "cp-1250", "cp1250",
      "cp-866", "cp866",
      "ibm-866", "ibm866",
      "cp-850", "cp850",
      "cp-852", "cp852",
      "iso-8859", "iso8859",
      "koi8",
      NULL
  };
  53. /* terminal encoding*/
  54. static char *codeset = NULL;
  55. /* function for encoding specific operations*/
  56. static struct str_class used_class;
  57. GIConv str_cnv_to_term;
  58. GIConv str_cnv_from_term;
  59. GIConv str_cnv_not_convert;
  60. /* if enc is same encoding like on terminal*/
  61. static int
  62. str_test_not_convert (const char *enc)
  63. {
  64. return g_ascii_strcasecmp (enc, codeset) == 0;
  65. }
  66. GIConv
  67. str_crt_conv_to (const char *to_enc)
  68. {
  69. return (!str_test_not_convert (to_enc))
  70. ? g_iconv_open (to_enc, codeset) : str_cnv_not_convert;
  71. }
  72. GIConv
  73. str_crt_conv_from (const char *from_enc)
  74. {
  75. return (!str_test_not_convert (from_enc))
  76. ? g_iconv_open (codeset, from_enc) : str_cnv_not_convert;
  77. }
  78. void
  79. str_close_conv (GIConv conv)
  80. {
  81. if (conv != str_cnv_not_convert)
  82. g_iconv_close (conv);
  83. }
  84. static estr_t
  85. _str_convert (GIConv coder, const char *string, int size, GString * buffer)
  86. {
  87. estr_t state = ESTR_SUCCESS;
  88. gchar *tmp_buff = NULL;
  89. gssize left;
  90. gsize bytes_read = 0;
  91. gsize bytes_written = 0;
  92. GError *error = NULL;
  93. errno = 0;
  94. if (coder == INVALID_CONV)
  95. return ESTR_FAILURE;
  96. if (string == NULL || buffer == NULL)
  97. return ESTR_FAILURE;
  98. /*
  99. if (! used_class.is_valid_string (string))
  100. {
  101. return ESTR_FAILURE;
  102. }
  103. */
  104. if (size < 0)
  105. {
  106. size = strlen (string);
  107. }
  108. else
  109. {
  110. left = strlen (string);
  111. if (left < size)
  112. size = left;
  113. }
  114. left = size;
  115. g_iconv (coder, NULL, NULL, NULL, NULL);
  116. while (left)
  117. {
  118. tmp_buff = g_convert_with_iconv ((const gchar *) string,
  119. left,
  120. coder,
  121. &bytes_read,
  122. &bytes_written, &error);
  123. if (error)
  124. {
  125. int code = error->code;
  126. g_error_free (error);
  127. error = NULL;
  128. switch (code)
  129. {
  130. case G_CONVERT_ERROR_NO_CONVERSION:
  131. /* Conversion between the requested character sets is not supported. */
  132. tmp_buff = g_strnfill (strlen (string), '?');
  133. g_string_append (buffer, tmp_buff);
  134. g_free (tmp_buff);
  135. return ESTR_FAILURE;
  136. case G_CONVERT_ERROR_ILLEGAL_SEQUENCE:
  137. /* Invalid byte sequence in conversion input. */
  138. if ((tmp_buff == NULL) && (bytes_read != 0))
  139. /* recode valid byte sequence */
  140. tmp_buff = g_convert_with_iconv ((const gchar *) string,
  141. bytes_read,
  142. coder, NULL, NULL, NULL);
  143. if (tmp_buff != NULL)
  144. {
  145. g_string_append (buffer, tmp_buff);
  146. g_free (tmp_buff);
  147. }
  148. if ((int)bytes_read < left)
  149. {
  150. string += bytes_read + 1;
  151. size -= (bytes_read + 1);
  152. left -= (bytes_read + 1);
  153. g_string_append_c (buffer, *(string-1));
  154. }
  155. else
  156. {
  157. return ESTR_PROBLEM;
  158. }
  159. state = ESTR_PROBLEM;
  160. break;
  161. case G_CONVERT_ERROR_PARTIAL_INPUT:
  162. /* Partial character sequence at end of input. */
  163. g_string_append (buffer, tmp_buff);
  164. g_free (tmp_buff);
  165. if ((int)bytes_read < left)
  166. {
  167. left = left - bytes_read;
  168. tmp_buff = g_strnfill (left, '?');
  169. g_string_append (buffer, tmp_buff);
  170. g_free (tmp_buff);
  171. }
  172. return ESTR_PROBLEM;
  173. case G_CONVERT_ERROR_BAD_URI: /* Don't know how handle this error :( */
  174. case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH: /* Don't know how handle this error :( */
  175. case G_CONVERT_ERROR_FAILED: /* Conversion failed for some reason. */
  176. default:
  177. g_free (tmp_buff);
  178. return ESTR_FAILURE;
  179. }
  180. }
  181. else
  182. {
  183. if (tmp_buff != NULL)
  184. {
  185. if (*tmp_buff)
  186. {
  187. g_string_append (buffer, tmp_buff);
  188. g_free (tmp_buff);
  189. string += bytes_read;
  190. left -= bytes_read;
  191. }
  192. else
  193. {
  194. g_free (tmp_buff);
  195. g_string_append (buffer, string);
  196. return state;
  197. }
  198. }
  199. else
  200. {
  201. g_string_append (buffer, string);
  202. return ESTR_PROBLEM;
  203. }
  204. }
  205. }
  206. return state;
  207. }
  208. estr_t
  209. str_convert (GIConv coder, const char *string, GString * buffer)
  210. {
  211. return _str_convert (coder, string, -1, buffer);
  212. }
  213. estr_t
  214. str_nconvert (GIConv coder, const char *string, int size, GString * buffer)
  215. {
  216. return _str_convert (coder, string, size, buffer);
  217. }
  218. gchar *
  219. str_conv_gerror_message (GError *error, const char *def_msg)
  220. {
  221. return used_class.conv_gerror_message (error, def_msg);
  222. }
  223. estr_t
  224. str_vfs_convert_from (GIConv coder, const char *string, GString * buffer)
  225. {
  226. estr_t result;
  227. if (coder == str_cnv_not_convert)
  228. {
  229. g_string_append (buffer, string != NULL ? string : "");
  230. result = ESTR_SUCCESS;
  231. }
  232. else
  233. result = _str_convert (coder, string, -1, buffer);
  234. return result;
  235. }
  236. estr_t
  237. str_vfs_convert_to (GIConv coder, const char *string, int size,
  238. GString * buffer)
  239. {
  240. return used_class.vfs_convert_to (coder, string, size, buffer);
  241. }
  242. void
  243. str_printf (GString * buffer, const char *format, ...)
  244. {
  245. va_list ap;
  246. va_start (ap, format);
  247. #if GLIB_CHECK_VERSION (2, 14, 0)
  248. g_string_append_vprintf (buffer, format, ap);
  249. #else
  250. {
  251. gchar *tmp;
  252. tmp = g_strdup_vprintf (format, ap);
  253. g_string_append (buffer, tmp);
  254. g_free(tmp);
  255. }
  256. #endif
  257. va_end (ap);
  258. }
  259. void
  260. str_insert_replace_char (GString * buffer)
  261. {
  262. used_class.insert_replace_char (buffer);
  263. }
  264. estr_t
  265. str_translate_char (GIConv conv, const char *keys, size_t ch_size,
  266. char *output, size_t out_size)
  267. {
  268. size_t left;
  269. size_t cnv;
  270. g_iconv (conv, NULL, NULL, NULL, NULL);
  271. left = (ch_size == (size_t) (-1)) ? strlen (keys) : ch_size;
  272. cnv = g_iconv (conv, (gchar **) &keys, &left, &output, &out_size);
  273. if (cnv == (size_t)(-1)) {
  274. return (errno == EINVAL) ? ESTR_PROBLEM : ESTR_FAILURE;
  275. } else {
  276. output[0] = '\0';
  277. return ESTR_SUCCESS;
  278. }
  279. }
  /* Return the codeset name reported by nl_langinfo (CODESET).
     The returned string is not allocated by this function. */
  const char *
  str_detect_termencoding (void)
  {
      const char *locale_codeset = nl_langinfo (CODESET);

      return locale_codeset;
  }
  /* Count the entries of the NULL-terminated TABLE that are an ASCII
     case-insensitive prefix of ENCODING.  Returns 0 when ENCODING is NULL. */
  static int
  str_test_encoding_class (const char *encoding, const char **table)
  {
      const char **entry;
      int matches = 0;

      if (encoding == NULL)
          return 0;

      for (entry = table; *entry != NULL; entry++)
          if (g_ascii_strncasecmp (encoding, *entry, strlen (*entry)) == 0)
              matches++;

      return matches;
  }
  299. static void
  300. str_choose_str_functions ()
  301. {
  302. if (str_test_encoding_class (codeset, str_utf8_encodings))
  303. {
  304. used_class = str_utf8_init ();
  305. }
  306. else if (str_test_encoding_class (codeset, str_8bit_encodings))
  307. {
  308. used_class = str_8bit_init ();
  309. }
  310. else
  311. {
  312. used_class = str_ascii_init ();
  313. }
  314. }
  315. int
  316. str_isutf8 (const char *codeset_name)
  317. {
  318. int result = 0;
  319. if (str_test_encoding_class (codeset_name, str_utf8_encodings))
  320. {
  321. result = 1;
  322. }
  323. return result;
  324. }
  325. void
  326. str_init_strings (const char *termenc)
  327. {
  328. codeset = g_strdup ((termenc != NULL)
  329. ? termenc
  330. : str_detect_termencoding ());
  331. str_cnv_not_convert = g_iconv_open (codeset, codeset);
  332. if (str_cnv_not_convert == INVALID_CONV)
  333. {
  334. if (termenc != NULL)
  335. {
  336. g_free (codeset);
  337. codeset = g_strdup (str_detect_termencoding ());
  338. str_cnv_not_convert = g_iconv_open (codeset, codeset);
  339. }
  340. if (str_cnv_not_convert == INVALID_CONV)
  341. {
  342. g_free (codeset);
  343. codeset = g_strdup ("ascii");
  344. str_cnv_not_convert = g_iconv_open (codeset, codeset);
  345. }
  346. }
  347. str_cnv_to_term = str_cnv_not_convert;
  348. str_cnv_from_term = str_cnv_not_convert;
  349. str_choose_str_functions ();
  350. }
  351. void
  352. str_uninit_strings (void)
  353. {
  354. if (str_cnv_not_convert != INVALID_CONV)
  355. g_iconv_close (str_cnv_not_convert);
  356. g_free (codeset);
  357. }
  358. const char *
  359. str_term_form (const char *text)
  360. {
  361. return used_class.term_form (text);
  362. }
  363. const char *
  364. str_fit_to_term (const char *text, int width, align_crt_t just_mode)
  365. {
  366. return used_class.fit_to_term (text, width, just_mode);
  367. }
  368. const char *
  369. str_term_trim (const char *text, int width)
  370. {
  371. return used_class.term_trim (text, width);
  372. }
  373. void
  374. str_msg_term_size (const char *text, int *lines, int *columns)
  375. {
  376. used_class.msg_term_size (text, lines, columns);
  377. }
  378. const char *
  379. str_term_substring (const char *text, int start, int width)
  380. {
  381. return used_class.term_substring (text, start, width);
  382. }
  383. char *
  384. str_get_next_char (char *text)
  385. {
  386. used_class.cnext_char ((const char **) &text);
  387. return text;
  388. }
  389. const char *
  390. str_cget_next_char (const char *text)
  391. {
  392. used_class.cnext_char(&text);
  393. return text;
  394. }
  395. void
  396. str_next_char (char **text)
  397. {
  398. used_class.cnext_char ((const char **) text);
  399. }
  400. void
  401. str_cnext_char (const char **text)
  402. {
  403. used_class.cnext_char (text);
  404. }
  405. char *
  406. str_get_prev_char (char *text)
  407. {
  408. used_class.cprev_char ((const char **) &text);
  409. return text;
  410. }
  411. const char *
  412. str_cget_prev_char (const char *text)
  413. {
  414. used_class.cprev_char (&text);
  415. return text;
  416. }
  417. void
  418. str_prev_char (char **text)
  419. {
  420. used_class.cprev_char ((const char **) text);
  421. }
  422. void
  423. str_cprev_char (const char **text)
  424. {
  425. used_class.cprev_char (text);
  426. }
  427. char *
  428. str_get_next_char_safe (char *text)
  429. {
  430. used_class.cnext_char_safe ((const char **) &text);
  431. return text;
  432. }
  433. const char *
  434. str_cget_next_char_safe (const char *text)
  435. {
  436. used_class.cnext_char_safe (&text);
  437. return text;
  438. }
  439. void
  440. str_next_char_safe (char **text)
  441. {
  442. used_class.cnext_char_safe ((const char **) text);
  443. }
  444. void
  445. str_cnext_char_safe (const char **text)
  446. {
  447. used_class.cnext_char_safe (text);
  448. }
  449. char *
  450. str_get_prev_char_safe (char *text)
  451. {
  452. used_class.cprev_char_safe ((const char **) &text);
  453. return text;
  454. }
  455. const char *
  456. str_cget_prev_char_safe (const char *text)
  457. {
  458. used_class.cprev_char_safe (&text);
  459. return text;
  460. }
  461. void
  462. str_prev_char_safe (char **text)
  463. {
  464. used_class.cprev_char_safe ((const char **) text);
  465. }
  466. void
  467. str_cprev_char_safe (const char **text)
  468. {
  469. used_class.cprev_char_safe (text);
  470. }
  471. int
  472. str_next_noncomb_char (char **text)
  473. {
  474. return used_class.cnext_noncomb_char ((const char **) text);
  475. }
  476. int
  477. str_cnext_noncomb_char (const char **text)
  478. {
  479. return used_class.cnext_noncomb_char (text);
  480. }
  481. int
  482. str_prev_noncomb_char (char **text, const char *begin)
  483. {
  484. return used_class.cprev_noncomb_char ((const char **) text, begin);
  485. }
  486. int
  487. str_cprev_noncomb_char (const char **text, const char *begin)
  488. {
  489. return used_class.cprev_noncomb_char (text, begin);
  490. }
  491. int
  492. str_is_valid_char (const char *ch, size_t size)
  493. {
  494. return used_class.is_valid_char (ch, size);
  495. }
  496. int
  497. str_term_width1 (const char *text)
  498. {
  499. return used_class.term_width1 (text);
  500. }
  501. int
  502. str_term_width2 (const char *text, size_t length)
  503. {
  504. return used_class.term_width2 (text, length);
  505. }
  506. int
  507. str_term_char_width (const char *text)
  508. {
  509. return used_class.term_char_width (text);
  510. }
  511. int
  512. str_offset_to_pos (const char *text, size_t length)
  513. {
  514. return used_class.offset_to_pos (text, length);
  515. }
  516. int
  517. str_length (const char *text)
  518. {
  519. return used_class.length (text);
  520. }
  /* Return the distance in bytes between TEXT and the position produced by
     str_cget_next_char_safe (TEXT). */
  int
  str_length_char (const char *text)
  {
      const char *next = str_cget_next_char_safe (text);

      return next - text;
  }
  526. int
  527. str_length2 (const char *text, int size)
  528. {
  529. return used_class.length2 (text, size);
  530. }
  531. int
  532. str_length_noncomb (const char *text)
  533. {
  534. return used_class.length_noncomb (text);
  535. }
  536. int
  537. str_column_to_pos (const char *text, size_t pos)
  538. {
  539. return used_class.column_to_pos (text, pos);
  540. }
  541. int
  542. str_isspace (const char *ch)
  543. {
  544. return used_class.isspace (ch);
  545. }
  546. int
  547. str_ispunct (const char *ch)
  548. {
  549. return used_class.ispunct (ch);
  550. }
  551. int
  552. str_isalnum (const char *ch)
  553. {
  554. return used_class.isalnum (ch);
  555. }
  556. int
  557. str_isdigit (const char *ch)
  558. {
  559. return used_class.isdigit (ch);
  560. }
  561. int
  562. str_toupper (const char *ch, char **out, size_t * remain)
  563. {
  564. return used_class.toupper (ch, out, remain);
  565. }
  566. int
  567. str_tolower (const char *ch, char **out, size_t * remain)
  568. {
  569. return used_class.tolower (ch, out, remain);
  570. }
  571. int
  572. str_isprint (const char *ch)
  573. {
  574. return used_class.isprint (ch);
  575. }
  576. int
  577. str_iscombiningmark (const char *ch)
  578. {
  579. return used_class.iscombiningmark (ch);
  580. }
  581. const char *
  582. str_trunc (const char *text, int width)
  583. {
  584. return used_class.trunc (text, width);
  585. }
  586. char *
  587. str_create_search_needle (const char *needle, int case_sen)
  588. {
  589. return used_class.create_search_needle (needle, case_sen);
  590. }
  591. void
  592. str_release_search_needle (char *needle, int case_sen)
  593. {
  594. used_class.release_search_needle (needle, case_sen);
  595. }
  596. const char *
  597. str_search_first (const char *text, const char *search, int case_sen)
  598. {
  599. return used_class.search_first (text, search, case_sen);
  600. }
  601. const char *
  602. str_search_last (const char *text, const char *search, int case_sen)
  603. {
  604. return used_class.search_last (text, search, case_sen);
  605. }
  606. int
  607. str_is_valid_string (const char *text)
  608. {
  609. return used_class.is_valid_string (text);
  610. }
  611. int
  612. str_compare (const char *t1, const char *t2)
  613. {
  614. return used_class.compare (t1, t2);
  615. }
  616. int
  617. str_ncompare (const char *t1, const char *t2)
  618. {
  619. return used_class.ncompare (t1, t2);
  620. }
  621. int
  622. str_casecmp (const char *t1, const char *t2)
  623. {
  624. return used_class.casecmp (t1, t2);
  625. }
  626. int
  627. str_ncasecmp (const char *t1, const char *t2)
  628. {
  629. return used_class.ncasecmp (t1, t2);
  630. }
  631. int
  632. str_prefix (const char *text, const char *prefix)
  633. {
  634. return used_class.prefix (text, prefix);
  635. }
  636. int
  637. str_caseprefix (const char *text, const char *prefix)
  638. {
  639. return used_class.caseprefix (text, prefix);
  640. }
  641. void
  642. str_fix_string (char *text)
  643. {
  644. used_class.fix_string (text);
  645. }
  646. char *
  647. str_create_key (const char *text, int case_sen)
  648. {
  649. return used_class.create_key (text, case_sen);
  650. }
  651. char *
  652. str_create_key_for_filename (const char *text, int case_sen)
  653. {
  654. return used_class.create_key_for_filename (text, case_sen);
  655. }
  656. int
  657. str_key_collate (const char *t1, const char *t2, int case_sen)
  658. {
  659. return used_class.key_collate (t1, t2, case_sen);
  660. }
  661. void
  662. str_release_key (char *key, int case_sen)
  663. {
  664. used_class.release_key (key, case_sen);
  665. }