strutil.c 16 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805
  1. /* common strings utilities
  2. Copyright (C) 2007 Free Software Foundation, Inc.
  3. Written 2007 by:
  4. Rostislav Benes
  5. The file_date routine is mostly from GNU's fileutils package,
  6. written by Richard Stallman and David MacKenzie.
  7. This program is free software; you can redistribute it and/or modify
  8. it under the terms of the GNU General Public License as published by
  9. the Free Software Foundation; either version 2 of the License, or
  10. (at your option) any later version.
  11. This program is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. GNU General Public License for more details.
  15. You should have received a copy of the GNU General Public License
  16. along with this program; if not, write to the Free Software
  17. Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. */
  19. #include <config.h>
  20. #include <stdlib.h>
  21. #include <stdio.h>
  22. #include <langinfo.h>
  23. #include <string.h>
  24. #include <errno.h>
  25. #include <stdarg.h>
  26. #include "lib/global.h"
  27. #include "lib/strutil.h"
  /* Encoding names treated as UTF-8.  str_test_encoding_class () compares each
     entry, ASCII case-insensitively, against the beginning of a codeset name,
     so every entry acts as a prefix.  The list is NULL-terminated. */
  static const char *str_utf8_encodings[] = {
      "utf-8",
      "utf8",
      NULL
  };
  /* Standard 8-bit encodings: no wide or multibyte characters.
     Entries are matched as case-insensitive prefixes by
     str_test_encoding_class (), e.g. "iso-8859" also covers "iso-8859-2".
     The list is NULL-terminated. */
  static const char *str_8bit_encodings[] = {
      "cp-1251",
      "cp1251",
      "cp-1250",
      "cp1250",
      "cp-866",
      "cp866",
      "ibm-866",
      "ibm866",
      "cp-850",
      "cp850",
      "cp-852",
      "cp852",
      "iso-8859",
      "iso8859",
      "koi8",
      NULL
  };
  /* Name of the terminal encoding.  Allocated by str_init_strings () and
     released by str_uninit_strings (). */
  static char *codeset = NULL;

  /* Table of encoding-specific operations; filled in by
     str_choose_str_functions () according to codeset. */
  static struct str_class used_class;

  /* Conversion descriptors.  str_init_strings () opens a single
     codeset -> codeset descriptor, stores it in str_cnv_not_convert and
     assigns the same value to the other two. */
  GIConv str_cnv_to_term;
  GIConv str_cnv_from_term;
  GIConv str_cnv_not_convert;
  60. /* if enc is same encoding like on terminal */
  61. static int
  62. str_test_not_convert (const char *enc)
  63. {
  64. return g_ascii_strcasecmp (enc, codeset) == 0;
  65. }
  66. GIConv
  67. str_crt_conv_to (const char *to_enc)
  68. {
  69. return (!str_test_not_convert (to_enc)) ? g_iconv_open (to_enc, codeset) : str_cnv_not_convert;
  70. }
  71. GIConv
  72. str_crt_conv_from (const char *from_enc)
  73. {
  74. return (!str_test_not_convert (from_enc))
  75. ? g_iconv_open (codeset, from_enc) : str_cnv_not_convert;
  76. }
  77. void
  78. str_close_conv (GIConv conv)
  79. {
  80. if (conv != str_cnv_not_convert)
  81. g_iconv_close (conv);
  82. }
  83. static estr_t
  84. _str_convert (GIConv coder, const char *string, int size, GString * buffer)
  85. {
  86. estr_t state = ESTR_SUCCESS;
  87. gchar *tmp_buff = NULL;
  88. gssize left;
  89. gsize bytes_read = 0;
  90. gsize bytes_written = 0;
  91. GError *error = NULL;
  92. errno = 0;
  93. if (coder == INVALID_CONV)
  94. return ESTR_FAILURE;
  95. if (string == NULL || buffer == NULL)
  96. return ESTR_FAILURE;
  97. /*
  98. if (! used_class.is_valid_string (string))
  99. {
  100. return ESTR_FAILURE;
  101. }
  102. */
  103. if (size < 0)
  104. {
  105. size = strlen (string);
  106. }
  107. else
  108. {
  109. left = strlen (string);
  110. if (left < size)
  111. size = left;
  112. }
  113. left = size;
  114. g_iconv (coder, NULL, NULL, NULL, NULL);
  115. while (left)
  116. {
  117. tmp_buff = g_convert_with_iconv ((const gchar *) string,
  118. left, coder, &bytes_read, &bytes_written, &error);
  119. if (error)
  120. {
  121. int code = error->code;
  122. g_error_free (error);
  123. error = NULL;
  124. switch (code)
  125. {
  126. case G_CONVERT_ERROR_NO_CONVERSION:
  127. /* Conversion between the requested character sets is not supported. */
  128. tmp_buff = g_strnfill (strlen (string), '?');
  129. g_string_append (buffer, tmp_buff);
  130. g_free (tmp_buff);
  131. return ESTR_FAILURE;
  132. case G_CONVERT_ERROR_ILLEGAL_SEQUENCE:
  133. /* Invalid byte sequence in conversion input. */
  134. if ((tmp_buff == NULL) && (bytes_read != 0))
  135. /* recode valid byte sequence */
  136. tmp_buff = g_convert_with_iconv ((const gchar *) string,
  137. bytes_read, coder, NULL, NULL, NULL);
  138. if (tmp_buff != NULL)
  139. {
  140. g_string_append (buffer, tmp_buff);
  141. g_free (tmp_buff);
  142. }
  143. if ((int) bytes_read < left)
  144. {
  145. string += bytes_read + 1;
  146. size -= (bytes_read + 1);
  147. left -= (bytes_read + 1);
  148. g_string_append_c (buffer, *(string - 1));
  149. }
  150. else
  151. {
  152. return ESTR_PROBLEM;
  153. }
  154. state = ESTR_PROBLEM;
  155. break;
  156. case G_CONVERT_ERROR_PARTIAL_INPUT:
  157. /* Partial character sequence at end of input. */
  158. g_string_append (buffer, tmp_buff);
  159. g_free (tmp_buff);
  160. if ((int) bytes_read < left)
  161. {
  162. left = left - bytes_read;
  163. tmp_buff = g_strnfill (left, '?');
  164. g_string_append (buffer, tmp_buff);
  165. g_free (tmp_buff);
  166. }
  167. return ESTR_PROBLEM;
  168. case G_CONVERT_ERROR_BAD_URI: /* Don't know how handle this error :( */
  169. case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH: /* Don't know how handle this error :( */
  170. case G_CONVERT_ERROR_FAILED: /* Conversion failed for some reason. */
  171. default:
  172. g_free (tmp_buff);
  173. return ESTR_FAILURE;
  174. }
  175. }
  176. else
  177. {
  178. if (tmp_buff != NULL)
  179. {
  180. if (*tmp_buff)
  181. {
  182. g_string_append (buffer, tmp_buff);
  183. g_free (tmp_buff);
  184. string += bytes_read;
  185. left -= bytes_read;
  186. }
  187. else
  188. {
  189. g_free (tmp_buff);
  190. g_string_append (buffer, string);
  191. return state;
  192. }
  193. }
  194. else
  195. {
  196. g_string_append (buffer, string);
  197. return ESTR_PROBLEM;
  198. }
  199. }
  200. }
  201. return state;
  202. }
  203. estr_t
  204. str_convert (GIConv coder, const char *string, GString * buffer)
  205. {
  206. return _str_convert (coder, string, -1, buffer);
  207. }
  208. estr_t
  209. str_nconvert (GIConv coder, const char *string, int size, GString * buffer)
  210. {
  211. return _str_convert (coder, string, size, buffer);
  212. }
  213. gchar *
  214. str_conv_gerror_message (GError * error, const char *def_msg)
  215. {
  216. return used_class.conv_gerror_message (error, def_msg);
  217. }
  218. estr_t
  219. str_vfs_convert_from (GIConv coder, const char *string, GString * buffer)
  220. {
  221. estr_t result;
  222. if (coder == str_cnv_not_convert)
  223. {
  224. g_string_append (buffer, string != NULL ? string : "");
  225. result = ESTR_SUCCESS;
  226. }
  227. else
  228. result = _str_convert (coder, string, -1, buffer);
  229. return result;
  230. }
  231. estr_t
  232. str_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer)
  233. {
  234. return used_class.vfs_convert_to (coder, string, size, buffer);
  235. }
  236. void
  237. str_printf (GString * buffer, const char *format, ...)
  238. {
  239. va_list ap;
  240. va_start (ap, format);
  241. #if GLIB_CHECK_VERSION (2, 14, 0)
  242. g_string_append_vprintf (buffer, format, ap);
  243. #else
  244. {
  245. gchar *tmp;
  246. tmp = g_strdup_vprintf (format, ap);
  247. g_string_append (buffer, tmp);
  248. g_free (tmp);
  249. }
  250. #endif
  251. va_end (ap);
  252. }
  253. void
  254. str_insert_replace_char (GString * buffer)
  255. {
  256. used_class.insert_replace_char (buffer);
  257. }
  258. estr_t
  259. str_translate_char (GIConv conv, const char *keys, size_t ch_size, char *output, size_t out_size)
  260. {
  261. size_t left;
  262. size_t cnv;
  263. g_iconv (conv, NULL, NULL, NULL, NULL);
  264. left = (ch_size == (size_t) (-1)) ? strlen (keys) : ch_size;
  265. cnv = g_iconv (conv, (gchar **) & keys, &left, &output, &out_size);
  266. if (cnv == (size_t) (-1))
  267. {
  268. return (errno == EINVAL) ? ESTR_PROBLEM : ESTR_FAILURE;
  269. }
  270. else
  271. {
  272. output[0] = '\0';
  273. return ESTR_SUCCESS;
  274. }
  275. }
  /* Return the codeset name reported by nl_langinfo (CODESET).
     The returned pointer is whatever nl_langinfo () hands out; it is not
     copied here. */
  const char *
  str_detect_termencoding (void)
  {
      const char *locale_codeset = nl_langinfo (CODESET);

      return locale_codeset;
  }
  /* Count the entries of the NULL-terminated TABLE that are an ASCII
     case-insensitive prefix of ENCODING.
     Returns 0 when ENCODING is NULL or nothing matches; callers only test the
     result for being non-zero. */
  static int
  str_test_encoding_class (const char *encoding, const char **table)
  {
      const char **entry;
      int matches = 0;

      if (encoding == NULL)
          return 0;

      for (entry = table; *entry != NULL; entry++)
          if (g_ascii_strncasecmp (encoding, *entry, strlen (*entry)) == 0)
              matches++;

      return matches;
  }
  294. static void
  295. str_choose_str_functions ()
  296. {
  297. if (str_test_encoding_class (codeset, str_utf8_encodings))
  298. {
  299. used_class = str_utf8_init ();
  300. }
  301. else if (str_test_encoding_class (codeset, str_8bit_encodings))
  302. {
  303. used_class = str_8bit_init ();
  304. }
  305. else
  306. {
  307. used_class = str_ascii_init ();
  308. }
  309. }
  310. int
  311. str_isutf8 (const char *codeset_name)
  312. {
  313. int result = 0;
  314. if (str_test_encoding_class (codeset_name, str_utf8_encodings))
  315. {
  316. result = 1;
  317. }
  318. return result;
  319. }
  320. void
  321. str_init_strings (const char *termenc)
  322. {
  323. codeset = g_strdup ((termenc != NULL) ? termenc : str_detect_termencoding ());
  324. str_cnv_not_convert = g_iconv_open (codeset, codeset);
  325. if (str_cnv_not_convert == INVALID_CONV)
  326. {
  327. if (termenc != NULL)
  328. {
  329. g_free (codeset);
  330. codeset = g_strdup (str_detect_termencoding ());
  331. str_cnv_not_convert = g_iconv_open (codeset, codeset);
  332. }
  333. if (str_cnv_not_convert == INVALID_CONV)
  334. {
  335. g_free (codeset);
  336. codeset = g_strdup ("ascii");
  337. str_cnv_not_convert = g_iconv_open (codeset, codeset);
  338. }
  339. }
  340. str_cnv_to_term = str_cnv_not_convert;
  341. str_cnv_from_term = str_cnv_not_convert;
  342. str_choose_str_functions ();
  343. }
  344. void
  345. str_uninit_strings (void)
  346. {
  347. if (str_cnv_not_convert != INVALID_CONV)
  348. g_iconv_close (str_cnv_not_convert);
  349. g_free (codeset);
  350. }
  351. const char *
  352. str_term_form (const char *text)
  353. {
  354. return used_class.term_form (text);
  355. }
  356. const char *
  357. str_fit_to_term (const char *text, int width, align_crt_t just_mode)
  358. {
  359. return used_class.fit_to_term (text, width, just_mode);
  360. }
  361. const char *
  362. str_term_trim (const char *text, int width)
  363. {
  364. return used_class.term_trim (text, width);
  365. }
  366. const char *
  367. str_term_substring (const char *text, int start, int width)
  368. {
  369. return used_class.term_substring (text, start, width);
  370. }
  371. char *
  372. str_get_next_char (char *text)
  373. {
  374. used_class.cnext_char ((const char **) &text);
  375. return text;
  376. }
  377. const char *
  378. str_cget_next_char (const char *text)
  379. {
  380. used_class.cnext_char (&text);
  381. return text;
  382. }
  383. void
  384. str_next_char (char **text)
  385. {
  386. used_class.cnext_char ((const char **) text);
  387. }
  388. void
  389. str_cnext_char (const char **text)
  390. {
  391. used_class.cnext_char (text);
  392. }
  393. char *
  394. str_get_prev_char (char *text)
  395. {
  396. used_class.cprev_char ((const char **) &text);
  397. return text;
  398. }
  399. const char *
  400. str_cget_prev_char (const char *text)
  401. {
  402. used_class.cprev_char (&text);
  403. return text;
  404. }
  405. void
  406. str_prev_char (char **text)
  407. {
  408. used_class.cprev_char ((const char **) text);
  409. }
  410. void
  411. str_cprev_char (const char **text)
  412. {
  413. used_class.cprev_char (text);
  414. }
  415. char *
  416. str_get_next_char_safe (char *text)
  417. {
  418. used_class.cnext_char_safe ((const char **) &text);
  419. return text;
  420. }
  421. const char *
  422. str_cget_next_char_safe (const char *text)
  423. {
  424. used_class.cnext_char_safe (&text);
  425. return text;
  426. }
  427. void
  428. str_next_char_safe (char **text)
  429. {
  430. used_class.cnext_char_safe ((const char **) text);
  431. }
  432. void
  433. str_cnext_char_safe (const char **text)
  434. {
  435. used_class.cnext_char_safe (text);
  436. }
  437. char *
  438. str_get_prev_char_safe (char *text)
  439. {
  440. used_class.cprev_char_safe ((const char **) &text);
  441. return text;
  442. }
  443. const char *
  444. str_cget_prev_char_safe (const char *text)
  445. {
  446. used_class.cprev_char_safe (&text);
  447. return text;
  448. }
  449. void
  450. str_prev_char_safe (char **text)
  451. {
  452. used_class.cprev_char_safe ((const char **) text);
  453. }
  454. void
  455. str_cprev_char_safe (const char **text)
  456. {
  457. used_class.cprev_char_safe (text);
  458. }
  459. int
  460. str_next_noncomb_char (char **text)
  461. {
  462. return used_class.cnext_noncomb_char ((const char **) text);
  463. }
  464. int
  465. str_cnext_noncomb_char (const char **text)
  466. {
  467. return used_class.cnext_noncomb_char (text);
  468. }
  469. int
  470. str_prev_noncomb_char (char **text, const char *begin)
  471. {
  472. return used_class.cprev_noncomb_char ((const char **) text, begin);
  473. }
  474. int
  475. str_cprev_noncomb_char (const char **text, const char *begin)
  476. {
  477. return used_class.cprev_noncomb_char (text, begin);
  478. }
  479. int
  480. str_is_valid_char (const char *ch, size_t size)
  481. {
  482. return used_class.is_valid_char (ch, size);
  483. }
  484. int
  485. str_term_width1 (const char *text)
  486. {
  487. return used_class.term_width1 (text);
  488. }
  489. int
  490. str_term_width2 (const char *text, size_t length)
  491. {
  492. return used_class.term_width2 (text, length);
  493. }
  494. int
  495. str_term_char_width (const char *text)
  496. {
  497. return used_class.term_char_width (text);
  498. }
  499. int
  500. str_offset_to_pos (const char *text, size_t length)
  501. {
  502. return used_class.offset_to_pos (text, length);
  503. }
  504. int
  505. str_length (const char *text)
  506. {
  507. return used_class.length (text);
  508. }
  /* Return the distance in bytes between TEXT and the position that
     str_cget_next_char_safe () yields for it. */
  int
  str_length_char (const char *text)
  {
      const char *next = str_cget_next_char_safe (text);

      return next - text;
  }
  514. int
  515. str_length2 (const char *text, int size)
  516. {
  517. return used_class.length2 (text, size);
  518. }
  519. int
  520. str_length_noncomb (const char *text)
  521. {
  522. return used_class.length_noncomb (text);
  523. }
  524. int
  525. str_column_to_pos (const char *text, size_t pos)
  526. {
  527. return used_class.column_to_pos (text, pos);
  528. }
  529. int
  530. str_isspace (const char *ch)
  531. {
  532. return used_class.isspace (ch);
  533. }
  534. int
  535. str_ispunct (const char *ch)
  536. {
  537. return used_class.ispunct (ch);
  538. }
  539. int
  540. str_isalnum (const char *ch)
  541. {
  542. return used_class.isalnum (ch);
  543. }
  544. int
  545. str_isdigit (const char *ch)
  546. {
  547. return used_class.isdigit (ch);
  548. }
  549. int
  550. str_toupper (const char *ch, char **out, size_t * remain)
  551. {
  552. return used_class.toupper (ch, out, remain);
  553. }
  554. int
  555. str_tolower (const char *ch, char **out, size_t * remain)
  556. {
  557. return used_class.tolower (ch, out, remain);
  558. }
  559. int
  560. str_isprint (const char *ch)
  561. {
  562. return used_class.isprint (ch);
  563. }
  564. int
  565. str_iscombiningmark (const char *ch)
  566. {
  567. return used_class.iscombiningmark (ch);
  568. }
  569. const char *
  570. str_trunc (const char *text, int width)
  571. {
  572. return used_class.trunc (text, width);
  573. }
  574. char *
  575. str_create_search_needle (const char *needle, int case_sen)
  576. {
  577. return used_class.create_search_needle (needle, case_sen);
  578. }
  579. void
  580. str_release_search_needle (char *needle, int case_sen)
  581. {
  582. used_class.release_search_needle (needle, case_sen);
  583. }
  584. const char *
  585. str_search_first (const char *text, const char *search, int case_sen)
  586. {
  587. return used_class.search_first (text, search, case_sen);
  588. }
  589. const char *
  590. str_search_last (const char *text, const char *search, int case_sen)
  591. {
  592. return used_class.search_last (text, search, case_sen);
  593. }
  594. int
  595. str_is_valid_string (const char *text)
  596. {
  597. return used_class.is_valid_string (text);
  598. }
  599. int
  600. str_compare (const char *t1, const char *t2)
  601. {
  602. return used_class.compare (t1, t2);
  603. }
  604. int
  605. str_ncompare (const char *t1, const char *t2)
  606. {
  607. return used_class.ncompare (t1, t2);
  608. }
  609. int
  610. str_casecmp (const char *t1, const char *t2)
  611. {
  612. return used_class.casecmp (t1, t2);
  613. }
  614. int
  615. str_ncasecmp (const char *t1, const char *t2)
  616. {
  617. return used_class.ncasecmp (t1, t2);
  618. }
  619. int
  620. str_prefix (const char *text, const char *prefix)
  621. {
  622. return used_class.prefix (text, prefix);
  623. }
  624. int
  625. str_caseprefix (const char *text, const char *prefix)
  626. {
  627. return used_class.caseprefix (text, prefix);
  628. }
  629. void
  630. str_fix_string (char *text)
  631. {
  632. used_class.fix_string (text);
  633. }
  634. char *
  635. str_create_key (const char *text, int case_sen)
  636. {
  637. return used_class.create_key (text, case_sen);
  638. }
  639. char *
  640. str_create_key_for_filename (const char *text, int case_sen)
  641. {
  642. return used_class.create_key_for_filename (text, case_sen);
  643. }
  644. int
  645. str_key_collate (const char *t1, const char *t2, int case_sen)
  646. {
  647. return used_class.key_collate (t1, t2, case_sen);
  648. }
  649. void
  650. str_release_key (char *key, int case_sen)
  651. {
  652. used_class.release_key (key, case_sen);
  653. }
  654. void
  655. str_msg_term_size (const char *text, int *lines, int *columns)
  656. {
  657. char *p, *tmp;
  658. char *q;
  659. char c = '\0';
  660. int width;
  661. *lines = 1;
  662. *columns = 0;
  663. tmp = g_strdup (text);
  664. p = tmp;
  665. while (TRUE)
  666. {
  667. q = strchr (p, '\n');
  668. if (q != NULL)
  669. {
  670. c = q[0];
  671. q[0] = '\0';
  672. }
  673. width = str_term_width1 (p);
  674. if (width > *columns)
  675. *columns = width;
  676. if (q == NULL)
  677. break;
  678. q[0] = c;
  679. p = q + 1;
  680. (*lines)++;
  681. }
  682. g_free (tmp);
  683. }