strutil.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828
  1. /*
  2. Common strings utilities
  3. Copyright (C) 2007, 2011
  4. The Free Software Foundation, Inc.
  5. Written by:
  6. Rostislav Benes, 2007
  7. The file_date routine is mostly from GNU's fileutils package,
  8. written by Richard Stallman and David MacKenzie.
  9. This file is part of the Midnight Commander.
  10. The Midnight Commander is free software: you can redistribute it
  11. and/or modify it under the terms of the GNU General Public License as
  12. published by the Free Software Foundation, either version 3 of the License,
  13. or (at your option) any later version.
  14. The Midnight Commander is distributed in the hope that it will be useful,
  15. but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. GNU General Public License for more details.
  18. You should have received a copy of the GNU General Public License
  19. along with this program. If not, see <http://www.gnu.org/licenses/>.
  20. */
  21. #include <config.h>
  22. #include <stdlib.h>
  23. #include <stdio.h>
  24. #include <langinfo.h>
  25. #include <string.h>
  26. #include <errno.h>
  27. #include <stdarg.h>
  28. #include "lib/global.h"
  29. #include "lib/strutil.h"
  30. /*names, that are used for utf-8 */
  31. static const char *str_utf8_encodings[] = {
  32. "utf-8",
  33. "utf8",
  34. NULL
  35. };
  36. /* standard 8bit encodings, no wide or multibytes characters */
  37. static const char *str_8bit_encodings[] = {
  38. "cp-1251",
  39. "cp1251",
  40. "cp-1250",
  41. "cp1250",
  42. "cp-866",
  43. "cp866",
  44. "ibm-866",
  45. "ibm866",
  46. "cp-850",
  47. "cp850",
  48. "cp-852",
  49. "cp852",
  50. "iso-8859",
  51. "iso8859",
  52. "koi8",
  53. NULL
  54. };
  55. /* terminal encoding */
  56. static char *codeset = NULL;
  57. /* function for encoding specific operations */
  58. static struct str_class used_class;
  59. GIConv str_cnv_to_term;
  60. GIConv str_cnv_from_term;
  61. GIConv str_cnv_not_convert;
  62. /* if enc is same encoding like on terminal */
  63. static int
  64. str_test_not_convert (const char *enc)
  65. {
  66. return g_ascii_strcasecmp (enc, codeset) == 0;
  67. }
  68. GIConv
  69. str_crt_conv_to (const char *to_enc)
  70. {
  71. return (!str_test_not_convert (to_enc)) ? g_iconv_open (to_enc, codeset) : str_cnv_not_convert;
  72. }
  73. GIConv
  74. str_crt_conv_from (const char *from_enc)
  75. {
  76. return (!str_test_not_convert (from_enc))
  77. ? g_iconv_open (codeset, from_enc) : str_cnv_not_convert;
  78. }
  79. void
  80. str_close_conv (GIConv conv)
  81. {
  82. if (conv != str_cnv_not_convert)
  83. g_iconv_close (conv);
  84. }
  85. static estr_t
  86. _str_convert (GIConv coder, const char *string, int size, GString * buffer)
  87. {
  88. estr_t state = ESTR_SUCCESS;
  89. gchar *tmp_buff = NULL;
  90. gssize left;
  91. gsize bytes_read = 0;
  92. gsize bytes_written = 0;
  93. GError *error = NULL;
  94. errno = 0;
  95. if (coder == INVALID_CONV)
  96. return ESTR_FAILURE;
  97. if (string == NULL || buffer == NULL)
  98. return ESTR_FAILURE;
  99. /*
  100. if (! used_class.is_valid_string (string))
  101. {
  102. return ESTR_FAILURE;
  103. }
  104. */
  105. if (size < 0)
  106. {
  107. size = strlen (string);
  108. }
  109. else
  110. {
  111. left = strlen (string);
  112. if (left < size)
  113. size = left;
  114. }
  115. left = size;
  116. g_iconv (coder, NULL, NULL, NULL, NULL);
  117. while (left)
  118. {
  119. tmp_buff = g_convert_with_iconv ((const gchar *) string,
  120. left, coder, &bytes_read, &bytes_written, &error);
  121. if (error)
  122. {
  123. int code = error->code;
  124. g_error_free (error);
  125. error = NULL;
  126. switch (code)
  127. {
  128. case G_CONVERT_ERROR_NO_CONVERSION:
  129. /* Conversion between the requested character sets is not supported. */
  130. tmp_buff = g_strnfill (strlen (string), '?');
  131. g_string_append (buffer, tmp_buff);
  132. g_free (tmp_buff);
  133. return ESTR_FAILURE;
  134. case G_CONVERT_ERROR_ILLEGAL_SEQUENCE:
  135. /* Invalid byte sequence in conversion input. */
  136. if ((tmp_buff == NULL) && (bytes_read != 0))
  137. /* recode valid byte sequence */
  138. tmp_buff = g_convert_with_iconv ((const gchar *) string,
  139. bytes_read, coder, NULL, NULL, NULL);
  140. if (tmp_buff != NULL)
  141. {
  142. g_string_append (buffer, tmp_buff);
  143. g_free (tmp_buff);
  144. }
  145. if ((int) bytes_read < left)
  146. {
  147. string += bytes_read + 1;
  148. size -= (bytes_read + 1);
  149. left -= (bytes_read + 1);
  150. g_string_append_c (buffer, *(string - 1));
  151. }
  152. else
  153. {
  154. return ESTR_PROBLEM;
  155. }
  156. state = ESTR_PROBLEM;
  157. break;
  158. case G_CONVERT_ERROR_PARTIAL_INPUT:
  159. /* Partial character sequence at end of input. */
  160. g_string_append (buffer, tmp_buff);
  161. g_free (tmp_buff);
  162. if ((int) bytes_read < left)
  163. {
  164. left = left - bytes_read;
  165. tmp_buff = g_strnfill (left, '?');
  166. g_string_append (buffer, tmp_buff);
  167. g_free (tmp_buff);
  168. }
  169. return ESTR_PROBLEM;
  170. case G_CONVERT_ERROR_BAD_URI: /* Don't know how handle this error :( */
  171. case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH: /* Don't know how handle this error :( */
  172. case G_CONVERT_ERROR_FAILED: /* Conversion failed for some reason. */
  173. default:
  174. g_free (tmp_buff);
  175. return ESTR_FAILURE;
  176. }
  177. }
  178. else
  179. {
  180. if (tmp_buff != NULL)
  181. {
  182. if (*tmp_buff)
  183. {
  184. g_string_append (buffer, tmp_buff);
  185. g_free (tmp_buff);
  186. string += bytes_read;
  187. left -= bytes_read;
  188. }
  189. else
  190. {
  191. g_free (tmp_buff);
  192. g_string_append (buffer, string);
  193. return state;
  194. }
  195. }
  196. else
  197. {
  198. g_string_append (buffer, string);
  199. return ESTR_PROBLEM;
  200. }
  201. }
  202. }
  203. return state;
  204. }
  205. estr_t
  206. str_convert (GIConv coder, const char *string, GString * buffer)
  207. {
  208. return _str_convert (coder, string, -1, buffer);
  209. }
  210. estr_t
  211. str_nconvert (GIConv coder, const char *string, int size, GString * buffer)
  212. {
  213. return _str_convert (coder, string, size, buffer);
  214. }
  215. gchar *
  216. str_conv_gerror_message (GError * error, const char *def_msg)
  217. {
  218. return used_class.conv_gerror_message (error, def_msg);
  219. }
  220. estr_t
  221. str_vfs_convert_from (GIConv coder, const char *string, GString * buffer)
  222. {
  223. estr_t result;
  224. if (coder == str_cnv_not_convert)
  225. {
  226. g_string_append (buffer, string != NULL ? string : "");
  227. result = ESTR_SUCCESS;
  228. }
  229. else
  230. result = _str_convert (coder, string, -1, buffer);
  231. return result;
  232. }
  233. estr_t
  234. str_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer)
  235. {
  236. return used_class.vfs_convert_to (coder, string, size, buffer);
  237. }
  238. void
  239. str_printf (GString * buffer, const char *format, ...)
  240. {
  241. va_list ap;
  242. va_start (ap, format);
  243. #if GLIB_CHECK_VERSION (2, 14, 0)
  244. g_string_append_vprintf (buffer, format, ap);
  245. #else
  246. {
  247. gchar *tmp;
  248. tmp = g_strdup_vprintf (format, ap);
  249. g_string_append (buffer, tmp);
  250. g_free (tmp);
  251. }
  252. #endif
  253. va_end (ap);
  254. }
  255. void
  256. str_insert_replace_char (GString * buffer)
  257. {
  258. used_class.insert_replace_char (buffer);
  259. }
  260. estr_t
  261. str_translate_char (GIConv conv, const char *keys, size_t ch_size, char *output, size_t out_size)
  262. {
  263. size_t left;
  264. size_t cnv;
  265. g_iconv (conv, NULL, NULL, NULL, NULL);
  266. left = (ch_size == (size_t) (-1)) ? strlen (keys) : ch_size;
  267. cnv = g_iconv (conv, (gchar **) & keys, &left, &output, &out_size);
  268. if (cnv == (size_t) (-1))
  269. {
  270. return (errno == EINVAL) ? ESTR_PROBLEM : ESTR_FAILURE;
  271. }
  272. else
  273. {
  274. output[0] = '\0';
  275. return ESTR_SUCCESS;
  276. }
  277. }
  278. const char *
  279. str_detect_termencoding (void)
  280. {
  281. return (nl_langinfo (CODESET));
  282. }
  283. static int
  284. str_test_encoding_class (const char *encoding, const char **table)
  285. {
  286. int t;
  287. int result = 0;
  288. if (encoding == NULL)
  289. return result;
  290. for (t = 0; table[t] != NULL; t++)
  291. {
  292. result += (g_ascii_strncasecmp (encoding, table[t], strlen (table[t])) == 0);
  293. }
  294. return result;
  295. }
  296. static void
  297. str_choose_str_functions (void)
  298. {
  299. if (str_test_encoding_class (codeset, str_utf8_encodings))
  300. {
  301. used_class = str_utf8_init ();
  302. }
  303. else if (str_test_encoding_class (codeset, str_8bit_encodings))
  304. {
  305. used_class = str_8bit_init ();
  306. }
  307. else
  308. {
  309. used_class = str_ascii_init ();
  310. }
  311. }
  312. gboolean
  313. str_isutf8 (const char *codeset_name)
  314. {
  315. return (str_test_encoding_class (codeset_name, str_utf8_encodings) != 0);
  316. }
  317. void
  318. str_init_strings (const char *termenc)
  319. {
  320. codeset = g_strdup ((termenc != NULL) ? termenc : str_detect_termencoding ());
  321. str_cnv_not_convert = g_iconv_open (codeset, codeset);
  322. if (str_cnv_not_convert == INVALID_CONV)
  323. {
  324. if (termenc != NULL)
  325. {
  326. g_free (codeset);
  327. codeset = g_strdup (str_detect_termencoding ());
  328. str_cnv_not_convert = g_iconv_open (codeset, codeset);
  329. }
  330. if (str_cnv_not_convert == INVALID_CONV)
  331. {
  332. g_free (codeset);
  333. codeset = g_strdup ("ascii");
  334. str_cnv_not_convert = g_iconv_open (codeset, codeset);
  335. }
  336. }
  337. str_cnv_to_term = str_cnv_not_convert;
  338. str_cnv_from_term = str_cnv_not_convert;
  339. str_choose_str_functions ();
  340. }
  341. void
  342. str_uninit_strings (void)
  343. {
  344. if (str_cnv_not_convert != INVALID_CONV)
  345. g_iconv_close (str_cnv_not_convert);
  346. g_free (codeset);
  347. }
  348. const char *
  349. str_term_form (const char *text)
  350. {
  351. return used_class.term_form (text);
  352. }
  353. const char *
  354. str_fit_to_term (const char *text, int width, align_crt_t just_mode)
  355. {
  356. return used_class.fit_to_term (text, width, just_mode);
  357. }
  358. const char *
  359. str_term_trim (const char *text, int width)
  360. {
  361. return used_class.term_trim (text, width);
  362. }
  363. const char *
  364. str_term_substring (const char *text, int start, int width)
  365. {
  366. return used_class.term_substring (text, start, width);
  367. }
  368. char *
  369. str_get_next_char (char *text)
  370. {
  371. used_class.cnext_char ((const char **) &text);
  372. return text;
  373. }
  374. const char *
  375. str_cget_next_char (const char *text)
  376. {
  377. used_class.cnext_char (&text);
  378. return text;
  379. }
  380. void
  381. str_next_char (char **text)
  382. {
  383. used_class.cnext_char ((const char **) text);
  384. }
  385. void
  386. str_cnext_char (const char **text)
  387. {
  388. used_class.cnext_char (text);
  389. }
  390. char *
  391. str_get_prev_char (char *text)
  392. {
  393. used_class.cprev_char ((const char **) &text);
  394. return text;
  395. }
  396. const char *
  397. str_cget_prev_char (const char *text)
  398. {
  399. used_class.cprev_char (&text);
  400. return text;
  401. }
  402. void
  403. str_prev_char (char **text)
  404. {
  405. used_class.cprev_char ((const char **) text);
  406. }
  407. void
  408. str_cprev_char (const char **text)
  409. {
  410. used_class.cprev_char (text);
  411. }
  412. char *
  413. str_get_next_char_safe (char *text)
  414. {
  415. used_class.cnext_char_safe ((const char **) &text);
  416. return text;
  417. }
  418. const char *
  419. str_cget_next_char_safe (const char *text)
  420. {
  421. used_class.cnext_char_safe (&text);
  422. return text;
  423. }
  424. void
  425. str_next_char_safe (char **text)
  426. {
  427. used_class.cnext_char_safe ((const char **) text);
  428. }
  429. void
  430. str_cnext_char_safe (const char **text)
  431. {
  432. used_class.cnext_char_safe (text);
  433. }
  434. char *
  435. str_get_prev_char_safe (char *text)
  436. {
  437. used_class.cprev_char_safe ((const char **) &text);
  438. return text;
  439. }
  440. const char *
  441. str_cget_prev_char_safe (const char *text)
  442. {
  443. used_class.cprev_char_safe (&text);
  444. return text;
  445. }
  446. void
  447. str_prev_char_safe (char **text)
  448. {
  449. used_class.cprev_char_safe ((const char **) text);
  450. }
  451. void
  452. str_cprev_char_safe (const char **text)
  453. {
  454. used_class.cprev_char_safe (text);
  455. }
  456. int
  457. str_next_noncomb_char (char **text)
  458. {
  459. return used_class.cnext_noncomb_char ((const char **) text);
  460. }
  461. int
  462. str_cnext_noncomb_char (const char **text)
  463. {
  464. return used_class.cnext_noncomb_char (text);
  465. }
  466. int
  467. str_prev_noncomb_char (char **text, const char *begin)
  468. {
  469. return used_class.cprev_noncomb_char ((const char **) text, begin);
  470. }
  471. int
  472. str_cprev_noncomb_char (const char **text, const char *begin)
  473. {
  474. return used_class.cprev_noncomb_char (text, begin);
  475. }
  476. int
  477. str_is_valid_char (const char *ch, size_t size)
  478. {
  479. return used_class.is_valid_char (ch, size);
  480. }
  481. int
  482. str_term_width1 (const char *text)
  483. {
  484. return used_class.term_width1 (text);
  485. }
  486. int
  487. str_term_width2 (const char *text, size_t length)
  488. {
  489. return used_class.term_width2 (text, length);
  490. }
  491. int
  492. str_term_char_width (const char *text)
  493. {
  494. return used_class.term_char_width (text);
  495. }
  496. int
  497. str_offset_to_pos (const char *text, size_t length)
  498. {
  499. return used_class.offset_to_pos (text, length);
  500. }
  501. int
  502. str_length (const char *text)
  503. {
  504. return used_class.length (text);
  505. }
  506. int
  507. str_length_char (const char *text)
  508. {
  509. return str_cget_next_char_safe (text) - text;
  510. }
  511. int
  512. str_length2 (const char *text, int size)
  513. {
  514. return used_class.length2 (text, size);
  515. }
  516. int
  517. str_length_noncomb (const char *text)
  518. {
  519. return used_class.length_noncomb (text);
  520. }
  521. int
  522. str_column_to_pos (const char *text, size_t pos)
  523. {
  524. return used_class.column_to_pos (text, pos);
  525. }
  526. int
  527. str_isspace (const char *ch)
  528. {
  529. return used_class.isspace (ch);
  530. }
  531. int
  532. str_ispunct (const char *ch)
  533. {
  534. return used_class.ispunct (ch);
  535. }
  536. int
  537. str_isalnum (const char *ch)
  538. {
  539. return used_class.isalnum (ch);
  540. }
  541. int
  542. str_isdigit (const char *ch)
  543. {
  544. return used_class.isdigit (ch);
  545. }
  546. int
  547. str_toupper (const char *ch, char **out, size_t * remain)
  548. {
  549. return used_class.toupper (ch, out, remain);
  550. }
  551. int
  552. str_tolower (const char *ch, char **out, size_t * remain)
  553. {
  554. return used_class.tolower (ch, out, remain);
  555. }
  556. int
  557. str_isprint (const char *ch)
  558. {
  559. return used_class.isprint (ch);
  560. }
  561. int
  562. str_iscombiningmark (const char *ch)
  563. {
  564. return used_class.iscombiningmark (ch);
  565. }
  566. const char *
  567. str_trunc (const char *text, int width)
  568. {
  569. return used_class.trunc (text, width);
  570. }
  571. char *
  572. str_create_search_needle (const char *needle, int case_sen)
  573. {
  574. return used_class.create_search_needle (needle, case_sen);
  575. }
  576. void
  577. str_release_search_needle (char *needle, int case_sen)
  578. {
  579. used_class.release_search_needle (needle, case_sen);
  580. }
  581. const char *
  582. str_search_first (const char *text, const char *search, int case_sen)
  583. {
  584. return used_class.search_first (text, search, case_sen);
  585. }
  586. const char *
  587. str_search_last (const char *text, const char *search, int case_sen)
  588. {
  589. return used_class.search_last (text, search, case_sen);
  590. }
  591. int
  592. str_is_valid_string (const char *text)
  593. {
  594. return used_class.is_valid_string (text);
  595. }
  596. int
  597. str_compare (const char *t1, const char *t2)
  598. {
  599. return used_class.compare (t1, t2);
  600. }
  601. int
  602. str_ncompare (const char *t1, const char *t2)
  603. {
  604. return used_class.ncompare (t1, t2);
  605. }
  606. int
  607. str_casecmp (const char *t1, const char *t2)
  608. {
  609. return used_class.casecmp (t1, t2);
  610. }
  611. int
  612. str_ncasecmp (const char *t1, const char *t2)
  613. {
  614. return used_class.ncasecmp (t1, t2);
  615. }
  616. int
  617. str_prefix (const char *text, const char *prefix)
  618. {
  619. return used_class.prefix (text, prefix);
  620. }
  621. int
  622. str_caseprefix (const char *text, const char *prefix)
  623. {
  624. return used_class.caseprefix (text, prefix);
  625. }
  626. void
  627. str_fix_string (char *text)
  628. {
  629. used_class.fix_string (text);
  630. }
  631. char *
  632. str_create_key (const char *text, int case_sen)
  633. {
  634. return used_class.create_key (text, case_sen);
  635. }
  636. char *
  637. str_create_key_for_filename (const char *text, int case_sen)
  638. {
  639. return used_class.create_key_for_filename (text, case_sen);
  640. }
  641. int
  642. str_key_collate (const char *t1, const char *t2, int case_sen)
  643. {
  644. return used_class.key_collate (t1, t2, case_sen);
  645. }
  646. void
  647. str_release_key (char *key, int case_sen)
  648. {
  649. used_class.release_key (key, case_sen);
  650. }
  651. void
  652. str_msg_term_size (const char *text, int *lines, int *columns)
  653. {
  654. char *p, *tmp;
  655. char *q;
  656. char c = '\0';
  657. int width;
  658. *lines = 1;
  659. *columns = 0;
  660. tmp = g_strdup (text);
  661. p = tmp;
  662. while (TRUE)
  663. {
  664. q = strchr (p, '\n');
  665. if (q != NULL)
  666. {
  667. c = q[0];
  668. q[0] = '\0';
  669. }
  670. width = str_term_width1 (p);
  671. if (width > *columns)
  672. *columns = width;
  673. if (q == NULL)
  674. break;
  675. q[0] = c;
  676. p = q + 1;
  677. (*lines)++;
  678. }
  679. g_free (tmp);
  680. }
  681. /* --------------------------------------------------------------------------------------------- */
  682. char *
  683. strrstr_skip_count (const char *haystack, const char *needle, size_t skip_count)
  684. {
  685. char *semi;
  686. ssize_t len;
  687. len = strlen (haystack);
  688. do
  689. {
  690. semi = g_strrstr_len (haystack, len, needle);
  691. if (semi == NULL)
  692. return NULL;
  693. len = semi - haystack - 1;
  694. }
  695. while (skip_count-- != 0);
  696. return semi;
  697. }
  698. /* --------------------------------------------------------------------------------------------- */