strutil.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857
  1. /*
  2. Common strings utilities
  3. Copyright (C) 2007, 2011, 2013
  4. The Free Software Foundation, Inc.
  5. Written by:
  6. Rostislav Benes, 2007
  7. This file is part of the Midnight Commander.
  8. The Midnight Commander is free software: you can redistribute it
  9. and/or modify it under the terms of the GNU General Public License as
  10. published by the Free Software Foundation, either version 3 of the License,
  11. or (at your option) any later version.
  12. The Midnight Commander is distributed in the hope that it will be useful,
  13. but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. GNU General Public License for more details.
  16. You should have received a copy of the GNU General Public License
  17. along with this program. If not, see <http://www.gnu.org/licenses/>.
  18. */
  19. #include <config.h>
  20. #include <stdlib.h>
  21. #include <langinfo.h>
  22. #include <string.h>
  23. #include <errno.h>
  24. #include "lib/global.h"
  25. #include "lib/strutil.h"
  26. /*names, that are used for utf-8 */
  27. static const char *str_utf8_encodings[] = {
  28. "utf-8",
  29. "utf8",
  30. NULL
  31. };
  32. /* standard 8bit encodings, no wide or multibytes characters */
  33. static const char *str_8bit_encodings[] = {
  34. "cp-1251",
  35. "cp1251",
  36. "cp-1250",
  37. "cp1250",
  38. "cp-866",
  39. "cp866",
  40. "ibm-866",
  41. "ibm866",
  42. "cp-850",
  43. "cp850",
  44. "cp-852",
  45. "cp852",
  46. "iso-8859",
  47. "iso8859",
  48. "koi8",
  49. NULL
  50. };
  51. /* terminal encoding */
  52. static char *codeset = NULL;
  53. static char *term_encoding = NULL;
  54. /* function for encoding specific operations */
  55. static struct str_class used_class;
  56. GIConv str_cnv_to_term;
  57. GIConv str_cnv_from_term;
  58. GIConv str_cnv_not_convert = INVALID_CONV;
  59. /* if enc is same encoding like on terminal */
  60. static int
  61. str_test_not_convert (const char *enc)
  62. {
  63. return g_ascii_strcasecmp (enc, codeset) == 0;
  64. }
  65. GIConv
  66. str_crt_conv_to (const char *to_enc)
  67. {
  68. return (!str_test_not_convert (to_enc)) ? g_iconv_open (to_enc, codeset) : str_cnv_not_convert;
  69. }
  70. GIConv
  71. str_crt_conv_from (const char *from_enc)
  72. {
  73. return (!str_test_not_convert (from_enc))
  74. ? g_iconv_open (codeset, from_enc) : str_cnv_not_convert;
  75. }
  76. void
  77. str_close_conv (GIConv conv)
  78. {
  79. if (conv != str_cnv_not_convert)
  80. g_iconv_close (conv);
  81. }
  82. static estr_t
  83. _str_convert (GIConv coder, const char *string, int size, GString * buffer)
  84. {
  85. estr_t state = ESTR_SUCCESS;
  86. gssize left;
  87. gsize bytes_read = 0;
  88. gsize bytes_written = 0;
  89. errno = 0; /* FIXME: is it really needed? */
  90. if (coder == INVALID_CONV)
  91. return ESTR_FAILURE;
  92. if (string == NULL || buffer == NULL)
  93. return ESTR_FAILURE;
  94. /*
  95. if (! used_class.is_valid_string (string))
  96. {
  97. return ESTR_FAILURE;
  98. }
  99. */
  100. if (size < 0)
  101. size = strlen (string);
  102. else
  103. {
  104. left = strlen (string);
  105. if (left < size)
  106. size = left;
  107. }
  108. left = size;
  109. g_iconv (coder, NULL, NULL, NULL, NULL);
  110. while (left != 0)
  111. {
  112. gchar *tmp_buff;
  113. GError *error = NULL;
  114. tmp_buff = g_convert_with_iconv ((const gchar *) string,
  115. left, coder, &bytes_read, &bytes_written, &error);
  116. if (error != NULL)
  117. {
  118. int code = error->code;
  119. g_error_free (error);
  120. error = NULL;
  121. switch (code)
  122. {
  123. case G_CONVERT_ERROR_NO_CONVERSION:
  124. /* Conversion between the requested character sets is not supported. */
  125. tmp_buff = g_strnfill (strlen (string), '?');
  126. g_string_append (buffer, tmp_buff);
  127. g_free (tmp_buff);
  128. return ESTR_FAILURE;
  129. case G_CONVERT_ERROR_ILLEGAL_SEQUENCE:
  130. /* Invalid byte sequence in conversion input. */
  131. if ((tmp_buff == NULL) && (bytes_read != 0))
  132. /* recode valid byte sequence */
  133. tmp_buff = g_convert_with_iconv ((const gchar *) string,
  134. bytes_read, coder, NULL, NULL, NULL);
  135. if (tmp_buff != NULL)
  136. {
  137. g_string_append (buffer, tmp_buff);
  138. g_free (tmp_buff);
  139. }
  140. if ((int) bytes_read >= left)
  141. return ESTR_PROBLEM;
  142. string += bytes_read + 1;
  143. size -= (bytes_read + 1);
  144. left -= (bytes_read + 1);
  145. g_string_append_c (buffer, *(string - 1));
  146. state = ESTR_PROBLEM;
  147. break;
  148. case G_CONVERT_ERROR_PARTIAL_INPUT:
  149. /* Partial character sequence at end of input. */
  150. g_string_append (buffer, tmp_buff);
  151. g_free (tmp_buff);
  152. if ((int) bytes_read < left)
  153. {
  154. left = left - bytes_read;
  155. tmp_buff = g_strnfill (left, '?');
  156. g_string_append (buffer, tmp_buff);
  157. g_free (tmp_buff);
  158. }
  159. return ESTR_PROBLEM;
  160. case G_CONVERT_ERROR_BAD_URI: /* Don't know how handle this error :( */
  161. case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH: /* Don't know how handle this error :( */
  162. case G_CONVERT_ERROR_FAILED: /* Conversion failed for some reason. */
  163. default:
  164. g_free (tmp_buff);
  165. return ESTR_FAILURE;
  166. }
  167. }
  168. else if (tmp_buff == NULL)
  169. {
  170. g_string_append (buffer, string);
  171. return ESTR_PROBLEM;
  172. }
  173. else if (*tmp_buff == '\0')
  174. {
  175. g_free (tmp_buff);
  176. g_string_append (buffer, string);
  177. return state;
  178. }
  179. else
  180. {
  181. g_string_append (buffer, tmp_buff);
  182. g_free (tmp_buff);
  183. string += bytes_read;
  184. left -= bytes_read;
  185. }
  186. }
  187. return state;
  188. }
  189. estr_t
  190. str_convert (GIConv coder, const char *string, GString * buffer)
  191. {
  192. return _str_convert (coder, string, -1, buffer);
  193. }
  194. estr_t
  195. str_nconvert (GIConv coder, const char *string, int size, GString * buffer)
  196. {
  197. return _str_convert (coder, string, size, buffer);
  198. }
  199. gchar *
  200. str_conv_gerror_message (GError * error, const char *def_msg)
  201. {
  202. return used_class.conv_gerror_message (error, def_msg);
  203. }
  204. estr_t
  205. str_vfs_convert_from (GIConv coder, const char *string, GString * buffer)
  206. {
  207. estr_t result = ESTR_SUCCESS;
  208. if (coder == str_cnv_not_convert)
  209. g_string_append (buffer, string != NULL ? string : "");
  210. else
  211. result = _str_convert (coder, string, -1, buffer);
  212. return result;
  213. }
  214. estr_t
  215. str_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer)
  216. {
  217. return used_class.vfs_convert_to (coder, string, size, buffer);
  218. }
  219. void
  220. str_printf (GString * buffer, const char *format, ...)
  221. {
  222. va_list ap;
  223. va_start (ap, format);
  224. #if GLIB_CHECK_VERSION (2, 14, 0)
  225. g_string_append_vprintf (buffer, format, ap);
  226. #else
  227. {
  228. gchar *tmp;
  229. tmp = g_strdup_vprintf (format, ap);
  230. g_string_append (buffer, tmp);
  231. g_free (tmp);
  232. }
  233. #endif
  234. va_end (ap);
  235. }
  236. void
  237. str_insert_replace_char (GString * buffer)
  238. {
  239. used_class.insert_replace_char (buffer);
  240. }
  241. estr_t
  242. str_translate_char (GIConv conv, const char *keys, size_t ch_size, char *output, size_t out_size)
  243. {
  244. size_t left;
  245. size_t cnv;
  246. g_iconv (conv, NULL, NULL, NULL, NULL);
  247. left = (ch_size == (size_t) (-1)) ? strlen (keys) : ch_size;
  248. cnv = g_iconv (conv, (gchar **) & keys, &left, &output, &out_size);
  249. if (cnv == (size_t) (-1))
  250. return (errno == EINVAL) ? ESTR_PROBLEM : ESTR_FAILURE;
  251. output[0] = '\0';
  252. return ESTR_SUCCESS;
  253. }
  254. const char *
  255. str_detect_termencoding (void)
  256. {
  257. if (term_encoding == NULL)
  258. {
  259. /* On Linux, nl_langinfo (CODESET) returns upper case UTF-8 whether the LANG is set
  260. to utf-8 or UTF-8.
  261. On Mac OS X, it returns the same case as the LANG input.
  262. So let tranform result of nl_langinfo (CODESET) to upper case unconditionally. */
  263. term_encoding = g_ascii_strup (nl_langinfo (CODESET), -1);
  264. }
  265. return term_encoding;
  266. }
  267. static int
  268. str_test_encoding_class (const char *encoding, const char **table)
  269. {
  270. int result = 0;
  271. if (encoding != NULL)
  272. {
  273. int t;
  274. for (t = 0; table[t] != NULL; t++)
  275. if (g_ascii_strncasecmp (encoding, table[t], strlen (table[t])) == 0)
  276. result++;
  277. }
  278. return result;
  279. }
  280. static void
  281. str_choose_str_functions (void)
  282. {
  283. if (str_test_encoding_class (codeset, str_utf8_encodings))
  284. used_class = str_utf8_init ();
  285. else if (str_test_encoding_class (codeset, str_8bit_encodings))
  286. used_class = str_8bit_init ();
  287. else
  288. used_class = str_ascii_init ();
  289. }
  290. gboolean
  291. str_isutf8 (const char *codeset_name)
  292. {
  293. return (str_test_encoding_class (codeset_name, str_utf8_encodings) != 0);
  294. }
  295. void
  296. str_init_strings (const char *termenc)
  297. {
  298. codeset = termenc != NULL ? g_ascii_strup (termenc, -1) : g_strdup (str_detect_termencoding ());
  299. str_cnv_not_convert = g_iconv_open (codeset, codeset);
  300. if (str_cnv_not_convert == INVALID_CONV)
  301. {
  302. if (termenc != NULL)
  303. {
  304. g_free (codeset);
  305. codeset = g_strdup (str_detect_termencoding ());
  306. str_cnv_not_convert = g_iconv_open (codeset, codeset);
  307. }
  308. if (str_cnv_not_convert == INVALID_CONV)
  309. {
  310. g_free (codeset);
  311. codeset = g_strdup (DEFAULT_CHARSET);
  312. str_cnv_not_convert = g_iconv_open (codeset, codeset);
  313. }
  314. }
  315. str_cnv_to_term = str_cnv_not_convert;
  316. str_cnv_from_term = str_cnv_not_convert;
  317. str_choose_str_functions ();
  318. }
  319. void
  320. str_uninit_strings (void)
  321. {
  322. if (str_cnv_not_convert != INVALID_CONV)
  323. g_iconv_close (str_cnv_not_convert);
  324. g_free (term_encoding);
  325. g_free (codeset);
  326. }
  327. const char *
  328. str_term_form (const char *text)
  329. {
  330. return used_class.term_form (text);
  331. }
  332. const char *
  333. str_fit_to_term (const char *text, int width, align_crt_t just_mode)
  334. {
  335. return used_class.fit_to_term (text, width, just_mode);
  336. }
  337. const char *
  338. str_term_trim (const char *text, int width)
  339. {
  340. return used_class.term_trim (text, width);
  341. }
  342. const char *
  343. str_term_substring (const char *text, int start, int width)
  344. {
  345. return used_class.term_substring (text, start, width);
  346. }
  347. char *
  348. str_get_next_char (char *text)
  349. {
  350. used_class.cnext_char ((const char **) &text);
  351. return text;
  352. }
  353. const char *
  354. str_cget_next_char (const char *text)
  355. {
  356. used_class.cnext_char (&text);
  357. return text;
  358. }
  359. void
  360. str_next_char (char **text)
  361. {
  362. used_class.cnext_char ((const char **) text);
  363. }
  364. void
  365. str_cnext_char (const char **text)
  366. {
  367. used_class.cnext_char (text);
  368. }
  369. char *
  370. str_get_prev_char (char *text)
  371. {
  372. used_class.cprev_char ((const char **) &text);
  373. return text;
  374. }
  375. const char *
  376. str_cget_prev_char (const char *text)
  377. {
  378. used_class.cprev_char (&text);
  379. return text;
  380. }
  381. void
  382. str_prev_char (char **text)
  383. {
  384. used_class.cprev_char ((const char **) text);
  385. }
  386. void
  387. str_cprev_char (const char **text)
  388. {
  389. used_class.cprev_char (text);
  390. }
  391. char *
  392. str_get_next_char_safe (char *text)
  393. {
  394. used_class.cnext_char_safe ((const char **) &text);
  395. return text;
  396. }
  397. const char *
  398. str_cget_next_char_safe (const char *text)
  399. {
  400. used_class.cnext_char_safe (&text);
  401. return text;
  402. }
  403. void
  404. str_next_char_safe (char **text)
  405. {
  406. used_class.cnext_char_safe ((const char **) text);
  407. }
  408. void
  409. str_cnext_char_safe (const char **text)
  410. {
  411. used_class.cnext_char_safe (text);
  412. }
  413. char *
  414. str_get_prev_char_safe (char *text)
  415. {
  416. used_class.cprev_char_safe ((const char **) &text);
  417. return text;
  418. }
  419. const char *
  420. str_cget_prev_char_safe (const char *text)
  421. {
  422. used_class.cprev_char_safe (&text);
  423. return text;
  424. }
  425. void
  426. str_prev_char_safe (char **text)
  427. {
  428. used_class.cprev_char_safe ((const char **) text);
  429. }
  430. void
  431. str_cprev_char_safe (const char **text)
  432. {
  433. used_class.cprev_char_safe (text);
  434. }
  435. int
  436. str_next_noncomb_char (char **text)
  437. {
  438. return used_class.cnext_noncomb_char ((const char **) text);
  439. }
  440. int
  441. str_cnext_noncomb_char (const char **text)
  442. {
  443. return used_class.cnext_noncomb_char (text);
  444. }
  445. int
  446. str_prev_noncomb_char (char **text, const char *begin)
  447. {
  448. return used_class.cprev_noncomb_char ((const char **) text, begin);
  449. }
  450. int
  451. str_cprev_noncomb_char (const char **text, const char *begin)
  452. {
  453. return used_class.cprev_noncomb_char (text, begin);
  454. }
  455. int
  456. str_is_valid_char (const char *ch, size_t size)
  457. {
  458. return used_class.is_valid_char (ch, size);
  459. }
  460. int
  461. str_term_width1 (const char *text)
  462. {
  463. return used_class.term_width1 (text);
  464. }
  465. int
  466. str_term_width2 (const char *text, size_t length)
  467. {
  468. return used_class.term_width2 (text, length);
  469. }
  470. int
  471. str_term_char_width (const char *text)
  472. {
  473. return used_class.term_char_width (text);
  474. }
  475. int
  476. str_offset_to_pos (const char *text, size_t length)
  477. {
  478. return used_class.offset_to_pos (text, length);
  479. }
  480. int
  481. str_length (const char *text)
  482. {
  483. return used_class.length (text);
  484. }
  485. int
  486. str_length_char (const char *text)
  487. {
  488. return str_cget_next_char_safe (text) - text;
  489. }
  490. int
  491. str_length2 (const char *text, int size)
  492. {
  493. return used_class.length2 (text, size);
  494. }
  495. int
  496. str_length_noncomb (const char *text)
  497. {
  498. return used_class.length_noncomb (text);
  499. }
  500. int
  501. str_column_to_pos (const char *text, size_t pos)
  502. {
  503. return used_class.column_to_pos (text, pos);
  504. }
  505. int
  506. str_isspace (const char *ch)
  507. {
  508. return used_class.char_isspace (ch);
  509. }
  510. int
  511. str_ispunct (const char *ch)
  512. {
  513. return used_class.char_ispunct (ch);
  514. }
  515. int
  516. str_isalnum (const char *ch)
  517. {
  518. return used_class.char_isalnum (ch);
  519. }
  520. int
  521. str_isdigit (const char *ch)
  522. {
  523. return used_class.char_isdigit (ch);
  524. }
  525. int
  526. str_toupper (const char *ch, char **out, size_t * remain)
  527. {
  528. return used_class.char_toupper (ch, out, remain);
  529. }
  530. int
  531. str_tolower (const char *ch, char **out, size_t * remain)
  532. {
  533. return used_class.char_tolower (ch, out, remain);
  534. }
  535. int
  536. str_isprint (const char *ch)
  537. {
  538. return used_class.char_isprint (ch);
  539. }
  540. gboolean
  541. str_iscombiningmark (const char *ch)
  542. {
  543. return used_class.char_iscombiningmark (ch);
  544. }
  545. const char *
  546. str_trunc (const char *text, int width)
  547. {
  548. return used_class.trunc (text, width);
  549. }
  550. char *
  551. str_create_search_needle (const char *needle, int case_sen)
  552. {
  553. return used_class.create_search_needle (needle, case_sen);
  554. }
  555. void
  556. str_release_search_needle (char *needle, int case_sen)
  557. {
  558. used_class.release_search_needle (needle, case_sen);
  559. }
  560. const char *
  561. str_search_first (const char *text, const char *search, int case_sen)
  562. {
  563. return used_class.search_first (text, search, case_sen);
  564. }
  565. const char *
  566. str_search_last (const char *text, const char *search, int case_sen)
  567. {
  568. return used_class.search_last (text, search, case_sen);
  569. }
  570. int
  571. str_is_valid_string (const char *text)
  572. {
  573. return used_class.is_valid_string (text);
  574. }
  575. int
  576. str_compare (const char *t1, const char *t2)
  577. {
  578. return used_class.compare (t1, t2);
  579. }
  580. int
  581. str_ncompare (const char *t1, const char *t2)
  582. {
  583. return used_class.ncompare (t1, t2);
  584. }
  585. int
  586. str_casecmp (const char *t1, const char *t2)
  587. {
  588. return used_class.casecmp (t1, t2);
  589. }
  590. int
  591. str_ncasecmp (const char *t1, const char *t2)
  592. {
  593. return used_class.ncasecmp (t1, t2);
  594. }
  595. int
  596. str_prefix (const char *text, const char *prefix)
  597. {
  598. return used_class.prefix (text, prefix);
  599. }
  600. int
  601. str_caseprefix (const char *text, const char *prefix)
  602. {
  603. return used_class.caseprefix (text, prefix);
  604. }
  605. void
  606. str_fix_string (char *text)
  607. {
  608. used_class.fix_string (text);
  609. }
  610. char *
  611. str_create_key (const char *text, int case_sen)
  612. {
  613. return used_class.create_key (text, case_sen);
  614. }
  615. char *
  616. str_create_key_for_filename (const char *text, int case_sen)
  617. {
  618. return used_class.create_key_for_filename (text, case_sen);
  619. }
  620. int
  621. str_key_collate (const char *t1, const char *t2, int case_sen)
  622. {
  623. return used_class.key_collate (t1, t2, case_sen);
  624. }
  625. void
  626. str_release_key (char *key, int case_sen)
  627. {
  628. used_class.release_key (key, case_sen);
  629. }
  630. void
  631. str_msg_term_size (const char *text, int *lines, int *columns)
  632. {
  633. char *p, *tmp;
  634. char *q;
  635. char c = '\0';
  636. *lines = 1;
  637. *columns = 0;
  638. tmp = g_strdup (text);
  639. p = tmp;
  640. while (TRUE)
  641. {
  642. int width;
  643. q = strchr (p, '\n');
  644. if (q != NULL)
  645. {
  646. c = q[0];
  647. q[0] = '\0';
  648. }
  649. width = str_term_width1 (p);
  650. if (width > *columns)
  651. *columns = width;
  652. if (q == NULL)
  653. break;
  654. q[0] = c;
  655. p = q + 1;
  656. (*lines)++;
  657. }
  658. g_free (tmp);
  659. }
  660. /* --------------------------------------------------------------------------------------------- */
  661. char *
  662. strrstr_skip_count (const char *haystack, const char *needle, size_t skip_count)
  663. {
  664. char *semi;
  665. ssize_t len;
  666. len = strlen (haystack);
  667. do
  668. {
  669. semi = g_strrstr_len (haystack, len, needle);
  670. if (semi == NULL)
  671. return NULL;
  672. len = semi - haystack - 1;
  673. }
  674. while (skip_count-- != 0);
  675. return semi;
  676. }
  677. /* --------------------------------------------------------------------------------------------- */
  678. /* Interprete string as a non-negative decimal integer, optionally multiplied by various values.
  679. *
  680. * @param str input value
  681. * @param invalid set to TRUE if "str" does not represent a number in this format
  682. *
  683. * @return non-integer representation of "str", 0 in case of error.
  684. */
  685. uintmax_t
  686. parse_integer (const char *str, gboolean * invalid)
  687. {
  688. uintmax_t n;
  689. char *suffix;
  690. strtol_error_t e;
  691. e = xstrtoumax (str, &suffix, 10, &n, "bcEGkKMPTwYZ0");
  692. if (e == LONGINT_INVALID_SUFFIX_CHAR && *suffix == 'x')
  693. {
  694. uintmax_t multiplier;
  695. multiplier = parse_integer (suffix + 1, invalid);
  696. if (multiplier != 0 && n * multiplier / multiplier != n)
  697. {
  698. *invalid = TRUE;
  699. return 0;
  700. }
  701. n *= multiplier;
  702. }
  703. else if (e != LONGINT_OK)
  704. {
  705. *invalid = TRUE;
  706. n = 0;
  707. }
  708. return n;
  709. }
  710. /* --------------------------------------------------------------------------------------------- */