strutil.c 17 KB


  1. /*
  2. Common strings utilities
  3. Copyright (C) 2007, 2011
  4. The Free Software Foundation, Inc.
  5. Written by:
  6. Rostislav Benes, 2007
  7. The file_date routine is mostly from GNU's fileutils package,
  8. written by Richard Stallman and David MacKenzie.
  9. This file is part of the Midnight Commander.
  10. The Midnight Commander is free software: you can redistribute it
  11. and/or modify it under the terms of the GNU General Public License as
  12. published by the Free Software Foundation, either version 3 of the License,
  13. or (at your option) any later version.
  14. The Midnight Commander is distributed in the hope that it will be useful,
  15. but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. GNU General Public License for more details.
  18. You should have received a copy of the GNU General Public License
  19. along with this program. If not, see <http://www.gnu.org/licenses/>.
  20. */
  21. #include <config.h>
  22. #include <stdlib.h>
  23. #include <stdio.h>
  24. #include <langinfo.h>
  25. #include <string.h>
  26. #include <errno.h>
  27. #include <stdarg.h>
  28. #include "lib/global.h"
  29. #include "lib/strutil.h"
  /* Codeset name prefixes that select the UTF-8 string class.
     The table is NULL-terminated. */
  static const char *str_utf8_encodings[] = { "utf-8", "utf8", NULL };
  /* Codeset name prefixes of the plain 8-bit encodings (no wide or multibyte
     characters).  The table is NULL-terminated. */
  static const char *str_8bit_encodings[] = {
      "cp-1251", "cp1251",
      "cp-1250", "cp1250",
      "cp-866", "cp866",
      "ibm-866", "ibm866",
      "cp-850", "cp850",
      "cp-852", "cp852",
      "iso-8859", "iso8859",
      "koi8",
      NULL
  };
  55. /* terminal encoding */
  56. static char *codeset = NULL;
  57. static char *term_encoding = NULL;
  58. /* function for encoding specific operations */
  59. static struct str_class used_class;
  60. GIConv str_cnv_to_term;
  61. GIConv str_cnv_from_term;
  62. GIConv str_cnv_not_convert = INVALID_CONV;
  63. /* if enc is same encoding like on terminal */
  64. static int
  65. str_test_not_convert (const char *enc)
  66. {
  67. return g_ascii_strcasecmp (enc, codeset) == 0;
  68. }
  69. GIConv
  70. str_crt_conv_to (const char *to_enc)
  71. {
  72. return (!str_test_not_convert (to_enc)) ? g_iconv_open (to_enc, codeset) : str_cnv_not_convert;
  73. }
  74. GIConv
  75. str_crt_conv_from (const char *from_enc)
  76. {
  77. return (!str_test_not_convert (from_enc))
  78. ? g_iconv_open (codeset, from_enc) : str_cnv_not_convert;
  79. }
  80. void
  81. str_close_conv (GIConv conv)
  82. {
  83. if (conv != str_cnv_not_convert)
  84. g_iconv_close (conv);
  85. }
  86. static estr_t
  87. _str_convert (GIConv coder, const char *string, int size, GString * buffer)
  88. {
  89. estr_t state = ESTR_SUCCESS;
  90. gchar *tmp_buff = NULL;
  91. gssize left;
  92. gsize bytes_read = 0;
  93. gsize bytes_written = 0;
  94. GError *error = NULL;
  95. errno = 0;
  96. if (coder == INVALID_CONV)
  97. return ESTR_FAILURE;
  98. if (string == NULL || buffer == NULL)
  99. return ESTR_FAILURE;
  100. /*
  101. if (! used_class.is_valid_string (string))
  102. {
  103. return ESTR_FAILURE;
  104. }
  105. */
  106. if (size < 0)
  107. {
  108. size = strlen (string);
  109. }
  110. else
  111. {
  112. left = strlen (string);
  113. if (left < size)
  114. size = left;
  115. }
  116. left = size;
  117. g_iconv (coder, NULL, NULL, NULL, NULL);
  118. while (left)
  119. {
  120. tmp_buff = g_convert_with_iconv ((const gchar *) string,
  121. left, coder, &bytes_read, &bytes_written, &error);
  122. if (error)
  123. {
  124. int code = error->code;
  125. g_error_free (error);
  126. error = NULL;
  127. switch (code)
  128. {
  129. case G_CONVERT_ERROR_NO_CONVERSION:
  130. /* Conversion between the requested character sets is not supported. */
  131. tmp_buff = g_strnfill (strlen (string), '?');
  132. g_string_append (buffer, tmp_buff);
  133. g_free (tmp_buff);
  134. return ESTR_FAILURE;
  135. case G_CONVERT_ERROR_ILLEGAL_SEQUENCE:
  136. /* Invalid byte sequence in conversion input. */
  137. if ((tmp_buff == NULL) && (bytes_read != 0))
  138. /* recode valid byte sequence */
  139. tmp_buff = g_convert_with_iconv ((const gchar *) string,
  140. bytes_read, coder, NULL, NULL, NULL);
  141. if (tmp_buff != NULL)
  142. {
  143. g_string_append (buffer, tmp_buff);
  144. g_free (tmp_buff);
  145. }
  146. if ((int) bytes_read < left)
  147. {
  148. string += bytes_read + 1;
  149. size -= (bytes_read + 1);
  150. left -= (bytes_read + 1);
  151. g_string_append_c (buffer, *(string - 1));
  152. }
  153. else
  154. {
  155. return ESTR_PROBLEM;
  156. }
  157. state = ESTR_PROBLEM;
  158. break;
  159. case G_CONVERT_ERROR_PARTIAL_INPUT:
  160. /* Partial character sequence at end of input. */
  161. g_string_append (buffer, tmp_buff);
  162. g_free (tmp_buff);
  163. if ((int) bytes_read < left)
  164. {
  165. left = left - bytes_read;
  166. tmp_buff = g_strnfill (left, '?');
  167. g_string_append (buffer, tmp_buff);
  168. g_free (tmp_buff);
  169. }
  170. return ESTR_PROBLEM;
  171. case G_CONVERT_ERROR_BAD_URI: /* Don't know how handle this error :( */
  172. case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH: /* Don't know how handle this error :( */
  173. case G_CONVERT_ERROR_FAILED: /* Conversion failed for some reason. */
  174. default:
  175. g_free (tmp_buff);
  176. return ESTR_FAILURE;
  177. }
  178. }
  179. else
  180. {
  181. if (tmp_buff != NULL)
  182. {
  183. if (*tmp_buff)
  184. {
  185. g_string_append (buffer, tmp_buff);
  186. g_free (tmp_buff);
  187. string += bytes_read;
  188. left -= bytes_read;
  189. }
  190. else
  191. {
  192. g_free (tmp_buff);
  193. g_string_append (buffer, string);
  194. return state;
  195. }
  196. }
  197. else
  198. {
  199. g_string_append (buffer, string);
  200. return ESTR_PROBLEM;
  201. }
  202. }
  203. }
  204. return state;
  205. }
  206. estr_t
  207. str_convert (GIConv coder, const char *string, GString * buffer)
  208. {
  209. return _str_convert (coder, string, -1, buffer);
  210. }
  211. estr_t
  212. str_nconvert (GIConv coder, const char *string, int size, GString * buffer)
  213. {
  214. return _str_convert (coder, string, size, buffer);
  215. }
  216. gchar *
  217. str_conv_gerror_message (GError * error, const char *def_msg)
  218. {
  219. return used_class.conv_gerror_message (error, def_msg);
  220. }
  221. estr_t
  222. str_vfs_convert_from (GIConv coder, const char *string, GString * buffer)
  223. {
  224. estr_t result;
  225. if (coder == str_cnv_not_convert)
  226. {
  227. g_string_append (buffer, string != NULL ? string : "");
  228. result = ESTR_SUCCESS;
  229. }
  230. else
  231. result = _str_convert (coder, string, -1, buffer);
  232. return result;
  233. }
  234. estr_t
  235. str_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer)
  236. {
  237. return used_class.vfs_convert_to (coder, string, size, buffer);
  238. }
  239. void
  240. str_printf (GString * buffer, const char *format, ...)
  241. {
  242. va_list ap;
  243. va_start (ap, format);
  244. #if GLIB_CHECK_VERSION (2, 14, 0)
  245. g_string_append_vprintf (buffer, format, ap);
  246. #else
  247. {
  248. gchar *tmp;
  249. tmp = g_strdup_vprintf (format, ap);
  250. g_string_append (buffer, tmp);
  251. g_free (tmp);
  252. }
  253. #endif
  254. va_end (ap);
  255. }
  256. void
  257. str_insert_replace_char (GString * buffer)
  258. {
  259. used_class.insert_replace_char (buffer);
  260. }
  261. estr_t
  262. str_translate_char (GIConv conv, const char *keys, size_t ch_size, char *output, size_t out_size)
  263. {
  264. size_t left;
  265. size_t cnv;
  266. g_iconv (conv, NULL, NULL, NULL, NULL);
  267. left = (ch_size == (size_t) (-1)) ? strlen (keys) : ch_size;
  268. cnv = g_iconv (conv, (gchar **) & keys, &left, &output, &out_size);
  269. if (cnv == (size_t) (-1))
  270. {
  271. return (errno == EINVAL) ? ESTR_PROBLEM : ESTR_FAILURE;
  272. }
  273. else
  274. {
  275. output[0] = '\0';
  276. return ESTR_SUCCESS;
  277. }
  278. }
  279. const char *
  280. str_detect_termencoding (void)
  281. {
  282. if (term_encoding == NULL)
  283. {
  284. /* On Linux, nl_langinfo (CODESET) returns upper case UTF-8 whether the LANG is set
  285. to utf-8 or UTF-8.
  286. On Mac OS X, it returns the same case as the LANG input.
  287. So let tranform result of nl_langinfo (CODESET) to upper case unconditionally. */
  288. term_encoding = g_ascii_strup (nl_langinfo (CODESET), -1);
  289. }
  290. return term_encoding;
  291. }
  /* Count the entries of the NULL-terminated table that are an ASCII
     case-insensitive prefix of encoding.  Returns 0 when encoding is NULL. */
  static int
  str_test_encoding_class (const char *encoding, const char **table)
  {
      const char **entry;
      int matches = 0;

      if (encoding == NULL)
          return 0;

      for (entry = table; *entry != NULL; entry++)
          if (g_ascii_strncasecmp (encoding, *entry, strlen (*entry)) == 0)
              matches++;

      return matches;
  }
  305. static void
  306. str_choose_str_functions (void)
  307. {
  308. if (str_test_encoding_class (codeset, str_utf8_encodings))
  309. {
  310. used_class = str_utf8_init ();
  311. }
  312. else if (str_test_encoding_class (codeset, str_8bit_encodings))
  313. {
  314. used_class = str_8bit_init ();
  315. }
  316. else
  317. {
  318. used_class = str_ascii_init ();
  319. }
  320. }
  321. gboolean
  322. str_isutf8 (const char *codeset_name)
  323. {
  324. return (str_test_encoding_class (codeset_name, str_utf8_encodings) != 0);
  325. }
  326. void
  327. str_init_strings (const char *termenc)
  328. {
  329. codeset = termenc != NULL ? g_ascii_strup (termenc, -1) : g_strdup (str_detect_termencoding ());
  330. str_cnv_not_convert = g_iconv_open (codeset, codeset);
  331. if (str_cnv_not_convert == INVALID_CONV)
  332. {
  333. if (termenc != NULL)
  334. {
  335. g_free (codeset);
  336. codeset = g_strdup (str_detect_termencoding ());
  337. str_cnv_not_convert = g_iconv_open (codeset, codeset);
  338. }
  339. if (str_cnv_not_convert == INVALID_CONV)
  340. {
  341. g_free (codeset);
  342. codeset = g_strdup ("ASCII");
  343. str_cnv_not_convert = g_iconv_open (codeset, codeset);
  344. }
  345. }
  346. str_cnv_to_term = str_cnv_not_convert;
  347. str_cnv_from_term = str_cnv_not_convert;
  348. str_choose_str_functions ();
  349. }
  350. void
  351. str_uninit_strings (void)
  352. {
  353. if (str_cnv_not_convert != INVALID_CONV)
  354. g_iconv_close (str_cnv_not_convert);
  355. g_free (term_encoding);
  356. g_free (codeset);
  357. }
  358. const char *
  359. str_term_form (const char *text)
  360. {
  361. return used_class.term_form (text);
  362. }
  363. const char *
  364. str_fit_to_term (const char *text, int width, align_crt_t just_mode)
  365. {
  366. return used_class.fit_to_term (text, width, just_mode);
  367. }
  368. const char *
  369. str_term_trim (const char *text, int width)
  370. {
  371. return used_class.term_trim (text, width);
  372. }
  373. const char *
  374. str_term_substring (const char *text, int start, int width)
  375. {
  376. return used_class.term_substring (text, start, width);
  377. }
  378. char *
  379. str_get_next_char (char *text)
  380. {
  381. used_class.cnext_char ((const char **) &text);
  382. return text;
  383. }
  384. const char *
  385. str_cget_next_char (const char *text)
  386. {
  387. used_class.cnext_char (&text);
  388. return text;
  389. }
  390. void
  391. str_next_char (char **text)
  392. {
  393. used_class.cnext_char ((const char **) text);
  394. }
  395. void
  396. str_cnext_char (const char **text)
  397. {
  398. used_class.cnext_char (text);
  399. }
  400. char *
  401. str_get_prev_char (char *text)
  402. {
  403. used_class.cprev_char ((const char **) &text);
  404. return text;
  405. }
  406. const char *
  407. str_cget_prev_char (const char *text)
  408. {
  409. used_class.cprev_char (&text);
  410. return text;
  411. }
  412. void
  413. str_prev_char (char **text)
  414. {
  415. used_class.cprev_char ((const char **) text);
  416. }
  417. void
  418. str_cprev_char (const char **text)
  419. {
  420. used_class.cprev_char (text);
  421. }
  422. char *
  423. str_get_next_char_safe (char *text)
  424. {
  425. used_class.cnext_char_safe ((const char **) &text);
  426. return text;
  427. }
  428. const char *
  429. str_cget_next_char_safe (const char *text)
  430. {
  431. used_class.cnext_char_safe (&text);
  432. return text;
  433. }
  434. void
  435. str_next_char_safe (char **text)
  436. {
  437. used_class.cnext_char_safe ((const char **) text);
  438. }
  439. void
  440. str_cnext_char_safe (const char **text)
  441. {
  442. used_class.cnext_char_safe (text);
  443. }
  444. char *
  445. str_get_prev_char_safe (char *text)
  446. {
  447. used_class.cprev_char_safe ((const char **) &text);
  448. return text;
  449. }
  450. const char *
  451. str_cget_prev_char_safe (const char *text)
  452. {
  453. used_class.cprev_char_safe (&text);
  454. return text;
  455. }
  456. void
  457. str_prev_char_safe (char **text)
  458. {
  459. used_class.cprev_char_safe ((const char **) text);
  460. }
  461. void
  462. str_cprev_char_safe (const char **text)
  463. {
  464. used_class.cprev_char_safe (text);
  465. }
  466. int
  467. str_next_noncomb_char (char **text)
  468. {
  469. return used_class.cnext_noncomb_char ((const char **) text);
  470. }
  471. int
  472. str_cnext_noncomb_char (const char **text)
  473. {
  474. return used_class.cnext_noncomb_char (text);
  475. }
  476. int
  477. str_prev_noncomb_char (char **text, const char *begin)
  478. {
  479. return used_class.cprev_noncomb_char ((const char **) text, begin);
  480. }
  481. int
  482. str_cprev_noncomb_char (const char **text, const char *begin)
  483. {
  484. return used_class.cprev_noncomb_char (text, begin);
  485. }
  486. int
  487. str_is_valid_char (const char *ch, size_t size)
  488. {
  489. return used_class.is_valid_char (ch, size);
  490. }
  491. int
  492. str_term_width1 (const char *text)
  493. {
  494. return used_class.term_width1 (text);
  495. }
  496. int
  497. str_term_width2 (const char *text, size_t length)
  498. {
  499. return used_class.term_width2 (text, length);
  500. }
  501. int
  502. str_term_char_width (const char *text)
  503. {
  504. return used_class.term_char_width (text);
  505. }
  506. int
  507. str_offset_to_pos (const char *text, size_t length)
  508. {
  509. return used_class.offset_to_pos (text, length);
  510. }
  511. int
  512. str_length (const char *text)
  513. {
  514. return used_class.length (text);
  515. }
  /* Return the byte distance from text to the position that
     str_cget_next_char_safe () reports for it. */
  int
  str_length_char (const char *text)
  {
      const char *after = str_cget_next_char_safe (text);

      return after - text;
  }
  521. int
  522. str_length2 (const char *text, int size)
  523. {
  524. return used_class.length2 (text, size);
  525. }
  526. int
  527. str_length_noncomb (const char *text)
  528. {
  529. return used_class.length_noncomb (text);
  530. }
  531. int
  532. str_column_to_pos (const char *text, size_t pos)
  533. {
  534. return used_class.column_to_pos (text, pos);
  535. }
  536. int
  537. str_isspace (const char *ch)
  538. {
  539. return used_class.char_isspace (ch);
  540. }
  541. int
  542. str_ispunct (const char *ch)
  543. {
  544. return used_class.char_ispunct (ch);
  545. }
  546. int
  547. str_isalnum (const char *ch)
  548. {
  549. return used_class.char_isalnum (ch);
  550. }
  551. int
  552. str_isdigit (const char *ch)
  553. {
  554. return used_class.char_isdigit (ch);
  555. }
  556. int
  557. str_toupper (const char *ch, char **out, size_t * remain)
  558. {
  559. return used_class.char_toupper (ch, out, remain);
  560. }
  561. int
  562. str_tolower (const char *ch, char **out, size_t * remain)
  563. {
  564. return used_class.char_tolower (ch, out, remain);
  565. }
  566. int
  567. str_isprint (const char *ch)
  568. {
  569. return used_class.char_isprint (ch);
  570. }
  571. gboolean
  572. str_iscombiningmark (const char *ch)
  573. {
  574. return used_class.char_iscombiningmark (ch);
  575. }
  576. const char *
  577. str_trunc (const char *text, int width)
  578. {
  579. return used_class.trunc (text, width);
  580. }
  581. char *
  582. str_create_search_needle (const char *needle, int case_sen)
  583. {
  584. return used_class.create_search_needle (needle, case_sen);
  585. }
  586. void
  587. str_release_search_needle (char *needle, int case_sen)
  588. {
  589. used_class.release_search_needle (needle, case_sen);
  590. }
  591. const char *
  592. str_search_first (const char *text, const char *search, int case_sen)
  593. {
  594. return used_class.search_first (text, search, case_sen);
  595. }
  596. const char *
  597. str_search_last (const char *text, const char *search, int case_sen)
  598. {
  599. return used_class.search_last (text, search, case_sen);
  600. }
  601. int
  602. str_is_valid_string (const char *text)
  603. {
  604. return used_class.is_valid_string (text);
  605. }
  606. int
  607. str_compare (const char *t1, const char *t2)
  608. {
  609. return used_class.compare (t1, t2);
  610. }
  611. int
  612. str_ncompare (const char *t1, const char *t2)
  613. {
  614. return used_class.ncompare (t1, t2);
  615. }
  616. int
  617. str_casecmp (const char *t1, const char *t2)
  618. {
  619. return used_class.casecmp (t1, t2);
  620. }
  621. int
  622. str_ncasecmp (const char *t1, const char *t2)
  623. {
  624. return used_class.ncasecmp (t1, t2);
  625. }
  626. int
  627. str_prefix (const char *text, const char *prefix)
  628. {
  629. return used_class.prefix (text, prefix);
  630. }
  631. int
  632. str_caseprefix (const char *text, const char *prefix)
  633. {
  634. return used_class.caseprefix (text, prefix);
  635. }
  636. void
  637. str_fix_string (char *text)
  638. {
  639. used_class.fix_string (text);
  640. }
  641. char *
  642. str_create_key (const char *text, int case_sen)
  643. {
  644. return used_class.create_key (text, case_sen);
  645. }
  646. char *
  647. str_create_key_for_filename (const char *text, int case_sen)
  648. {
  649. return used_class.create_key_for_filename (text, case_sen);
  650. }
  651. int
  652. str_key_collate (const char *t1, const char *t2, int case_sen)
  653. {
  654. return used_class.key_collate (t1, t2, case_sen);
  655. }
  656. void
  657. str_release_key (char *key, int case_sen)
  658. {
  659. used_class.release_key (key, case_sen);
  660. }
  /* Measure a multi-line message.
     On return *lines is the number of '\n'-separated lines in text (at least 1)
     and *columns is the largest str_term_width1 () value among those lines.
     The work is done on a private copy, so text itself is never modified. */
  void
  str_msg_term_size (const char *text, int *lines, int *columns)
  {
      char *copy;
      char *line;

      *lines = 1;
      *columns = 0;

      copy = g_strdup (text);
      line = copy;

      for (;;)
      {
          char *eol;
          int width;

          eol = strchr (line, '\n');
          if (eol != NULL)
              *eol = '\0';        /* cut the copy here so only this line is measured */

          width = str_term_width1 (line);
          if (*columns < width)
              *columns = width;

          if (eol == NULL)
              break;

          *eol = '\n';            /* put the line break back */
          line = eol + 1;
          (*lines)++;
      }

      g_free (copy);
  }
  691. /* --------------------------------------------------------------------------------------------- */
  /*
   * Search haystack backwards for needle, ignoring the last skip_count matches.
   *
   * With skip_count == 0 the last occurrence is returned, with 1 the one found
   * before it, and so on.  Returns NULL when there are not enough occurrences.
   *
   * Each further search is limited to the bytes that end one byte before the
   * previous match.
   * NOTE(review): because of that "- 1", an occurrence ending immediately before
   * the previous match is not found; confirm this is intended.
   */
  char *
  strrstr_skip_count (const char *haystack, const char *needle, size_t skip_count)
  {
      char *semi;
      ssize_t len;

      len = strlen (haystack);

      for (;;)
      {
          semi = g_strrstr_len (haystack, len, needle);
          if (semi == NULL)
              return NULL;

          if (skip_count-- == 0)
              return semi;

          /* A match at the very beginning leaves nothing in front of it.
             Stop here: a length of -1 would make g_strrstr_len () search the
             whole string again and return this same match. */
          if (semi == haystack)
              return NULL;

          len = semi - haystack - 1;
      }
  }
  708. /* --------------------------------------------------------------------------------------------- */