strutil8bit.c 18 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801
  1. /*
  2. 8bit strings utilities
  3. Copyright (C) 2007, 2011
  4. The Free Software Foundation, Inc.
  5. Written by:
  6. Rostislav Benes, 2007
  7. The file_date routine is mostly from GNU's fileutils package,
  8. written by Richard Stallman and David MacKenzie.
  9. This file is part of the Midnight Commander.
  10. The Midnight Commander is free software: you can redistribute it
  11. and/or modify it under the terms of the GNU General Public License as
  12. published by the Free Software Foundation, either version 3 of the License,
  13. or (at your option) any later version.
  14. The Midnight Commander is distributed in the hope that it will be useful,
  15. but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. GNU General Public License for more details.
  18. You should have received a copy of the GNU General Public License
  19. along with this program. If not, see <http://www.gnu.org/licenses/>.
  20. */
  21. #include <config.h>
  22. #include <stdio.h>
  23. #include <ctype.h>
  24. #include <errno.h>
  25. #include "lib/global.h"
  26. #include "lib/strutil.h"
/* Functions for single-byte encodings, where every character has width 1.
 * They are implemented with the standard system functions.
 * There are only small differences between the functions in strutil8bit.c
 * and those in strutilascii.c.
 */
  32. static const char replch = '?';
  33. /*
  34. * Inlines to equalize 'char' signedness for single 'char' encodings.
  35. * Instead of writing
  36. * isspace((unsigned char)c);
  37. * you can write
  38. * char_isspace(c);
  39. */
  40. #define DECLARE_CTYPE_WRAPPER(func_name) \
  41. static inline int char_##func_name(char c) \
  42. { \
  43. return func_name((int)(unsigned char)c); \
  44. }
  45. /* *INDENT-OFF* */
  46. DECLARE_CTYPE_WRAPPER (isalnum)
  47. DECLARE_CTYPE_WRAPPER (isalpha)
  48. DECLARE_CTYPE_WRAPPER (isascii)
  49. DECLARE_CTYPE_WRAPPER (isblank)
  50. DECLARE_CTYPE_WRAPPER (iscntrl)
  51. DECLARE_CTYPE_WRAPPER (isdigit)
  52. DECLARE_CTYPE_WRAPPER (isgraph)
  53. DECLARE_CTYPE_WRAPPER (islower)
  54. DECLARE_CTYPE_WRAPPER (isprint)
  55. DECLARE_CTYPE_WRAPPER (ispunct)
  56. DECLARE_CTYPE_WRAPPER (isspace)
  57. DECLARE_CTYPE_WRAPPER (isupper)
  58. DECLARE_CTYPE_WRAPPER (isxdigit)
  59. DECLARE_CTYPE_WRAPPER (toupper)
  60. DECLARE_CTYPE_WRAPPER (tolower)
  61. /* *INDENT-ON* */
  62. static void
  63. str_8bit_insert_replace_char (GString * buffer)
  64. {
  65. g_string_append_c (buffer, replch);
  66. }
  67. static int
  68. str_8bit_is_valid_string (const char *text)
  69. {
  70. (void) text;
  71. return 1;
  72. }
  73. static int
  74. str_8bit_is_valid_char (const char *ch, size_t size)
  75. {
  76. (void) ch;
  77. (void) size;
  78. return 1;
  79. }
  80. static void
  81. str_8bit_cnext_char (const char **text)
  82. {
  83. (*text)++;
  84. }
  85. static void
  86. str_8bit_cprev_char (const char **text)
  87. {
  88. (*text)--;
  89. }
  90. static int
  91. str_8bit_cnext_noncomb_char (const char **text)
  92. {
  93. if (*text[0] != '\0')
  94. {
  95. (*text)++;
  96. return 1;
  97. }
  98. else
  99. return 0;
  100. }
  101. static int
  102. str_8bit_cprev_noncomb_char (const char **text, const char *begin)
  103. {
  104. if ((*text) != begin)
  105. {
  106. (*text)--;
  107. return 1;
  108. }
  109. else
  110. return 0;
  111. }
  112. static int
  113. str_8bit_isspace (const char *text)
  114. {
  115. return char_isspace (text[0]);
  116. }
  117. static int
  118. str_8bit_ispunct (const char *text)
  119. {
  120. return char_ispunct (text[0]);
  121. }
  122. static int
  123. str_8bit_isalnum (const char *text)
  124. {
  125. return char_isalnum (text[0]);
  126. }
  127. static int
  128. str_8bit_isdigit (const char *text)
  129. {
  130. return char_isdigit (text[0]);
  131. }
  132. static int
  133. str_8bit_isprint (const char *text)
  134. {
  135. return char_isprint (text[0]);
  136. }
  137. static int
  138. str_8bit_iscombiningmark (const char *text)
  139. {
  140. (void) text;
  141. return 0;
  142. }
  143. static int
  144. str_8bit_toupper (const char *text, char **out, size_t * remain)
  145. {
  146. if (*remain <= 1)
  147. return 0;
  148. (*out)[0] = char_toupper (text[0]);
  149. (*out)++;
  150. (*remain)--;
  151. return 1;
  152. }
  153. static int
  154. str_8bit_tolower (const char *text, char **out, size_t * remain)
  155. {
  156. if (*remain <= 1)
  157. return 0;
  158. (*out)[0] = char_tolower (text[0]);
  159. (*out)++;
  160. (*remain)--;
  161. return 1;
  162. }
  163. static int
  164. str_8bit_length (const char *text)
  165. {
  166. return strlen (text);
  167. }
  168. static int
  169. str_8bit_length2 (const char *text, int size)
  170. {
  171. return (size >= 0) ? min (strlen (text), (gsize) size) : strlen (text);
  172. }
  173. static gchar *
  174. str_8bit_conv_gerror_message (GError * error, const char *def_msg)
  175. {
  176. GIConv conv;
  177. gchar *ret;
  178. /* glib messages are in UTF-8 charset */
  179. conv = str_crt_conv_from ("UTF-8");
  180. if (conv == INVALID_CONV)
  181. ret = g_strdup (def_msg != NULL ? def_msg : "");
  182. else
  183. {
  184. GString *buf;
  185. buf = g_string_new ("");
  186. if (str_convert (conv, error->message, buf) != ESTR_FAILURE)
  187. {
  188. ret = buf->str;
  189. g_string_free (buf, FALSE);
  190. }
  191. else
  192. {
  193. ret = g_strdup (def_msg != NULL ? def_msg : "");
  194. g_string_free (buf, TRUE);
  195. }
  196. str_close_conv (conv);
  197. }
  198. return ret;
  199. }
  200. static estr_t
  201. str_8bit_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer)
  202. {
  203. estr_t result;
  204. if (coder == str_cnv_not_convert)
  205. {
  206. g_string_append_len (buffer, string, size);
  207. result = ESTR_SUCCESS;
  208. }
  209. else
  210. result = str_nconvert (coder, (char *) string, size, buffer);
  211. return result;
  212. }
  213. static const char *
  214. str_8bit_term_form (const char *text)
  215. {
  216. static char result[BUF_MEDIUM];
  217. char *actual;
  218. size_t remain;
  219. size_t length;
  220. size_t pos = 0;
  221. actual = result;
  222. remain = sizeof (result);
  223. length = strlen (text);
  224. for (; pos < length && remain > 1; pos++, actual++, remain--)
  225. {
  226. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  227. }
  228. actual[0] = '\0';
  229. return result;
  230. }
  231. static const char *
  232. str_8bit_fit_to_term (const char *text, int width, align_crt_t just_mode)
  233. {
  234. static char result[BUF_MEDIUM];
  235. char *actual;
  236. size_t remain;
  237. int ident;
  238. size_t length;
  239. size_t pos = 0;
  240. length = strlen (text);
  241. actual = result;
  242. remain = sizeof (result);
  243. if ((int) length <= width)
  244. {
  245. ident = 0;
  246. switch (HIDE_FIT (just_mode))
  247. {
  248. case J_CENTER_LEFT:
  249. case J_CENTER:
  250. ident = (width - length) / 2;
  251. break;
  252. case J_RIGHT:
  253. ident = width - length;
  254. break;
  255. }
  256. if ((int) remain <= ident)
  257. goto finally;
  258. memset (actual, ' ', ident);
  259. actual += ident;
  260. remain -= ident;
  261. for (; pos < length && remain > 1; pos++, actual++, remain--)
  262. {
  263. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  264. }
  265. if (width - length - ident > 0)
  266. {
  267. if (remain <= width - length - ident)
  268. goto finally;
  269. memset (actual, ' ', width - length - ident);
  270. actual += width - length - ident;
  271. remain -= width - length - ident;
  272. }
  273. }
  274. else
  275. {
  276. if (IS_FIT (just_mode))
  277. {
  278. for (; pos + 1 <= (gsize) width / 2 && remain > 1; actual++, pos++, remain--)
  279. {
  280. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  281. }
  282. if (remain <= 1)
  283. goto finally;
  284. actual[0] = '~';
  285. actual++;
  286. remain--;
  287. pos += length - width + 1;
  288. for (; pos < length && remain > 1; pos++, actual++, remain--)
  289. {
  290. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  291. }
  292. }
  293. else
  294. {
  295. ident = 0;
  296. switch (HIDE_FIT (just_mode))
  297. {
  298. case J_CENTER:
  299. ident = (length - width) / 2;
  300. break;
  301. case J_RIGHT:
  302. ident = length - width;
  303. break;
  304. }
  305. pos += ident;
  306. for (; pos < (gsize) (ident + width) && remain > 1; pos++, actual++, remain--)
  307. {
  308. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  309. }
  310. }
  311. }
  312. finally:
  313. actual[0] = '\0';
  314. return result;
  315. }
  316. static const char *
  317. str_8bit_term_trim (const char *text, int width)
  318. {
  319. static char result[BUF_MEDIUM];
  320. size_t remain;
  321. char *actual;
  322. size_t pos = 0;
  323. size_t length;
  324. length = strlen (text);
  325. actual = result;
  326. remain = sizeof (result);
  327. if (width > 0)
  328. {
  329. if (width < (int) length)
  330. {
  331. if (width <= 3)
  332. {
  333. memset (actual, '.', width);
  334. actual += width;
  335. remain -= width;
  336. }
  337. else
  338. {
  339. memset (actual, '.', 3);
  340. actual += 3;
  341. remain -= 3;
  342. pos += length - width + 3;
  343. for (; pos < length && remain > 1; pos++, actual++, remain--)
  344. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  345. }
  346. }
  347. else
  348. {
  349. for (; pos < length && remain > 1; pos++, actual++, remain--)
  350. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  351. }
  352. }
  353. actual[0] = '\0';
  354. return result;
  355. }
  356. static int
  357. str_8bit_term_width2 (const char *text, size_t length)
  358. {
  359. return (length != (size_t) (-1)) ? min (strlen (text), length) : strlen (text);
  360. }
  361. static int
  362. str_8bit_term_width1 (const char *text)
  363. {
  364. return str_8bit_term_width2 (text, (size_t) (-1));
  365. }
  366. static int
  367. str_8bit_term_char_width (const char *text)
  368. {
  369. (void) text;
  370. return 1;
  371. }
  372. static const char *
  373. str_8bit_term_substring (const char *text, int start, int width)
  374. {
  375. static char result[BUF_MEDIUM];
  376. size_t remain;
  377. char *actual;
  378. size_t pos = 0;
  379. size_t length;
  380. actual = result;
  381. remain = sizeof (result);
  382. length = strlen (text);
  383. if (start < (int) length)
  384. {
  385. pos += start;
  386. for (; pos < length && width > 0 && remain > 1; pos++, width--, actual++, remain--)
  387. {
  388. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  389. }
  390. }
  391. for (; width > 0 && remain > 1; actual++, remain--, width--)
  392. {
  393. actual[0] = ' ';
  394. }
  395. actual[0] = '\0';
  396. return result;
  397. }
  398. static const char *
  399. str_8bit_trunc (const char *text, int width)
  400. {
  401. static char result[MC_MAXPATHLEN];
  402. int remain;
  403. char *actual;
  404. size_t pos = 0;
  405. size_t length;
  406. actual = result;
  407. remain = sizeof (result);
  408. length = strlen (text);
  409. if ((int) length > width)
  410. {
  411. for (; pos + 1 <= (gsize) width / 2 && remain > 1; actual++, pos++, remain--)
  412. {
  413. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  414. }
  415. if (remain <= 1)
  416. goto finally;
  417. actual[0] = '~';
  418. actual++;
  419. remain--;
  420. pos += length - width + 1;
  421. for (; pos < length && remain > 1; pos++, actual++, remain--)
  422. {
  423. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  424. }
  425. }
  426. else
  427. {
  428. for (; pos < length && remain > 1; pos++, actual++, remain--)
  429. {
  430. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  431. }
  432. }
  433. finally:
  434. actual[0] = '\0';
  435. return result;
  436. }
  437. static int
  438. str_8bit_offset_to_pos (const char *text, size_t length)
  439. {
  440. (void) text;
  441. return (int) length;
  442. }
  443. static int
  444. str_8bit_column_to_pos (const char *text, size_t pos)
  445. {
  446. (void) text;
  447. return (int) pos;
  448. }
  449. static char *
  450. str_8bit_create_search_needle (const char *needle, int case_sen)
  451. {
  452. (void) case_sen;
  453. return (char *) needle;
  454. }
  455. static void
  456. str_8bit_release_search_needle (char *needle, int case_sen)
  457. {
  458. (void) case_sen;
  459. (void) needle;
  460. }
  461. static char *
  462. str_8bit_strdown (const char *str)
  463. {
  464. char *rets, *p;
  465. rets = g_strdup (str);
  466. if (rets == NULL)
  467. return NULL;
  468. for (p = rets; *p != '\0'; p++)
  469. *p = char_tolower (*p);
  470. return rets;
  471. }
  472. static const char *
  473. str_8bit_search_first (const char *text, const char *search, int case_sen)
  474. {
  475. char *fold_text;
  476. char *fold_search;
  477. const char *match;
  478. size_t offsset;
  479. fold_text = (case_sen) ? (char *) text : str_8bit_strdown (text);
  480. fold_search = (case_sen) ? (char *) search : str_8bit_strdown (search);
  481. match = g_strstr_len (fold_text, -1, fold_search);
  482. if (match != NULL)
  483. {
  484. offsset = match - fold_text;
  485. match = text + offsset;
  486. }
  487. if (!case_sen)
  488. {
  489. g_free (fold_text);
  490. g_free (fold_search);
  491. }
  492. return match;
  493. }
  494. static const char *
  495. str_8bit_search_last (const char *text, const char *search, int case_sen)
  496. {
  497. char *fold_text;
  498. char *fold_search;
  499. const char *match;
  500. size_t offsset;
  501. fold_text = (case_sen) ? (char *) text : str_8bit_strdown (text);
  502. fold_search = (case_sen) ? (char *) search : str_8bit_strdown (search);
  503. match = g_strrstr_len (fold_text, -1, fold_search);
  504. if (match != NULL)
  505. {
  506. offsset = match - fold_text;
  507. match = text + offsset;
  508. }
  509. if (!case_sen)
  510. {
  511. g_free (fold_text);
  512. g_free (fold_search);
  513. }
  514. return match;
  515. }
  516. static int
  517. str_8bit_compare (const char *t1, const char *t2)
  518. {
  519. return strcmp (t1, t2);
  520. }
  521. static int
  522. str_8bit_ncompare (const char *t1, const char *t2)
  523. {
  524. return strncmp (t1, t2, min (strlen (t1), strlen (t2)));
  525. }
  526. static int
  527. str_8bit_casecmp (const char *s1, const char *s2)
  528. {
  529. /* code from GLib */
  530. #ifdef HAVE_STRCASECMP
  531. g_return_val_if_fail (s1 != NULL, 0);
  532. g_return_val_if_fail (s2 != NULL, 0);
  533. return strcasecmp (s1, s2);
  534. #else
  535. gint c1, c2;
  536. g_return_val_if_fail (s1 != NULL, 0);
  537. g_return_val_if_fail (s2 != NULL, 0);
  538. while (*s1 != '\0' && *s2 != '\0')
  539. {
  540. /* According to A. Cox, some platforms have islower's that
  541. * don't work right on non-uppercase
  542. */
  543. c1 = isupper ((guchar) * s1) ? tolower ((guchar) * s1) : *s1;
  544. c2 = isupper ((guchar) * s2) ? tolower ((guchar) * s2) : *s2;
  545. if (c1 != c2)
  546. return (c1 - c2);
  547. s1++;
  548. s2++;
  549. }
  550. return (((gint) (guchar) * s1) - ((gint) (guchar) * s2));
  551. #endif
  552. }
  553. static int
  554. str_8bit_ncasecmp (const char *s1, const char *s2)
  555. {
  556. size_t n;
  557. g_return_val_if_fail (s1 != NULL, 0);
  558. g_return_val_if_fail (s2 != NULL, 0);
  559. n = min (strlen (s1), strlen (s2));
  560. /* code from GLib */
  561. #ifdef HAVE_STRNCASECMP
  562. return strncasecmp (s1, s2, n);
  563. #else
  564. gint c1, c2;
  565. while (n != 0 && *s1 != '\0' && *s2 != '\0')
  566. {
  567. n -= 1;
  568. /* According to A. Cox, some platforms have islower's that
  569. * don't work right on non-uppercase
  570. */
  571. c1 = isupper ((guchar) * s1) ? tolower ((guchar) * s1) : *s1;
  572. c2 = isupper ((guchar) * s2) ? tolower ((guchar) * s2) : *s2;
  573. if (c1 != c2)
  574. return (c1 - c2);
  575. s1++;
  576. s2++;
  577. }
  578. if (n != 0)
  579. return (((gint) (guchar) * s1) - ((gint) (guchar) * s2));
  580. else
  581. return 0;
  582. #endif
  583. }
  584. static int
  585. str_8bit_prefix (const char *text, const char *prefix)
  586. {
  587. int result;
  588. for (result = 0; text[result] != '\0' && prefix[result] != '\0'
  589. && text[result] == prefix[result]; result++);
  590. return result;
  591. }
  592. static int
  593. str_8bit_caseprefix (const char *text, const char *prefix)
  594. {
  595. int result;
  596. for (result = 0; text[result] != '\0' && prefix[result] != '\0'
  597. && char_toupper (text[result]) == char_toupper (prefix[result]); result++);
  598. return result;
  599. }
  600. static void
  601. str_8bit_fix_string (char *text)
  602. {
  603. (void) text;
  604. }
  605. static char *
  606. str_8bit_create_key (const char *text, int case_sen)
  607. {
  608. return (case_sen) ? (char *) text : str_8bit_strdown (text);
  609. }
  610. static int
  611. str_8bit_key_collate (const char *t1, const char *t2, int case_sen)
  612. {
  613. if (case_sen)
  614. return strcmp (t1, t2);
  615. else
  616. return strcoll (t1, t2);
  617. }
  618. static void
  619. str_8bit_release_key (char *key, int case_sen)
  620. {
  621. if (!case_sen)
  622. g_free (key);
  623. }
  624. struct str_class
  625. str_8bit_init (void)
  626. {
  627. struct str_class result;
  628. result.conv_gerror_message = str_8bit_conv_gerror_message;
  629. result.vfs_convert_to = str_8bit_vfs_convert_to;
  630. result.insert_replace_char = str_8bit_insert_replace_char;
  631. result.is_valid_string = str_8bit_is_valid_string;
  632. result.is_valid_char = str_8bit_is_valid_char;
  633. result.cnext_char = str_8bit_cnext_char;
  634. result.cprev_char = str_8bit_cprev_char;
  635. result.cnext_char_safe = str_8bit_cnext_char;
  636. result.cprev_char_safe = str_8bit_cprev_char;
  637. result.cnext_noncomb_char = str_8bit_cnext_noncomb_char;
  638. result.cprev_noncomb_char = str_8bit_cprev_noncomb_char;
  639. result.isspace = str_8bit_isspace;
  640. result.ispunct = str_8bit_ispunct;
  641. result.isalnum = str_8bit_isalnum;
  642. result.isdigit = str_8bit_isdigit;
  643. result.isprint = str_8bit_isprint;
  644. result.iscombiningmark = str_8bit_iscombiningmark;
  645. result.toupper = str_8bit_toupper;
  646. result.tolower = str_8bit_tolower;
  647. result.length = str_8bit_length;
  648. result.length2 = str_8bit_length2;
  649. result.length_noncomb = str_8bit_length;
  650. result.fix_string = str_8bit_fix_string;
  651. result.term_form = str_8bit_term_form;
  652. result.fit_to_term = str_8bit_fit_to_term;
  653. result.term_trim = str_8bit_term_trim;
  654. result.term_width2 = str_8bit_term_width2;
  655. result.term_width1 = str_8bit_term_width1;
  656. result.term_char_width = str_8bit_term_char_width;
  657. result.term_substring = str_8bit_term_substring;
  658. result.trunc = str_8bit_trunc;
  659. result.offset_to_pos = str_8bit_offset_to_pos;
  660. result.column_to_pos = str_8bit_column_to_pos;
  661. result.create_search_needle = str_8bit_create_search_needle;
  662. result.release_search_needle = str_8bit_release_search_needle;
  663. result.search_first = str_8bit_search_first;
  664. result.search_last = str_8bit_search_last;
  665. result.compare = str_8bit_compare;
  666. result.ncompare = str_8bit_ncompare;
  667. result.casecmp = str_8bit_casecmp;
  668. result.ncasecmp = str_8bit_ncasecmp;
  669. result.prefix = str_8bit_prefix;
  670. result.caseprefix = str_8bit_caseprefix;
  671. result.create_key = str_8bit_create_key;
  672. result.create_key_for_filename = str_8bit_create_key;
  673. result.key_collate = str_8bit_key_collate;
  674. result.release_key = str_8bit_release_key;
  675. return result;
  676. }