  /*
     8bit strings utilities

     Copyright (C) 2007, 2011
     The Free Software Foundation, Inc.

     Written by:
     Rostislav Benes, 2007

     The file_date routine is mostly from GNU's fileutils package,
     written by Richard Stallman and David MacKenzie.

     This file is part of the Midnight Commander.

     The Midnight Commander is free software: you can redistribute it
     and/or modify it under the terms of the GNU General Public License as
     published by the Free Software Foundation, either version 3 of the License,
     or (at your option) any later version.

     The Midnight Commander is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     GNU General Public License for more details.

     You should have received a copy of the GNU General Public License
     along with this program.  If not, see <http://www.gnu.org/licenses/>.
   */
  #include <config.h>

  #include <ctype.h>
  #include <errno.h>
  #include <stdio.h>
  #include <string.h>

  #include "lib/global.h"
  #include "lib/strutil.h"
  /* Functions for single-byte encodings: every character has width 1.
   * They are implemented with the standard system functions.
   * There are only small differences between the functions in strutil8bit.c
   * and those in strutilascii.c.
   */
  /* Replacement character appended by str_8bit_insert_replace_char(). */
  static const char replch = '?';
  /*
   * Inline wrappers that neutralize the signedness of plain 'char' when
   * calling the <ctype.h> functions for single-byte encodings.
   * Instead of writing
   *     isspace ((unsigned char) c);
   * you can write
   *     char_isspace (c);
   * Each wrapper returns whatever the wrapped function returns.
   */
  #define DECLARE_CTYPE_WRAPPER(func_name)            \
  static inline int char_##func_name (char c)         \
  {                                                   \
      return func_name ((int) (unsigned char) c);     \
  }

  /* *INDENT-OFF* */
  DECLARE_CTYPE_WRAPPER (isalnum)
  DECLARE_CTYPE_WRAPPER (isalpha)
  DECLARE_CTYPE_WRAPPER (isblank)
  DECLARE_CTYPE_WRAPPER (iscntrl)
  DECLARE_CTYPE_WRAPPER (isdigit)
  DECLARE_CTYPE_WRAPPER (isgraph)
  DECLARE_CTYPE_WRAPPER (islower)
  DECLARE_CTYPE_WRAPPER (isprint)
  DECLARE_CTYPE_WRAPPER (ispunct)
  DECLARE_CTYPE_WRAPPER (isspace)
  DECLARE_CTYPE_WRAPPER (isupper)
  DECLARE_CTYPE_WRAPPER (isxdigit)
  DECLARE_CTYPE_WRAPPER (toupper)
  DECLARE_CTYPE_WRAPPER (tolower)
  /* *INDENT-ON* */

  /*
   * isascii() is not part of ISO C and may be undeclared in strict
   * standard modes, so the 7-bit test is spelled out here instead of being
   * generated by DECLARE_CTYPE_WRAPPER.
   * Returns 1 when c is in the range 0..127, 0 otherwise.
   */
  static inline int
  char_isascii (char c)
  {
      return (((unsigned char) c) & ~0x7f) == 0;
  }
  62. static void
  63. str_8bit_insert_replace_char (GString * buffer)
  64. {
  65. g_string_append_c (buffer, replch);
  66. }
  /* Validity check for a whole string.
   * The argument is not inspected: the function always returns 1.
   */
  static int
  str_8bit_is_valid_string (const char *text)
  {
      (void) text;

      return 1;
  }
  /* Validity check for a single character.
   * Neither ch nor size is inspected: the function always returns 1.
   */
  static int
  str_8bit_is_valid_char (const char *ch, size_t size)
  {
      (void) ch;
      (void) size;

      return 1;
  }
  /* Advance *text by one byte.  No check for the end of the string is made. */
  static void
  str_8bit_cnext_char (const char **text)
  {
      *text += 1;
  }
  /* Move *text back by one byte.  No check for the start of the string is made. */
  static void
  str_8bit_cprev_char (const char **text)
  {
      *text -= 1;
  }
  /* Advance *text by one byte unless it already points at the terminating NUL.
   * Returns 1 when the pointer was moved, 0 when it was left at the terminator.
   */
  static int
  str_8bit_cnext_noncomb_char (const char **text)
  {
      if (**text == '\0')
          return 0;

      (*text)++;
      return 1;
  }
  /* Move *text back by one byte unless it is already equal to begin.
   * Returns 1 when the pointer was moved, 0 when it was left at begin.
   */
  static int
  str_8bit_cprev_noncomb_char (const char **text, const char *begin)
  {
      if (*text == begin)
          return 0;

      (*text)--;
      return 1;
  }
  /* Return the char_isspace() result for the first byte of text. */
  static int
  str_8bit_isspace (const char *text)
  {
      return char_isspace (*text);
  }
  /* Return the char_ispunct() result for the first byte of text. */
  static int
  str_8bit_ispunct (const char *text)
  {
      return char_ispunct (*text);
  }
  /* Return the char_isalnum() result for the first byte of text. */
  static int
  str_8bit_isalnum (const char *text)
  {
      return char_isalnum (*text);
  }
  /* Return the char_isdigit() result for the first byte of text. */
  static int
  str_8bit_isdigit (const char *text)
  {
      return char_isdigit (*text);
  }
  /* Return the char_isprint() result for the first byte of text. */
  static int
  str_8bit_isprint (const char *text)
  {
      return char_isprint (*text);
  }
  /* Combining-mark test.
   * The argument is not inspected: the function always returns 0.
   */
  static int
  str_8bit_iscombiningmark (const char *text)
  {
      (void) text;

      return 0;
  }
  /* Write the upper-case form of the first byte of text to **out.
   *
   * On success *out is advanced by one byte, *remain is decreased by one and
   * 1 is returned.  When *remain is 1 or less nothing is written and 0 is
   * returned.
   */
  static int
  str_8bit_toupper (const char *text, char **out, size_t * remain)
  {
      if (*remain <= 1)
          return 0;

      **out = (char) char_toupper (text[0]);
      *out += 1;
      *remain -= 1;
      return 1;
  }
  /* Write the lower-case form of the first byte of text to **out.
   *
   * On success *out is advanced by one byte, *remain is decreased by one and
   * 1 is returned.  When *remain is 1 or less nothing is written and 0 is
   * returned.
   */
  static int
  str_8bit_tolower (const char *text, char **out, size_t * remain)
  {
      if (*remain <= 1)
          return 0;

      **out = (char) char_tolower (text[0]);
      *out += 1;
      *remain -= 1;
      return 1;
  }
  /* Return the number of bytes in text before the terminating NUL. */
  static int
  str_8bit_length (const char *text)
  {
      return (int) strlen (text);
  }
  /* Return the length of text, looking at no more than size bytes.
   *
   * A negative size means "no limit": the full strlen() of text is returned.
   * Otherwise the result is the smaller of size and the distance to the first
   * NUL.  The scan is bounded by size, so bytes beyond text[size - 1] are never
   * read.
   */
  static int
  str_8bit_length2 (const char *text, int size)
  {
      const char *nul;

      if (size < 0)
          return (int) strlen (text);

      nul = memchr (text, '\0', (size_t) size);
      return (nul != NULL) ? (int) (nul - text) : size;
  }
  173. static gchar *
  174. str_8bit_conv_gerror_message (GError * error, const char *def_msg)
  175. {
  176. GIConv conv;
  177. gchar *ret;
  178. /* glib messages are in UTF-8 charset */
  179. conv = str_crt_conv_from ("UTF-8");
  180. if (conv == INVALID_CONV)
  181. ret = g_strdup (def_msg != NULL ? def_msg : "");
  182. else
  183. {
  184. GString *buf;
  185. buf = g_string_new ("");
  186. if (str_convert (conv, error->message, buf) != ESTR_FAILURE)
  187. {
  188. ret = buf->str;
  189. g_string_free (buf, FALSE);
  190. }
  191. else
  192. {
  193. ret = g_strdup (def_msg != NULL ? def_msg : "");
  194. g_string_free (buf, TRUE);
  195. }
  196. str_close_conv (conv);
  197. }
  198. return ret;
  199. }
  200. static estr_t
  201. str_8bit_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer)
  202. {
  203. estr_t result;
  204. if (coder == str_cnv_not_convert)
  205. {
  206. g_string_append_len (buffer, string, size);
  207. result = ESTR_SUCCESS;
  208. }
  209. else
  210. result = str_nconvert (coder, (char *) string, size, buffer);
  211. return result;
  212. }
  213. static const char *
  214. str_8bit_term_form (const char *text)
  215. {
  216. static char result[BUF_MEDIUM];
  217. char *actual;
  218. size_t remain;
  219. size_t length;
  220. size_t pos = 0;
  221. actual = result;
  222. remain = sizeof (result);
  223. length = strlen (text);
  224. for (; pos < length && remain > 1; pos++, actual++, remain--)
  225. {
  226. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  227. }
  228. actual[0] = '\0';
  229. return result;
  230. }
  231. static const char *
  232. str_8bit_fit_to_term (const char *text, int width, align_crt_t just_mode)
  233. {
  234. static char result[BUF_MEDIUM];
  235. char *actual;
  236. size_t remain;
  237. int ident;
  238. size_t length;
  239. size_t pos = 0;
  240. length = strlen (text);
  241. actual = result;
  242. remain = sizeof (result);
  243. if ((int) length <= width)
  244. {
  245. ident = 0;
  246. switch (HIDE_FIT (just_mode))
  247. {
  248. case J_CENTER_LEFT:
  249. case J_CENTER:
  250. ident = (width - length) / 2;
  251. break;
  252. case J_RIGHT:
  253. ident = width - length;
  254. break;
  255. }
  256. if ((int) remain <= ident)
  257. goto finally;
  258. memset (actual, ' ', ident);
  259. actual += ident;
  260. remain -= ident;
  261. for (; pos < length && remain > 1; pos++, actual++, remain--)
  262. {
  263. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  264. }
  265. if (width - length - ident > 0)
  266. {
  267. if (remain <= width - length - ident)
  268. goto finally;
  269. memset (actual, ' ', width - length - ident);
  270. actual += width - length - ident;
  271. }
  272. }
  273. else
  274. {
  275. if (IS_FIT (just_mode))
  276. {
  277. for (; pos + 1 <= (gsize) width / 2 && remain > 1; actual++, pos++, remain--)
  278. {
  279. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  280. }
  281. if (remain <= 1)
  282. goto finally;
  283. actual[0] = '~';
  284. actual++;
  285. remain--;
  286. pos += length - width + 1;
  287. for (; pos < length && remain > 1; pos++, actual++, remain--)
  288. {
  289. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  290. }
  291. }
  292. else
  293. {
  294. ident = 0;
  295. switch (HIDE_FIT (just_mode))
  296. {
  297. case J_CENTER:
  298. ident = (length - width) / 2;
  299. break;
  300. case J_RIGHT:
  301. ident = length - width;
  302. break;
  303. }
  304. pos += ident;
  305. for (; pos < (gsize) (ident + width) && remain > 1; pos++, actual++, remain--)
  306. {
  307. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  308. }
  309. }
  310. }
  311. finally:
  312. actual[0] = '\0';
  313. return result;
  314. }
  315. static const char *
  316. str_8bit_term_trim (const char *text, int width)
  317. {
  318. static char result[BUF_MEDIUM];
  319. size_t remain;
  320. char *actual;
  321. size_t pos = 0;
  322. size_t length;
  323. length = strlen (text);
  324. actual = result;
  325. remain = sizeof (result);
  326. if (width > 0)
  327. {
  328. if (width < (int) length)
  329. {
  330. if (width <= 3)
  331. {
  332. memset (actual, '.', width);
  333. actual += width;
  334. }
  335. else
  336. {
  337. memset (actual, '.', 3);
  338. actual += 3;
  339. remain -= 3;
  340. pos += length - width + 3;
  341. for (; pos < length && remain > 1; pos++, actual++, remain--)
  342. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  343. }
  344. }
  345. else
  346. {
  347. for (; pos < length && remain > 1; pos++, actual++, remain--)
  348. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  349. }
  350. }
  351. actual[0] = '\0';
  352. return result;
  353. }
  /* Return the width of text, looking at no more than length bytes.
   *
   * length == (size_t) (-1) means "no limit": the full strlen() of text is
   * returned.  Otherwise the result is the smaller of length and the distance
   * to the first NUL; the scan stops at that NUL or after length bytes.
   */
  static int
  str_8bit_term_width2 (const char *text, size_t length)
  {
      const char *nul;

      if (length == (size_t) (-1))
          return (int) strlen (text);

      nul = memchr (text, '\0', length);
      return (int) ((nul != NULL) ? (size_t) (nul - text) : length);
  }
  /* Return the width of the whole text: str_8bit_term_width2() without a limit. */
  static int
  str_8bit_term_width1 (const char *text)
  {
      const size_t no_limit = (size_t) (-1);

      return str_8bit_term_width2 (text, no_limit);
  }
  /* Width of one character.
   * The argument is not inspected: the function always returns 1.
   */
  static int
  str_8bit_term_char_width (const char *text)
  {
      (void) text;

      return 1;
  }
  370. static const char *
  371. str_8bit_term_substring (const char *text, int start, int width)
  372. {
  373. static char result[BUF_MEDIUM];
  374. size_t remain;
  375. char *actual;
  376. size_t pos = 0;
  377. size_t length;
  378. actual = result;
  379. remain = sizeof (result);
  380. length = strlen (text);
  381. if (start < (int) length)
  382. {
  383. pos += start;
  384. for (; pos < length && width > 0 && remain > 1; pos++, width--, actual++, remain--)
  385. {
  386. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  387. }
  388. }
  389. for (; width > 0 && remain > 1; actual++, remain--, width--)
  390. {
  391. actual[0] = ' ';
  392. }
  393. actual[0] = '\0';
  394. return result;
  395. }
  396. static const char *
  397. str_8bit_trunc (const char *text, int width)
  398. {
  399. static char result[MC_MAXPATHLEN];
  400. int remain;
  401. char *actual;
  402. size_t pos = 0;
  403. size_t length;
  404. actual = result;
  405. remain = sizeof (result);
  406. length = strlen (text);
  407. if ((int) length > width)
  408. {
  409. for (; pos + 1 <= (gsize) width / 2 && remain > 1; actual++, pos++, remain--)
  410. {
  411. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  412. }
  413. if (remain <= 1)
  414. goto finally;
  415. actual[0] = '~';
  416. actual++;
  417. remain--;
  418. pos += length - width + 1;
  419. for (; pos < length && remain > 1; pos++, actual++, remain--)
  420. {
  421. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  422. }
  423. }
  424. else
  425. {
  426. for (; pos < length && remain > 1; pos++, actual++, remain--)
  427. {
  428. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  429. }
  430. }
  431. finally:
  432. actual[0] = '\0';
  433. return result;
  434. }
  /* Convert a character offset to a byte position.
   * text is not inspected: length is returned unchanged (converted to int).
   */
  static int
  str_8bit_offset_to_pos (const char *text, size_t length)
  {
      (void) text;

      return (int) length;
  }
  /* Convert a screen column to a byte position.
   * text is not inspected: pos is returned unchanged (converted to int).
   */
  static int
  str_8bit_column_to_pos (const char *text, size_t pos)
  {
      (void) text;

      return (int) pos;
  }
  /* Prepare a search needle.
   * Nothing is allocated: needle itself is returned (with const cast away)
   * and case_sen is ignored.
   */
  static char *
  str_8bit_create_search_needle (const char *needle, int case_sen)
  {
      (void) case_sen;

      return (char *) needle;
  }
  /* Release a search needle.
   * Does nothing: both arguments are ignored.
   */
  static void
  str_8bit_release_search_needle (char *needle, int case_sen)
  {
      (void) needle;
      (void) case_sen;
  }
  459. static char *
  460. str_8bit_strdown (const char *str)
  461. {
  462. char *rets, *p;
  463. rets = g_strdup (str);
  464. if (rets == NULL)
  465. return NULL;
  466. for (p = rets; *p != '\0'; p++)
  467. *p = char_tolower (*p);
  468. return rets;
  469. }
  470. static const char *
  471. str_8bit_search_first (const char *text, const char *search, int case_sen)
  472. {
  473. char *fold_text;
  474. char *fold_search;
  475. const char *match;
  476. size_t offsset;
  477. fold_text = (case_sen) ? (char *) text : str_8bit_strdown (text);
  478. fold_search = (case_sen) ? (char *) search : str_8bit_strdown (search);
  479. match = g_strstr_len (fold_text, -1, fold_search);
  480. if (match != NULL)
  481. {
  482. offsset = match - fold_text;
  483. match = text + offsset;
  484. }
  485. if (!case_sen)
  486. {
  487. g_free (fold_text);
  488. g_free (fold_search);
  489. }
  490. return match;
  491. }
  492. static const char *
  493. str_8bit_search_last (const char *text, const char *search, int case_sen)
  494. {
  495. char *fold_text;
  496. char *fold_search;
  497. const char *match;
  498. size_t offsset;
  499. fold_text = (case_sen) ? (char *) text : str_8bit_strdown (text);
  500. fold_search = (case_sen) ? (char *) search : str_8bit_strdown (search);
  501. match = g_strrstr_len (fold_text, -1, fold_search);
  502. if (match != NULL)
  503. {
  504. offsset = match - fold_text;
  505. match = text + offsset;
  506. }
  507. if (!case_sen)
  508. {
  509. g_free (fold_text);
  510. g_free (fold_search);
  511. }
  512. return match;
  513. }
  /* Compare two strings with strcmp() and return its result. */
  static int
  str_8bit_compare (const char *t1, const char *t2)
  {
      const int order = strcmp (t1, t2);

      return order;
  }
  /* Compare t1 and t2 over the length of the shorter string.
   *
   * Returns a negative, zero or positive value like strncmp(); bytes are
   * compared as unsigned char.  The result is 0 when one string is a prefix
   * of the other.  Both strings are walked once, without calling strlen().
   */
  static int
  str_8bit_ncompare (const char *t1, const char *t2)
  {
      const unsigned char *a = (const unsigned char *) t1;
      const unsigned char *b = (const unsigned char *) t2;

      for (; *a != '\0' && *b != '\0'; a++, b++)
      {
          if (*a != *b)
              return (int) *a - (int) *b;
      }

      return 0;
  }
  524. static int
  525. str_8bit_casecmp (const char *s1, const char *s2)
  526. {
  527. /* code from GLib */
  528. #ifdef HAVE_STRCASECMP
  529. g_return_val_if_fail (s1 != NULL, 0);
  530. g_return_val_if_fail (s2 != NULL, 0);
  531. return strcasecmp (s1, s2);
  532. #else
  533. gint c1, c2;
  534. g_return_val_if_fail (s1 != NULL, 0);
  535. g_return_val_if_fail (s2 != NULL, 0);
  536. while (*s1 != '\0' && *s2 != '\0')
  537. {
  538. /* According to A. Cox, some platforms have islower's that
  539. * don't work right on non-uppercase
  540. */
  541. c1 = isupper ((guchar) * s1) ? tolower ((guchar) * s1) : *s1;
  542. c2 = isupper ((guchar) * s2) ? tolower ((guchar) * s2) : *s2;
  543. if (c1 != c2)
  544. return (c1 - c2);
  545. s1++;
  546. s2++;
  547. }
  548. return (((gint) (guchar) * s1) - ((gint) (guchar) * s2));
  549. #endif
  550. }
  551. static int
  552. str_8bit_ncasecmp (const char *s1, const char *s2)
  553. {
  554. size_t n;
  555. g_return_val_if_fail (s1 != NULL, 0);
  556. g_return_val_if_fail (s2 != NULL, 0);
  557. n = min (strlen (s1), strlen (s2));
  558. /* code from GLib */
  559. #ifdef HAVE_STRNCASECMP
  560. return strncasecmp (s1, s2, n);
  561. #else
  562. gint c1, c2;
  563. while (n != 0 && *s1 != '\0' && *s2 != '\0')
  564. {
  565. n -= 1;
  566. /* According to A. Cox, some platforms have islower's that
  567. * don't work right on non-uppercase
  568. */
  569. c1 = isupper ((guchar) * s1) ? tolower ((guchar) * s1) : *s1;
  570. c2 = isupper ((guchar) * s2) ? tolower ((guchar) * s2) : *s2;
  571. if (c1 != c2)
  572. return (c1 - c2);
  573. s1++;
  574. s2++;
  575. }
  576. if (n != 0)
  577. return (((gint) (guchar) * s1) - ((gint) (guchar) * s2));
  578. else
  579. return 0;
  580. #endif
  581. }
  /* Return the number of leading bytes that text and prefix have in common.
   * Counting stops at the first differing byte or at the end of either string.
   */
  static int
  str_8bit_prefix (const char *text, const char *prefix)
  {
      int result = 0;

      while (text[result] != '\0' && prefix[result] != '\0' && text[result] == prefix[result])
          result++;

      return result;
  }
  /* Return the number of leading bytes that text and prefix have in common,
   * comparing them after char_toupper().
   * Counting stops at the first differing byte or at the end of either string.
   */
  static int
  str_8bit_caseprefix (const char *text, const char *prefix)
  {
      int result = 0;

      while (text[result] != '\0' && prefix[result] != '\0'
             && char_toupper (text[result]) == char_toupper (prefix[result]))
          result++;

      return result;
  }
  /* Fix invalid characters in text.
   * Does nothing: text is left untouched.
   */
  static void
  str_8bit_fix_string (char *text)
  {
      (void) text;
  }
  /* Create a comparison key for text.
   *
   * When case_sen is nonzero text itself is returned (with const cast away);
   * otherwise a lower-cased copy made by str_8bit_strdown() is returned.
   * str_8bit_release_key() with the same case_sen frees only the copy.
   */
  static char *
  str_8bit_create_key (const char *text, int case_sen)
  {
      if (case_sen)
          return (char *) text;

      return str_8bit_strdown (text);
  }
  /* Compare two keys.
   * Uses strcmp() when case_sen is nonzero and strcoll() otherwise, and
   * returns the result of that call.
   */
  static int
  str_8bit_key_collate (const char *t1, const char *t2, int case_sen)
  {
      return case_sen ? strcmp (t1, t2) : strcoll (t1, t2);
  }
  /* Release a key.
   * key is passed to g_free() only when case_sen is zero; with a nonzero
   * case_sen nothing is freed.
   */
  static void
  str_8bit_release_key (char *key, int case_sen)
  {
      if (case_sen)
          return;

      g_free (key);
  }
  622. struct str_class
  623. str_8bit_init (void)
  624. {
  625. struct str_class result;
  626. result.conv_gerror_message = str_8bit_conv_gerror_message;
  627. result.vfs_convert_to = str_8bit_vfs_convert_to;
  628. result.insert_replace_char = str_8bit_insert_replace_char;
  629. result.is_valid_string = str_8bit_is_valid_string;
  630. result.is_valid_char = str_8bit_is_valid_char;
  631. result.cnext_char = str_8bit_cnext_char;
  632. result.cprev_char = str_8bit_cprev_char;
  633. result.cnext_char_safe = str_8bit_cnext_char;
  634. result.cprev_char_safe = str_8bit_cprev_char;
  635. result.cnext_noncomb_char = str_8bit_cnext_noncomb_char;
  636. result.cprev_noncomb_char = str_8bit_cprev_noncomb_char;
  637. result.isspace = str_8bit_isspace;
  638. result.ispunct = str_8bit_ispunct;
  639. result.isalnum = str_8bit_isalnum;
  640. result.isdigit = str_8bit_isdigit;
  641. result.isprint = str_8bit_isprint;
  642. result.iscombiningmark = str_8bit_iscombiningmark;
  643. result.toupper = str_8bit_toupper;
  644. result.tolower = str_8bit_tolower;
  645. result.length = str_8bit_length;
  646. result.length2 = str_8bit_length2;
  647. result.length_noncomb = str_8bit_length;
  648. result.fix_string = str_8bit_fix_string;
  649. result.term_form = str_8bit_term_form;
  650. result.fit_to_term = str_8bit_fit_to_term;
  651. result.term_trim = str_8bit_term_trim;
  652. result.term_width2 = str_8bit_term_width2;
  653. result.term_width1 = str_8bit_term_width1;
  654. result.term_char_width = str_8bit_term_char_width;
  655. result.term_substring = str_8bit_term_substring;
  656. result.trunc = str_8bit_trunc;
  657. result.offset_to_pos = str_8bit_offset_to_pos;
  658. result.column_to_pos = str_8bit_column_to_pos;
  659. result.create_search_needle = str_8bit_create_search_needle;
  660. result.release_search_needle = str_8bit_release_search_needle;
  661. result.search_first = str_8bit_search_first;
  662. result.search_last = str_8bit_search_last;
  663. result.compare = str_8bit_compare;
  664. result.ncompare = str_8bit_ncompare;
  665. result.casecmp = str_8bit_casecmp;
  666. result.ncasecmp = str_8bit_ncasecmp;
  667. result.prefix = str_8bit_prefix;
  668. result.caseprefix = str_8bit_caseprefix;
  669. result.create_key = str_8bit_create_key;
  670. result.create_key_for_filename = str_8bit_create_key;
  671. result.key_collate = str_8bit_key_collate;
  672. result.release_key = str_8bit_release_key;
  673. return result;
  674. }