strutil8bit.c 18 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798
  1. /* 8bit strings utilities
  2. Copyright (C) 2007 Free Software Foundation, Inc.
  3. Written 2007 by:
  4. Rostislav Benes
  5. The file_date routine is mostly from GNU's fileutils package,
  6. written by Richard Stallman and David MacKenzie.
  7. This program is free software; you can redistribute it and/or modify
  8. it under the terms of the GNU General Public License as published by
  9. the Free Software Foundation; either version 2 of the License, or
  10. (at your option) any later version.
  11. This program is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. GNU General Public License for more details.
  15. You should have received a copy of the GNU General Public License
  16. along with this program; if not, write to the Free Software
  17. Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. */
  19. #include <config.h>
  20. #include <stdio.h>
  21. #include <ctype.h>
  22. #include <errno.h>
  23. #include "lib/global.h"
  24. #include "lib/strutil.h"
  25. /* functions for singlebyte encodings, all characters have width 1
  26. * using standard system functions
  27. * there are only small differences between functions in strutil8bit.c
  28. * and strutilascii.c
  29. */
  /* Replacement character appended by str_8bit_insert_replace_char (). */
  static const char replch = '?';
  /*
   * Signedness-safe wrappers around the <ctype.h> classification and
   * conversion functions for plain 'char' arguments.
   *
   * Rather than spelling out
   *     isspace ((unsigned char) c);
   * at every call site, write
   *     char_isspace (c);
   */
  #define DECLARE_CTYPE_WRAPPER(func_name)            \
  static inline int                                   \
  char_##func_name (char c)                           \
  {                                                   \
      const unsigned char uc = (unsigned char) c;     \
                                                      \
      return func_name ((int) uc);                    \
  }

  /* *INDENT-OFF* */
  DECLARE_CTYPE_WRAPPER (isalnum)
  DECLARE_CTYPE_WRAPPER (isalpha)
  DECLARE_CTYPE_WRAPPER (isascii)
  DECLARE_CTYPE_WRAPPER (isblank)
  DECLARE_CTYPE_WRAPPER (iscntrl)
  DECLARE_CTYPE_WRAPPER (isdigit)
  DECLARE_CTYPE_WRAPPER (isgraph)
  DECLARE_CTYPE_WRAPPER (islower)
  DECLARE_CTYPE_WRAPPER (isprint)
  DECLARE_CTYPE_WRAPPER (ispunct)
  DECLARE_CTYPE_WRAPPER (isspace)
  DECLARE_CTYPE_WRAPPER (isupper)
  DECLARE_CTYPE_WRAPPER (isxdigit)
  DECLARE_CTYPE_WRAPPER (toupper)
  DECLARE_CTYPE_WRAPPER (tolower)
  /* *INDENT-ON* */
  60. static void
  61. str_8bit_insert_replace_char (GString * buffer)
  62. {
  63. g_string_append_c (buffer, replch);
  64. }
  /*
   * Validity check for a whole string.
   * The argument is ignored and 1 is always returned.
   */
  static int
  str_8bit_is_valid_string (const char *text)
  {
      (void) text;              /* unused */

      return 1;
  }
  /*
   * Validity check for a single character.
   * Both arguments are ignored and 1 is always returned.
   */
  static int
  str_8bit_is_valid_char (const char *ch, size_t size)
  {
      (void) size;              /* unused */
      (void) ch;                /* unused */

      return 1;
  }
  /* Advance *TEXT by one byte. No bounds check is performed. */
  static void
  str_8bit_cnext_char (const char **text)
  {
      *text += 1;
  }
  /* Move *TEXT back by one byte. No bounds check is performed. */
  static void
  str_8bit_cprev_char (const char **text)
  {
      *text -= 1;
  }
  /*
   * Advance *TEXT by one byte unless it already points at the terminating
   * NUL.  Returns 1 when the pointer was moved, 0 otherwise.
   */
  static int
  str_8bit_cnext_noncomb_char (const char **text)
  {
      if (**text == '\0')
          return 0;

      *text += 1;
      return 1;
  }
  /*
   * Move *TEXT back by one byte unless it is already equal to BEGIN.
   * Returns 1 when the pointer was moved, 0 otherwise.
   */
  static int
  str_8bit_cprev_noncomb_char (const char **text, const char *begin)
  {
      if (*text == begin)
          return 0;

      *text -= 1;
      return 1;
  }
  /* Return the isspace () classification of the first byte of TEXT. */
  static int
  str_8bit_isspace (const char *text)
  {
      const char first = *text;

      return char_isspace (first);
  }
  /* Return the ispunct () classification of the first byte of TEXT. */
  static int
  str_8bit_ispunct (const char *text)
  {
      const char first = *text;

      return char_ispunct (first);
  }
  /* Return the isalnum () classification of the first byte of TEXT. */
  static int
  str_8bit_isalnum (const char *text)
  {
      const char first = *text;

      return char_isalnum (first);
  }
  /* Return the isdigit () classification of the first byte of TEXT. */
  static int
  str_8bit_isdigit (const char *text)
  {
      const char first = *text;

      return char_isdigit (first);
  }
  /* Return the isprint () classification of the first byte of TEXT. */
  static int
  str_8bit_isprint (const char *text)
  {
      const char first = *text;

      return char_isprint (first);
  }
  /*
   * Combining-mark test.
   * The argument is ignored and 0 is always returned.
   */
  static int
  str_8bit_iscombiningmark (const char *text)
  {
      (void) text;              /* unused */

      return 0;
  }
  /*
   * Write the upper-case form of the first byte of TEXT to **OUT.
   *
   * On success *OUT is advanced by one, *REMAIN is decremented and 1 is
   * returned.  When *REMAIN is 1 or less nothing is written and 0 is
   * returned.
   */
  static int
  str_8bit_toupper (const char *text, char **out, size_t * remain)
  {
      if (*remain > 1)
      {
          **out = char_toupper (*text);
          *out += 1;
          *remain -= 1;
          return 1;
      }

      return 0;
  }
  /*
   * Write the lower-case form of the first byte of TEXT to **OUT.
   *
   * On success *OUT is advanced by one, *REMAIN is decremented and 1 is
   * returned.  When *REMAIN is 1 or less nothing is written and 0 is
   * returned.
   */
  static int
  str_8bit_tolower (const char *text, char **out, size_t * remain)
  {
      if (*remain > 1)
      {
          **out = char_tolower (*text);
          *out += 1;
          *remain -= 1;
          return 1;
      }

      return 0;
  }
  /* Return strlen (TEXT) converted to int: one character per byte. */
  static int
  str_8bit_length (const char *text)
  {
      const size_t bytes = strlen (text);

      return (int) bytes;
  }
  166. static int
  167. str_8bit_length2 (const char *text, int size)
  168. {
  169. return (size >= 0) ? min (strlen (text), (gsize) size) : strlen (text);
  170. }
  171. static gchar *
  172. str_8bit_conv_gerror_message (GError * error, const char *def_msg)
  173. {
  174. GIConv conv;
  175. gchar *ret;
  176. /* glib messages are in UTF-8 charset */
  177. conv = str_crt_conv_from ("UTF-8");
  178. if (conv == INVALID_CONV)
  179. ret = g_strdup (def_msg != NULL ? def_msg : "");
  180. else
  181. {
  182. GString *buf;
  183. buf = g_string_new ("");
  184. if (str_convert (conv, error->message, buf) != ESTR_FAILURE)
  185. {
  186. ret = buf->str;
  187. g_string_free (buf, FALSE);
  188. }
  189. else
  190. {
  191. ret = g_strdup (def_msg != NULL ? def_msg : "");
  192. g_string_free (buf, TRUE);
  193. }
  194. str_close_conv (conv);
  195. }
  196. return ret;
  197. }
  198. static estr_t
  199. str_8bit_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer)
  200. {
  201. estr_t result;
  202. if (coder == str_cnv_not_convert)
  203. {
  204. g_string_append_len (buffer, string, size);
  205. result = ESTR_SUCCESS;
  206. }
  207. else
  208. result = str_nconvert (coder, (char *) string, size, buffer);
  209. return result;
  210. }
  211. static const char *
  212. str_8bit_term_form (const char *text)
  213. {
  214. static char result[BUF_MEDIUM];
  215. char *actual;
  216. size_t remain;
  217. size_t length;
  218. size_t pos = 0;
  219. actual = result;
  220. remain = sizeof (result);
  221. length = strlen (text);
  222. for (; pos < length && remain > 1; pos++, actual++, remain--)
  223. {
  224. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  225. }
  226. actual[0] = '\0';
  227. return result;
  228. }
  229. static const char *
  230. str_8bit_fit_to_term (const char *text, int width, align_crt_t just_mode)
  231. {
  232. static char result[BUF_MEDIUM];
  233. char *actual;
  234. size_t remain;
  235. int ident;
  236. size_t length;
  237. size_t pos = 0;
  238. length = strlen (text);
  239. actual = result;
  240. remain = sizeof (result);
  241. if ((int) length <= width)
  242. {
  243. ident = 0;
  244. switch (HIDE_FIT (just_mode))
  245. {
  246. case J_CENTER_LEFT:
  247. case J_CENTER:
  248. ident = (width - length) / 2;
  249. break;
  250. case J_RIGHT:
  251. ident = width - length;
  252. break;
  253. }
  254. if ((int) remain <= ident)
  255. goto finally;
  256. memset (actual, ' ', ident);
  257. actual += ident;
  258. remain -= ident;
  259. for (; pos < length && remain > 1; pos++, actual++, remain--)
  260. {
  261. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  262. }
  263. if (width - length - ident > 0)
  264. {
  265. if (remain <= width - length - ident)
  266. goto finally;
  267. memset (actual, ' ', width - length - ident);
  268. actual += width - length - ident;
  269. remain -= width - length - ident;
  270. }
  271. }
  272. else
  273. {
  274. if (IS_FIT (just_mode))
  275. {
  276. for (; pos + 1 <= (gsize) width / 2 && remain > 1; actual++, pos++, remain--)
  277. {
  278. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  279. }
  280. if (remain <= 1)
  281. goto finally;
  282. actual[0] = '~';
  283. actual++;
  284. remain--;
  285. pos += length - width + 1;
  286. for (; pos < length && remain > 1; pos++, actual++, remain--)
  287. {
  288. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  289. }
  290. }
  291. else
  292. {
  293. ident = 0;
  294. switch (HIDE_FIT (just_mode))
  295. {
  296. case J_CENTER:
  297. ident = (length - width) / 2;
  298. break;
  299. case J_RIGHT:
  300. ident = length - width;
  301. break;
  302. }
  303. pos += ident;
  304. for (; pos < (gsize) (ident + width) && remain > 1; pos++, actual++, remain--)
  305. {
  306. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  307. }
  308. }
  309. }
  310. finally:
  311. actual[0] = '\0';
  312. return result;
  313. }
  314. static const char *
  315. str_8bit_term_trim (const char *text, int width)
  316. {
  317. static char result[BUF_MEDIUM];
  318. size_t remain;
  319. char *actual;
  320. size_t pos = 0;
  321. size_t length;
  322. length = strlen (text);
  323. actual = result;
  324. remain = sizeof (result);
  325. if (width < (int) length)
  326. {
  327. if (width <= 3)
  328. {
  329. memset (actual, '.', width);
  330. actual += width;
  331. remain -= width;
  332. }
  333. else
  334. {
  335. memset (actual, '.', 3);
  336. actual += 3;
  337. remain -= 3;
  338. pos += length - width + 3;
  339. for (; pos < length && remain > 1; pos++, actual++, remain--)
  340. {
  341. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  342. }
  343. }
  344. }
  345. else
  346. {
  347. for (; pos < length && remain > 1; pos++, actual++, remain--)
  348. {
  349. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  350. }
  351. }
  352. actual[0] = '\0';
  353. return result;
  354. }
  /*
   * Terminal width of at most LENGTH bytes of TEXT.
   * LENGTH equal to (size_t) (-1) means "no limit" and yields
   * strlen (TEXT); otherwise the smaller of strlen (TEXT) and LENGTH is
   * returned.
   */
  static int
  str_8bit_term_width2 (const char *text, size_t length)
  {
      if (length == (size_t) (-1))
          return strlen (text);

      return min (strlen (text), length);
  }
  /* Terminal width of the whole TEXT: str_8bit_term_width2 () without a limit. */
  static int
  str_8bit_term_width1 (const char *text)
  {
      const size_t no_limit = (size_t) (-1);

      return str_8bit_term_width2 (text, no_limit);
  }
  /*
   * Terminal width of a single character.
   * The argument is ignored and 1 is always returned.
   */
  static int
  str_8bit_term_char_width (const char *text)
  {
      (void) text;              /* unused */

      return 1;
  }
  371. static const char *
  372. str_8bit_term_substring (const char *text, int start, int width)
  373. {
  374. static char result[BUF_MEDIUM];
  375. size_t remain;
  376. char *actual;
  377. size_t pos = 0;
  378. size_t length;
  379. actual = result;
  380. remain = sizeof (result);
  381. length = strlen (text);
  382. if (start < (int) length)
  383. {
  384. pos += start;
  385. for (; pos < length && width > 0 && remain > 1; pos++, width--, actual++, remain--)
  386. {
  387. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  388. }
  389. }
  390. for (; width > 0 && remain > 1; actual++, remain--, width--)
  391. {
  392. actual[0] = ' ';
  393. }
  394. actual[0] = '\0';
  395. return result;
  396. }
  397. static const char *
  398. str_8bit_trunc (const char *text, int width)
  399. {
  400. static char result[MC_MAXPATHLEN];
  401. int remain;
  402. char *actual;
  403. size_t pos = 0;
  404. size_t length;
  405. actual = result;
  406. remain = sizeof (result);
  407. length = strlen (text);
  408. if ((int) length > width)
  409. {
  410. for (; pos + 1 <= (gsize) width / 2 && remain > 1; actual++, pos++, remain--)
  411. {
  412. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  413. }
  414. if (remain <= 1)
  415. goto finally;
  416. actual[0] = '~';
  417. actual++;
  418. remain--;
  419. pos += length - width + 1;
  420. for (; pos < length && remain > 1; pos++, actual++, remain--)
  421. {
  422. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  423. }
  424. }
  425. else
  426. {
  427. for (; pos < length && remain > 1; pos++, actual++, remain--)
  428. {
  429. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  430. }
  431. }
  432. finally:
  433. actual[0] = '\0';
  434. return result;
  435. }
  /*
   * Convert a character offset into a byte position.
   * TEXT is ignored; LENGTH is returned converted to int.
   */
  static int
  str_8bit_offset_to_pos (const char *text, size_t length)
  {
      const int byte_pos = (int) length;

      (void) text;              /* unused */

      return byte_pos;
  }
  /*
   * Convert a terminal column into a byte position.
   * TEXT is ignored; POS is returned converted to int.
   */
  static int
  str_8bit_column_to_pos (const char *text, size_t pos)
  {
      const int byte_pos = (int) pos;

      (void) text;              /* unused */

      return byte_pos;
  }
  /*
   * Prepare NEEDLE for searching.
   * CASE_SEN is ignored; NEEDLE itself is returned (no copy is made).
   */
  static char *
  str_8bit_create_search_needle (const char *needle, int case_sen)
  {
      char *same = (char *) needle;

      (void) case_sen;          /* unused */

      return same;
  }
  /*
   * Release a needle obtained from str_8bit_create_search_needle ().
   * Does nothing; both arguments are ignored.
   */
  static void
  str_8bit_release_search_needle (char *needle, int case_sen)
  {
      (void) needle;            /* unused */
      (void) case_sen;          /* unused */
  }
  /*
   * Return a g_strdup () copy of STR with every byte passed through
   * char_tolower ().  Returns NULL when g_strdup () returns NULL.
   */
  static char *
  str_8bit_strdown (const char *str)
  {
      char *copy = g_strdup (str);

      if (copy != NULL)
      {
          size_t i;

          for (i = 0; copy[i] != '\0'; i++)
              copy[i] = char_tolower (copy[i]);
      }

      return copy;
  }
  /*
   * Find the first occurrence of SEARCH in TEXT.
   *
   * When CASE_SEN is zero both strings are lower-cased into temporary
   * copies before searching.  Returns a pointer into TEXT, or NULL when
   * there is no match.
   */
  static const char *
  str_8bit_search_first (const char *text, const char *search, int case_sen)
  {
      char *lower_text;
      char *lower_search;
      const char *hit;

      if (case_sen)
          return g_strstr_len (text, -1, search);

      lower_text = str_8bit_strdown (text);
      lower_search = str_8bit_strdown (search);

      hit = g_strstr_len (lower_text, -1, lower_search);
      /* translate the match in the folded copy back into TEXT */
      if (hit != NULL)
          hit = text + (hit - lower_text);

      g_free (lower_text);
      g_free (lower_search);
      return hit;
  }
  /*
   * Find the last occurrence of SEARCH in TEXT.
   *
   * When CASE_SEN is zero both strings are lower-cased into temporary
   * copies before searching.  Returns a pointer into TEXT, or NULL when
   * there is no match.
   */
  static const char *
  str_8bit_search_last (const char *text, const char *search, int case_sen)
  {
      char *lower_text;
      char *lower_search;
      const char *hit;

      if (case_sen)
          return g_strrstr_len (text, -1, search);

      lower_text = str_8bit_strdown (text);
      lower_search = str_8bit_strdown (search);

      hit = g_strrstr_len (lower_text, -1, lower_search);
      /* translate the match in the folded copy back into TEXT */
      if (hit != NULL)
          hit = text + (hit - lower_text);

      g_free (lower_text);
      g_free (lower_search);
      return hit;
  }
  /* Compare T1 and T2 byte-wise; returns the result of strcmp (). */
  static int
  str_8bit_compare (const char *t1, const char *t2)
  {
      const int order = strcmp (t1, t2);

      return order;
  }
  /*
   * Compare T1 and T2 over the length of the shorter string, using
   * strncmp ().
   */
  static int
  str_8bit_ncompare (const char *t1, const char *t2)
  {
      const size_t len1 = strlen (t1);
      const size_t len2 = strlen (t2);

      return strncmp (t1, t2, min (len1, len2));
  }
  525. static int
  526. str_8bit_casecmp (const char *s1, const char *s2)
  527. {
  528. /* code from GLib */
  529. #ifdef HAVE_STRCASECMP
  530. g_return_val_if_fail (s1 != NULL, 0);
  531. g_return_val_if_fail (s2 != NULL, 0);
  532. return strcasecmp (s1, s2);
  533. #else
  534. gint c1, c2;
  535. g_return_val_if_fail (s1 != NULL, 0);
  536. g_return_val_if_fail (s2 != NULL, 0);
  537. while (*s1 != '\0' && *s2 != '\0')
  538. {
  539. /* According to A. Cox, some platforms have islower's that
  540. * don't work right on non-uppercase
  541. */
  542. c1 = isupper ((guchar) * s1) ? tolower ((guchar) * s1) : *s1;
  543. c2 = isupper ((guchar) * s2) ? tolower ((guchar) * s2) : *s2;
  544. if (c1 != c2)
  545. return (c1 - c2);
  546. s1++;
  547. s2++;
  548. }
  549. return (((gint) (guchar) * s1) - ((gint) (guchar) * s2));
  550. #endif
  551. }
  552. static int
  553. str_8bit_ncasecmp (const char *s1, const char *s2)
  554. {
  555. size_t n;
  556. g_return_val_if_fail (s1 != NULL, 0);
  557. g_return_val_if_fail (s2 != NULL, 0);
  558. n = min (strlen (s1), strlen (s2));
  559. /* code from GLib */
  560. #ifdef HAVE_STRNCASECMP
  561. return strncasecmp (s1, s2, n);
  562. #else
  563. gint c1, c2;
  564. while (n != 0 && *s1 != '\0' && *s2 != '\0')
  565. {
  566. n -= 1;
  567. /* According to A. Cox, some platforms have islower's that
  568. * don't work right on non-uppercase
  569. */
  570. c1 = isupper ((guchar) * s1) ? tolower ((guchar) * s1) : *s1;
  571. c2 = isupper ((guchar) * s2) ? tolower ((guchar) * s2) : *s2;
  572. if (c1 != c2)
  573. return (c1 - c2);
  574. s1++;
  575. s2++;
  576. }
  577. if (n != 0)
  578. return (((gint) (guchar) * s1) - ((gint) (guchar) * s2));
  579. else
  580. return 0;
  581. #endif
  582. }
  /*
   * Return the number of leading bytes that TEXT and PREFIX have in
   * common (exact, case-sensitive match).
   */
  static int
  str_8bit_prefix (const char *text, const char *prefix)
  {
      int matched = 0;

      while (text[matched] != '\0' && prefix[matched] != '\0'
             && text[matched] == prefix[matched])
          matched++;

      return matched;
  }
  /*
   * Return the number of leading bytes that TEXT and PREFIX have in
   * common when both are compared through char_toupper ().
   */
  static int
  str_8bit_caseprefix (const char *text, const char *prefix)
  {
      int matched = 0;

      while (text[matched] != '\0' && prefix[matched] != '\0'
             && char_toupper (text[matched]) == char_toupper (prefix[matched]))
          matched++;

      return matched;
  }
  /*
   * Repair invalid sequences in TEXT.
   * Does nothing: the argument is ignored and TEXT is left untouched.
   */
  static void
  str_8bit_fix_string (char *text)
  {
      (void) text;              /* unused */
  }
  /*
   * Create a comparison key for TEXT.
   *
   * When CASE_SEN is non-zero TEXT itself is returned (no copy);
   * otherwise a lower-cased copy made by str_8bit_strdown () is returned.
   */
  static char *
  str_8bit_create_key (const char *text, int case_sen)
  {
      if (case_sen)
          return (char *) text;

      return str_8bit_strdown (text);
  }
  /*
   * Compare two keys: strcmp () when CASE_SEN is non-zero, strcoll ()
   * otherwise.
   */
  static int
  str_8bit_key_collate (const char *t1, const char *t2, int case_sen)
  {
      return case_sen ? strcmp (t1, t2) : strcoll (t1, t2);
  }
  /*
   * Release a key.  KEY is passed to g_free () only when CASE_SEN is
   * zero; otherwise nothing is done.
   */
  static void
  str_8bit_release_key (char *key, int case_sen)
  {
      if (case_sen)
          return;

      g_free (key);
  }
  623. struct str_class
  624. str_8bit_init (void)
  625. {
  626. struct str_class result;
  627. result.conv_gerror_message = str_8bit_conv_gerror_message;
  628. result.vfs_convert_to = str_8bit_vfs_convert_to;
  629. result.insert_replace_char = str_8bit_insert_replace_char;
  630. result.is_valid_string = str_8bit_is_valid_string;
  631. result.is_valid_char = str_8bit_is_valid_char;
  632. result.cnext_char = str_8bit_cnext_char;
  633. result.cprev_char = str_8bit_cprev_char;
  634. result.cnext_char_safe = str_8bit_cnext_char;
  635. result.cprev_char_safe = str_8bit_cprev_char;
  636. result.cnext_noncomb_char = str_8bit_cnext_noncomb_char;
  637. result.cprev_noncomb_char = str_8bit_cprev_noncomb_char;
  638. result.isspace = str_8bit_isspace;
  639. result.ispunct = str_8bit_ispunct;
  640. result.isalnum = str_8bit_isalnum;
  641. result.isdigit = str_8bit_isdigit;
  642. result.isprint = str_8bit_isprint;
  643. result.iscombiningmark = str_8bit_iscombiningmark;
  644. result.toupper = str_8bit_toupper;
  645. result.tolower = str_8bit_tolower;
  646. result.length = str_8bit_length;
  647. result.length2 = str_8bit_length2;
  648. result.length_noncomb = str_8bit_length;
  649. result.fix_string = str_8bit_fix_string;
  650. result.term_form = str_8bit_term_form;
  651. result.fit_to_term = str_8bit_fit_to_term;
  652. result.term_trim = str_8bit_term_trim;
  653. result.term_width2 = str_8bit_term_width2;
  654. result.term_width1 = str_8bit_term_width1;
  655. result.term_char_width = str_8bit_term_char_width;
  656. result.term_substring = str_8bit_term_substring;
  657. result.trunc = str_8bit_trunc;
  658. result.offset_to_pos = str_8bit_offset_to_pos;
  659. result.column_to_pos = str_8bit_column_to_pos;
  660. result.create_search_needle = str_8bit_create_search_needle;
  661. result.release_search_needle = str_8bit_release_search_needle;
  662. result.search_first = str_8bit_search_first;
  663. result.search_last = str_8bit_search_last;
  664. result.compare = str_8bit_compare;
  665. result.ncompare = str_8bit_ncompare;
  666. result.casecmp = str_8bit_casecmp;
  667. result.ncasecmp = str_8bit_ncasecmp;
  668. result.prefix = str_8bit_prefix;
  669. result.caseprefix = str_8bit_caseprefix;
  670. result.create_key = str_8bit_create_key;
  671. result.create_key_for_filename = str_8bit_create_key;
  672. result.key_collate = str_8bit_key_collate;
  673. result.release_key = str_8bit_release_key;
  674. return result;
  675. }