strutil8bit.c 18 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793
  1. /* 8bit strings utilities
  2. Copyright (C) 2007 Free Software Foundation, Inc.
  3. Written 2007 by:
  4. Rostislav Benes
  5. The file_date routine is mostly from GNU's fileutils package,
  6. written by Richard Stallman and David MacKenzie.
  7. This program is free software; you can redistribute it and/or modify
  8. it under the terms of the GNU General Public License as published by
  9. the Free Software Foundation; either version 2 of the License, or
  10. (at your option) any later version.
  11. This program is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. GNU General Public License for more details.
  15. You should have received a copy of the GNU General Public License
  16. along with this program; if not, write to the Free Software
  17. Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. */
  19. #include <config.h>
  20. #include <stdio.h>
  21. #include <ctype.h>
  22. #include <errno.h>
  23. #include "lib/global.h"
  24. #include "lib/strutil.h"
  /* Functions for single-byte encodings: every character has width 1.
   * They are implemented with the standard system functions.
   * There are only small differences between the functions in strutil8bit.c
   * and those in strutilascii.c.
   */
  /* Replacement character appended by str_8bit_insert_replace_char (). */
  static const char replch = '?';

  /*
   * Wrappers that equalize the signedness of plain 'char' before it is
   * handed to a <ctype.h> function.  For every function instantiated below
   * a char_<name> () inline is generated, so that instead of
   *     isspace ((unsigned char) c);
   * one can simply write
   *     char_isspace (c);
   */
  #define DECLARE_CTYPE_WRAPPER(ctype_fn) \
  static inline int \
  char_##ctype_fn (char c) \
  { \
      return ctype_fn ((int) (unsigned char) c); \
  }

  /* character classification */
  DECLARE_CTYPE_WRAPPER (isalnum)
  DECLARE_CTYPE_WRAPPER (isalpha)
  DECLARE_CTYPE_WRAPPER (isascii)
  DECLARE_CTYPE_WRAPPER (isblank)
  DECLARE_CTYPE_WRAPPER (iscntrl)
  DECLARE_CTYPE_WRAPPER (isdigit)
  DECLARE_CTYPE_WRAPPER (isgraph)
  DECLARE_CTYPE_WRAPPER (islower)
  DECLARE_CTYPE_WRAPPER (isprint)
  DECLARE_CTYPE_WRAPPER (ispunct)
  DECLARE_CTYPE_WRAPPER (isspace)
  DECLARE_CTYPE_WRAPPER (isupper)
  DECLARE_CTYPE_WRAPPER (isxdigit)

  /* case conversion */
  DECLARE_CTYPE_WRAPPER (toupper)
  DECLARE_CTYPE_WRAPPER (tolower)
  56. static void
  57. str_8bit_insert_replace_char (GString * buffer)
  58. {
  59. g_string_append_c (buffer, replch);
  60. }
  /*
   * Validity check for a whole string.
   * The argument is not inspected: the function unconditionally returns 1.
   */
  static int
  str_8bit_is_valid_string (const char *text)
  {
      const int valid = 1;

      (void) text;              /* intentionally unused */

      return valid;
  }
  /*
   * Validity check for a single character.
   * Neither ch nor size is inspected: the function unconditionally
   * returns 1.
   */
  static int
  str_8bit_is_valid_char (const char *ch, size_t size)
  {
      const int valid = 1;

      /* both parameters are intentionally unused */
      (void) size;
      (void) ch;

      return valid;
  }
  /*
   * Move *text forward by one character, i.e. by one byte.
   * No check against the end of the string is made.
   */
  static void
  str_8bit_cnext_char (const char **text)
  {
      *text += 1;
  }
  /*
   * Move *text backward by one character, i.e. by one byte.
   * No check against the beginning of the string is made.
   */
  static void
  str_8bit_cprev_char (const char **text)
  {
      *text -= 1;
  }
  /*
   * Advance *text by one character unless it already points at the
   * terminating '\0'.
   *
   * Returns 1 when the pointer was advanced, 0 when it was left untouched.
   */
  static int
  str_8bit_cnext_noncomb_char (const char **text)
  {
      const char *cursor = *text;

      if (*cursor == '\0')
          return 0;

      *text = cursor + 1;
      return 1;
  }
  /*
   * Step *text back by one character unless it already equals begin.
   *
   * Returns 1 when the pointer was moved, 0 when it was left untouched.
   */
  static int
  str_8bit_cprev_noncomb_char (const char **text, const char *begin)
  {
      if (*text == begin)
          return 0;

      *text -= 1;
      return 1;
  }
  /*
   * Classify the first byte of text with char_isspace () and return
   * its result.
   */
  static int
  str_8bit_isspace (const char *text)
  {
      const char first = *text;

      return char_isspace (first);
  }
  /*
   * Classify the first byte of text with char_ispunct () and return
   * its result.
   */
  static int
  str_8bit_ispunct (const char *text)
  {
      const char first = *text;

      return char_ispunct (first);
  }
  /*
   * Classify the first byte of text with char_isalnum () and return
   * its result.
   */
  static int
  str_8bit_isalnum (const char *text)
  {
      const char first = *text;

      return char_isalnum (first);
  }
  /*
   * Classify the first byte of text with char_isdigit () and return
   * its result.
   */
  static int
  str_8bit_isdigit (const char *text)
  {
      const char first = *text;

      return char_isdigit (first);
  }
  /*
   * Classify the first byte of text with char_isprint () and return
   * its result.
   */
  static int
  str_8bit_isprint (const char *text)
  {
      const char first = *text;

      return char_isprint (first);
  }
  /*
   * Combining-mark test.
   * The argument is not inspected: the function unconditionally returns 0.
   */
  static int
  str_8bit_iscombiningmark (const char *text)
  {
      const int is_mark = 0;

      (void) text;              /* intentionally unused */

      return is_mark;
  }
  /*
   * Store the upper-case form of the first character of text at **out.
   *
   * On success *out is advanced by one, *remain is decreased by one and
   * 1 is returned.  When *remain is 1 or less nothing is written and
   * 0 is returned.
   */
  static int
  str_8bit_toupper (const char *text, char **out, size_t * remain)
  {
      char *dest;

      if (*remain <= 1)
          return 0;

      dest = *out;
      *dest = char_toupper (text[0]);
      *out = dest + 1;
      *remain -= 1;

      return 1;
  }
  /*
   * Store the lower-case form of the first character of text at **out.
   *
   * On success *out is advanced by one, *remain is decreased by one and
   * 1 is returned.  When *remain is 1 or less nothing is written and
   * 0 is returned.
   */
  static int
  str_8bit_tolower (const char *text, char **out, size_t * remain)
  {
      char *dest;

      if (*remain <= 1)
          return 0;

      dest = *out;
      *dest = char_tolower (text[0]);
      *out = dest + 1;
      *remain -= 1;

      return 1;
  }
  /*
   * Length of text in characters.  With one byte per character this is
   * strlen (text), converted to int.
   */
  static int
  str_8bit_length (const char *text)
  {
      const size_t bytes = strlen (text);

      return (int) bytes;
  }
  162. static int
  163. str_8bit_length2 (const char *text, int size)
  164. {
  165. return (size >= 0) ? min (strlen (text), (gsize) size) : strlen (text);
  166. }
  167. static gchar *
  168. str_8bit_conv_gerror_message (GError * error, const char *def_msg)
  169. {
  170. GIConv conv;
  171. gchar *ret;
  172. /* glib messages are in UTF-8 charset */
  173. conv = str_crt_conv_from ("UTF-8");
  174. if (conv == INVALID_CONV)
  175. ret = g_strdup (def_msg != NULL ? def_msg : "");
  176. else
  177. {
  178. GString *buf;
  179. buf = g_string_new ("");
  180. if (str_convert (conv, error->message, buf) != ESTR_FAILURE)
  181. {
  182. ret = buf->str;
  183. g_string_free (buf, FALSE);
  184. }
  185. else
  186. {
  187. ret = g_strdup (def_msg != NULL ? def_msg : "");
  188. g_string_free (buf, TRUE);
  189. }
  190. str_close_conv (conv);
  191. }
  192. return ret;
  193. }
  194. static estr_t
  195. str_8bit_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer)
  196. {
  197. estr_t result;
  198. if (coder == str_cnv_not_convert)
  199. {
  200. g_string_append_len (buffer, string, size);
  201. result = ESTR_SUCCESS;
  202. }
  203. else
  204. result = str_nconvert (coder, (char *) string, size, buffer);
  205. return result;
  206. }
  207. static const char *
  208. str_8bit_term_form (const char *text)
  209. {
  210. static char result[BUF_MEDIUM];
  211. char *actual;
  212. size_t remain;
  213. size_t length;
  214. size_t pos = 0;
  215. actual = result;
  216. remain = sizeof (result);
  217. length = strlen (text);
  218. for (; pos < length && remain > 1; pos++, actual++, remain--)
  219. {
  220. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  221. }
  222. actual[0] = '\0';
  223. return result;
  224. }
  225. static const char *
  226. str_8bit_fit_to_term (const char *text, int width, align_crt_t just_mode)
  227. {
  228. static char result[BUF_MEDIUM];
  229. char *actual;
  230. size_t remain;
  231. int ident;
  232. size_t length;
  233. size_t pos = 0;
  234. length = strlen (text);
  235. actual = result;
  236. remain = sizeof (result);
  237. if ((int) length <= width)
  238. {
  239. ident = 0;
  240. switch (HIDE_FIT (just_mode))
  241. {
  242. case J_CENTER_LEFT:
  243. case J_CENTER:
  244. ident = (width - length) / 2;
  245. break;
  246. case J_RIGHT:
  247. ident = width - length;
  248. break;
  249. }
  250. if ((int) remain <= ident)
  251. goto finally;
  252. memset (actual, ' ', ident);
  253. actual += ident;
  254. remain -= ident;
  255. for (; pos < length && remain > 1; pos++, actual++, remain--)
  256. {
  257. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  258. }
  259. if (width - length - ident > 0)
  260. {
  261. if (remain <= width - length - ident)
  262. goto finally;
  263. memset (actual, ' ', width - length - ident);
  264. actual += width - length - ident;
  265. remain -= width - length - ident;
  266. }
  267. }
  268. else
  269. {
  270. if (IS_FIT (just_mode))
  271. {
  272. for (; pos + 1 <= (gsize) width / 2 && remain > 1; actual++, pos++, remain--)
  273. {
  274. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  275. }
  276. if (remain <= 1)
  277. goto finally;
  278. actual[0] = '~';
  279. actual++;
  280. remain--;
  281. pos += length - width + 1;
  282. for (; pos < length && remain > 1; pos++, actual++, remain--)
  283. {
  284. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  285. }
  286. }
  287. else
  288. {
  289. ident = 0;
  290. switch (HIDE_FIT (just_mode))
  291. {
  292. case J_CENTER:
  293. ident = (length - width) / 2;
  294. break;
  295. case J_RIGHT:
  296. ident = length - width;
  297. break;
  298. }
  299. pos += ident;
  300. for (; pos < (gsize) (ident + width) && remain > 1; pos++, actual++, remain--)
  301. {
  302. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  303. }
  304. }
  305. }
  306. finally:
  307. actual[0] = '\0';
  308. return result;
  309. }
  310. static const char *
  311. str_8bit_term_trim (const char *text, int width)
  312. {
  313. static char result[BUF_MEDIUM];
  314. size_t remain;
  315. char *actual;
  316. size_t pos = 0;
  317. size_t length;
  318. length = strlen (text);
  319. actual = result;
  320. remain = sizeof (result);
  321. if (width < (int) length)
  322. {
  323. if (width <= 3)
  324. {
  325. memset (actual, '.', width);
  326. actual += width;
  327. remain -= width;
  328. }
  329. else
  330. {
  331. memset (actual, '.', 3);
  332. actual += 3;
  333. remain -= 3;
  334. pos += length - width + 3;
  335. for (; pos < length && remain > 1; pos++, actual++, remain--)
  336. {
  337. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  338. }
  339. }
  340. }
  341. else
  342. {
  343. for (; pos < length && remain > 1; pos++, actual++, remain--)
  344. {
  345. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  346. }
  347. }
  348. actual[0] = '\0';
  349. return result;
  350. }
  /*
   * Terminal width of at most length characters of text.
   *
   * Passing (size_t) (-1) as length means "no limit": the result is
   * strlen (text).  Otherwise the result is the smaller of strlen (text)
   * and length.
   */
  static int
  str_8bit_term_width2 (const char *text, size_t length)
  {
      const size_t text_len = strlen (text);

      if (length == (size_t) (-1))
          return (int) text_len;

      return (int) min (text_len, length);
  }
  /*
   * Terminal width of the whole text: str_8bit_term_width2 () called with
   * (size_t) (-1) as the length.
   */
  static int
  str_8bit_term_width1 (const char *text)
  {
      const size_t whole_text = (size_t) (-1);

      return str_8bit_term_width2 (text, whole_text);
  }
  /*
   * Terminal width of one character.
   * The argument is not inspected: the function unconditionally returns 1.
   */
  static int
  str_8bit_term_char_width (const char *text)
  {
      const int columns = 1;

      (void) text;              /* intentionally unused */

      return columns;
  }
  367. static const char *
  368. str_8bit_term_substring (const char *text, int start, int width)
  369. {
  370. static char result[BUF_MEDIUM];
  371. size_t remain;
  372. char *actual;
  373. size_t pos = 0;
  374. size_t length;
  375. actual = result;
  376. remain = sizeof (result);
  377. length = strlen (text);
  378. if (start < (int) length)
  379. {
  380. pos += start;
  381. for (; pos < length && width > 0 && remain > 1; pos++, width--, actual++, remain--)
  382. {
  383. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  384. }
  385. }
  386. for (; width > 0 && remain > 1; actual++, remain--, width--)
  387. {
  388. actual[0] = ' ';
  389. }
  390. actual[0] = '\0';
  391. return result;
  392. }
  393. static const char *
  394. str_8bit_trunc (const char *text, int width)
  395. {
  396. static char result[MC_MAXPATHLEN];
  397. int remain;
  398. char *actual;
  399. size_t pos = 0;
  400. size_t length;
  401. actual = result;
  402. remain = sizeof (result);
  403. length = strlen (text);
  404. if ((int) length > width)
  405. {
  406. for (; pos + 1 <= (gsize) width / 2 && remain > 1; actual++, pos++, remain--)
  407. {
  408. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  409. }
  410. if (remain <= 1)
  411. goto finally;
  412. actual[0] = '~';
  413. actual++;
  414. remain--;
  415. pos += length - width + 1;
  416. for (; pos < length && remain > 1; pos++, actual++, remain--)
  417. {
  418. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  419. }
  420. }
  421. else
  422. {
  423. for (; pos < length && remain > 1; pos++, actual++, remain--)
  424. {
  425. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  426. }
  427. }
  428. finally:
  429. actual[0] = '\0';
  430. return result;
  431. }
  /*
   * Translate a character offset into a byte position.
   * text is not inspected; the result is length converted to int.
   */
  static int
  str_8bit_offset_to_pos (const char *text, size_t length)
  {
      const int byte_pos = (int) length;

      (void) text;              /* intentionally unused */

      return byte_pos;
  }
  /*
   * Translate a terminal column into a byte position.
   * text is not inspected; the result is pos converted to int.
   */
  static int
  str_8bit_column_to_pos (const char *text, size_t pos)
  {
      const int byte_pos = (int) pos;

      (void) text;              /* intentionally unused */

      return byte_pos;
  }
  /*
   * Prepare needle for searching.
   * No copy is made: the very same pointer is returned with its const
   * qualifier cast away.  case_sen is ignored.
   */
  static char *
  str_8bit_create_search_needle (const char *needle, int case_sen)
  {
      char *prepared = (char *) needle;

      (void) case_sen;          /* intentionally unused */

      return prepared;
  }
  /*
   * Release a needle obtained from str_8bit_create_search_needle ().
   * Does nothing: both arguments are ignored.
   */
  static void
  str_8bit_release_search_needle (char *needle, int case_sen)
  {
      /* both parameters are intentionally unused */
      (void) needle;
      (void) case_sen;
  }
  456. static char *
  457. str_8bit_strdown (const char *str)
  458. {
  459. char *rets, *p;
  460. rets = g_strdup (str);
  461. if (rets == NULL)
  462. return NULL;
  463. for (p = rets; *p != '\0'; p++)
  464. *p = char_tolower (*p);
  465. return rets;
  466. }
  /*
   * Find the first occurrence of search in text.
   *
   * With case_sen set the strings are searched as they are; otherwise
   * lower-cased copies made by str_8bit_strdown () are searched and
   * released before returning.
   *
   * Returns a pointer into text at the match, or NULL if there is none.
   */
  static const char *
  str_8bit_search_first (const char *text, const char *search, int case_sen)
  {
      char *haystack;
      char *needle;
      const char *hit;

      if (case_sen)
      {
          haystack = (char *) text;
          needle = (char *) search;
      }
      else
      {
          haystack = str_8bit_strdown (text);
          needle = str_8bit_strdown (search);
      }

      hit = g_strstr_len (haystack, -1, needle);
      if (hit != NULL)
      {
          /* translate the match position back into the original text */
          const size_t offset = hit - haystack;

          hit = text + offset;
      }

      if (!case_sen)
      {
          g_free (haystack);
          g_free (needle);
      }

      return hit;
  }
  /*
   * Find the last occurrence of search in text.
   *
   * With case_sen set the strings are searched as they are; otherwise
   * lower-cased copies made by str_8bit_strdown () are searched and
   * released before returning.
   *
   * Returns a pointer into text at the match, or NULL if there is none.
   */
  static const char *
  str_8bit_search_last (const char *text, const char *search, int case_sen)
  {
      char *haystack;
      char *needle;
      const char *hit;

      if (case_sen)
      {
          haystack = (char *) text;
          needle = (char *) search;
      }
      else
      {
          haystack = str_8bit_strdown (text);
          needle = str_8bit_strdown (search);
      }

      hit = g_strrstr_len (haystack, -1, needle);
      if (hit != NULL)
      {
          /* translate the match position back into the original text */
          const size_t offset = hit - haystack;

          hit = text + offset;
      }

      if (!case_sen)
      {
          g_free (haystack);
          g_free (needle);
      }

      return hit;
  }
  /*
   * Compare two strings byte by byte; the result is that of strcmp ().
   */
  static int
  str_8bit_compare (const char *t1, const char *t2)
  {
      const int order = strcmp (t1, t2);

      return order;
  }
  /*
   * Compare t1 and t2 over the length of the shorter one; the result is
   * that of strncmp () limited to that length.
   */
  static int
  str_8bit_ncompare (const char *t1, const char *t2)
  {
      const size_t len1 = strlen (t1);
      const size_t len2 = strlen (t2);

      return strncmp (t1, t2, min (len1, len2));
  }
  521. static int
  522. str_8bit_casecmp (const char *s1, const char *s2)
  523. {
  524. /* code from GLib */
  525. #ifdef HAVE_STRCASECMP
  526. g_return_val_if_fail (s1 != NULL, 0);
  527. g_return_val_if_fail (s2 != NULL, 0);
  528. return strcasecmp (s1, s2);
  529. #else
  530. gint c1, c2;
  531. g_return_val_if_fail (s1 != NULL, 0);
  532. g_return_val_if_fail (s2 != NULL, 0);
  533. while (*s1 != '\0' && *s2 != '\0')
  534. {
  535. /* According to A. Cox, some platforms have islower's that
  536. * don't work right on non-uppercase
  537. */
  538. c1 = isupper ((guchar) * s1) ? tolower ((guchar) * s1) : *s1;
  539. c2 = isupper ((guchar) * s2) ? tolower ((guchar) * s2) : *s2;
  540. if (c1 != c2)
  541. return (c1 - c2);
  542. s1++;
  543. s2++;
  544. }
  545. return (((gint) (guchar) * s1) - ((gint) (guchar) * s2));
  546. #endif
  547. }
  548. static int
  549. str_8bit_ncasecmp (const char *s1, const char *s2)
  550. {
  551. size_t n;
  552. g_return_val_if_fail (s1 != NULL, 0);
  553. g_return_val_if_fail (s2 != NULL, 0);
  554. n = min (strlen (s1), strlen (s2));
  555. /* code from GLib */
  556. #ifdef HAVE_STRNCASECMP
  557. return strncasecmp (s1, s2, n);
  558. #else
  559. gint c1, c2;
  560. while (n != 0 && *s1 != '\0' && *s2 != '\0')
  561. {
  562. n -= 1;
  563. /* According to A. Cox, some platforms have islower's that
  564. * don't work right on non-uppercase
  565. */
  566. c1 = isupper ((guchar) * s1) ? tolower ((guchar) * s1) : *s1;
  567. c2 = isupper ((guchar) * s2) ? tolower ((guchar) * s2) : *s2;
  568. if (c1 != c2)
  569. return (c1 - c2);
  570. s1++;
  571. s2++;
  572. }
  573. if (n != 0)
  574. return (((gint) (guchar) * s1) - ((gint) (guchar) * s2));
  575. else
  576. return 0;
  577. #endif
  578. }
  /*
   * Length of the common prefix of text and prefix: the number of leading
   * characters that are equal in both strings.
   */
  static int
  str_8bit_prefix (const char *text, const char *prefix)
  {
      int matched = 0;

      while (text[matched] != '\0' && prefix[matched] != '\0'
             && text[matched] == prefix[matched])
          matched++;

      return matched;
  }
  /*
   * Length of the common prefix of text and prefix, ignoring case: the
   * number of leading characters whose char_toupper () values are equal
   * in both strings.
   */
  static int
  str_8bit_caseprefix (const char *text, const char *prefix)
  {
      int matched = 0;

      while (text[matched] != '\0' && prefix[matched] != '\0'
             && char_toupper (text[matched]) == char_toupper (prefix[matched]))
          matched++;

      return matched;
  }
  /*
   * Repair invalid characters in text.
   * Does nothing: the argument is ignored and the string is left as it is.
   */
  static void
  str_8bit_fix_string (char *text)
  {
      (void) text;              /* intentionally unused */
  }
  /*
   * Create a comparison key for text.
   *
   * With case_sen set, text itself is returned (const cast away, no copy);
   * otherwise the result of str_8bit_strdown (text) is returned.
   */
  static char *
  str_8bit_create_key (const char *text, int case_sen)
  {
      if (case_sen)
          return (char *) text;

      return str_8bit_strdown (text);
  }
  /*
   * Compare two keys: with strcmp () when case_sen is set, with
   * strcoll () otherwise.
   */
  static int
  str_8bit_key_collate (const char *t1, const char *t2, int case_sen)
  {
      return case_sen ? strcmp (t1, t2) : strcoll (t1, t2);
  }
  /*
   * Release a key.
   * The key is passed to g_free () only when case_sen is not set; with
   * case_sen set nothing is done.
   */
  static void
  str_8bit_release_key (char *key, int case_sen)
  {
      if (case_sen)
          return;

      g_free (key);
  }
  619. struct str_class
  620. str_8bit_init (void)
  621. {
  622. struct str_class result;
  623. result.conv_gerror_message = str_8bit_conv_gerror_message;
  624. result.vfs_convert_to = str_8bit_vfs_convert_to;
  625. result.insert_replace_char = str_8bit_insert_replace_char;
  626. result.is_valid_string = str_8bit_is_valid_string;
  627. result.is_valid_char = str_8bit_is_valid_char;
  628. result.cnext_char = str_8bit_cnext_char;
  629. result.cprev_char = str_8bit_cprev_char;
  630. result.cnext_char_safe = str_8bit_cnext_char;
  631. result.cprev_char_safe = str_8bit_cprev_char;
  632. result.cnext_noncomb_char = str_8bit_cnext_noncomb_char;
  633. result.cprev_noncomb_char = str_8bit_cprev_noncomb_char;
  634. result.isspace = str_8bit_isspace;
  635. result.ispunct = str_8bit_ispunct;
  636. result.isalnum = str_8bit_isalnum;
  637. result.isdigit = str_8bit_isdigit;
  638. result.isprint = str_8bit_isprint;
  639. result.iscombiningmark = str_8bit_iscombiningmark;
  640. result.toupper = str_8bit_toupper;
  641. result.tolower = str_8bit_tolower;
  642. result.length = str_8bit_length;
  643. result.length2 = str_8bit_length2;
  644. result.length_noncomb = str_8bit_length;
  645. result.fix_string = str_8bit_fix_string;
  646. result.term_form = str_8bit_term_form;
  647. result.fit_to_term = str_8bit_fit_to_term;
  648. result.term_trim = str_8bit_term_trim;
  649. result.term_width2 = str_8bit_term_width2;
  650. result.term_width1 = str_8bit_term_width1;
  651. result.term_char_width = str_8bit_term_char_width;
  652. result.term_substring = str_8bit_term_substring;
  653. result.trunc = str_8bit_trunc;
  654. result.offset_to_pos = str_8bit_offset_to_pos;
  655. result.column_to_pos = str_8bit_column_to_pos;
  656. result.create_search_needle = str_8bit_create_search_needle;
  657. result.release_search_needle = str_8bit_release_search_needle;
  658. result.search_first = str_8bit_search_first;
  659. result.search_last = str_8bit_search_last;
  660. result.compare = str_8bit_compare;
  661. result.ncompare = str_8bit_ncompare;
  662. result.casecmp = str_8bit_casecmp;
  663. result.ncasecmp = str_8bit_ncasecmp;
  664. result.prefix = str_8bit_prefix;
  665. result.caseprefix = str_8bit_caseprefix;
  666. result.create_key = str_8bit_create_key;
  667. result.create_key_for_filename = str_8bit_create_key;
  668. result.key_collate = str_8bit_key_collate;
  669. result.release_key = str_8bit_release_key;
  670. return result;
  671. }