strutil8bit.c 17 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761
  /*
     8bit strings utilities

     Copyright (C) 2007-2014
     Free Software Foundation, Inc.

     Written by:
     Rostislav Benes, 2007

     This file is part of the Midnight Commander.

     The Midnight Commander is free software: you can redistribute it
     and/or modify it under the terms of the GNU General Public License as
     published by the Free Software Foundation, either version 3 of the License,
     or (at your option) any later version.

     The Midnight Commander is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     GNU General Public License for more details.

     You should have received a copy of the GNU General Public License
     along with this program.  If not, see <http://www.gnu.org/licenses/>.
   */
  19. #include <config.h>
  20. #include <ctype.h>
  21. #include <stdlib.h>
  22. #include "lib/global.h"
  23. #include "lib/strutil.h"
  /* Functions for single-byte encodings: every character has width 1.
   * They are built on the standard system functions.
   * There are only small differences between the functions in strutil8bit.c
   * and those in strutilascii.c.
   */
  /* Character appended by str_8bit_insert_replace_char(). */
  static const char replch = '?';
  /*
   * Inline wrappers that neutralize the signedness of 'char' for
   * single-byte encodings.
   * Instead of writing
   *     isspace ((unsigned char) c);
   * you can write
   *     char_isspace (c);
   */
  /*
   * Generate a static inline function char_<name>() that forwards to the
   * <ctype.h> routine <name>.  The argument is converted to unsigned char
   * before the call, so the wrapper can be given a plain 'char' regardless
   * of whether 'char' is signed.
   */
  #define DECLARE_CTYPE_WRAPPER(ctype_fn) \
      static inline int \
      char_##ctype_fn (char ch) \
      { \
          const unsigned char uch = (unsigned char) ch; \
          return ctype_fn ((int) uch); \
      }

  /* *INDENT-OFF* */
  DECLARE_CTYPE_WRAPPER (isalnum)
  DECLARE_CTYPE_WRAPPER (isalpha)
  DECLARE_CTYPE_WRAPPER (isascii)
  DECLARE_CTYPE_WRAPPER (isblank)
  DECLARE_CTYPE_WRAPPER (iscntrl)
  DECLARE_CTYPE_WRAPPER (isdigit)
  DECLARE_CTYPE_WRAPPER (isgraph)
  DECLARE_CTYPE_WRAPPER (islower)
  DECLARE_CTYPE_WRAPPER (isprint)
  DECLARE_CTYPE_WRAPPER (ispunct)
  DECLARE_CTYPE_WRAPPER (isspace)
  DECLARE_CTYPE_WRAPPER (isupper)
  DECLARE_CTYPE_WRAPPER (isxdigit)
  DECLARE_CTYPE_WRAPPER (toupper)
  DECLARE_CTYPE_WRAPPER (tolower)
  /* *INDENT-ON* */
  59. static void
  60. str_8bit_insert_replace_char (GString * buffer)
  61. {
  62. g_string_append_c (buffer, replch);
  63. }
  /*
   * Validity check for a string.  The argument is not inspected; the
   * function unconditionally returns 1.
   */
  static int
  str_8bit_is_valid_string (const char *text)
  {
      const int valid = 1;

      (void) text;
      return valid;
  }
  /*
   * Validity check for a single character.  Neither argument is
   * inspected; the function unconditionally returns 1.
   */
  static int
  str_8bit_is_valid_char (const char *ch, size_t size)
  {
      const int valid = 1;

      (void) size;
      (void) ch;
      return valid;
  }
  /*
   * Advance the pointer stored in *text by one byte.
   * No bounds check is performed.
   */
  static void
  str_8bit_cnext_char (const char **text)
  {
      *text += 1;
  }
  /*
   * Move the pointer stored in *text back by one byte.
   * No bounds check is performed.
   */
  static void
  str_8bit_cprev_char (const char **text)
  {
      *text -= 1;
  }
  /*
   * Step *text forward by one byte unless it already points at the
   * terminating NUL.
   *
   * Returns 1 when the pointer was advanced, 0 when it was left untouched.
   */
  static int
  str_8bit_cnext_noncomb_char (const char **text)
  {
      const char *cursor = *text;

      if (*cursor == '\0')
          return 0;

      *text = cursor + 1;
      return 1;
  }
  /*
   * Step *text back by one byte unless it already equals begin.
   *
   * Returns 1 when the pointer was moved, 0 when it was left untouched.
   */
  static int
  str_8bit_cprev_noncomb_char (const char **text, const char *begin)
  {
      const char *cursor = *text;

      if (cursor == begin)
          return 0;

      *text = cursor - 1;
      return 1;
  }
  /*
   * Return the result of char_isspace() for the first byte of text.
   */
  static int
  str_8bit_isspace (const char *text)
  {
      const char first = *text;

      return char_isspace (first);
  }
  /*
   * Return the result of char_ispunct() for the first byte of text.
   */
  static int
  str_8bit_ispunct (const char *text)
  {
      const char first = *text;

      return char_ispunct (first);
  }
  /*
   * Return the result of char_isalnum() for the first byte of text.
   */
  static int
  str_8bit_isalnum (const char *text)
  {
      const char first = *text;

      return char_isalnum (first);
  }
  /*
   * Return the result of char_isdigit() for the first byte of text.
   */
  static int
  str_8bit_isdigit (const char *text)
  {
      const char first = *text;

      return char_isdigit (first);
  }
  /*
   * Return the result of char_isprint() for the first byte of text.
   */
  static int
  str_8bit_isprint (const char *text)
  {
      const char first = *text;

      return char_isprint (first);
  }
  128. static gboolean
  129. str_8bit_iscombiningmark (const char *text)
  130. {
  131. (void) text;
  132. return FALSE;
  133. }
  /*
   * Store the upper-case form (char_toupper) of the first byte of text at
   * **out, then advance *out by one and decrease *remain by one.
   *
   * Nothing is written and 0 is returned when *remain is 0 or 1;
   * otherwise 1 is returned.
   */
  static int
  str_8bit_toupper (const char *text, char **out, size_t * remain)
  {
      char *dest;

      if (*remain <= 1)
          return 0;

      dest = *out;
      *dest = char_toupper (*text);
      *out = dest + 1;
      *remain -= 1;
      return 1;
  }
  /*
   * Store the lower-case form (char_tolower) of the first byte of text at
   * **out, then advance *out by one and decrease *remain by one.
   *
   * Nothing is written and 0 is returned when *remain is 0 or 1;
   * otherwise 1 is returned.
   */
  static int
  str_8bit_tolower (const char *text, char **out, size_t * remain)
  {
      char *dest;

      if (*remain <= 1)
          return 0;

      dest = *out;
      *dest = char_tolower (*text);
      *out = dest + 1;
      *remain -= 1;
      return 1;
  }
  /*
   * Return the number of bytes in text before the terminating NUL
   * (strlen), converted to int.
   */
  static int
  str_8bit_length (const char *text)
  {
      const size_t bytes = strlen (text);

      return (int) bytes;
  }
  /*
   * Length of text, limited to at most size bytes.
   *
   * text: string to measure
   * size: maximum number of bytes to look at; a negative value means
   *       "no limit" and the whole NUL-terminated string is measured
   *
   * Returns the number of bytes before the terminating NUL, but never more
   * than size when size is non-negative.
   *
   * When a limit is given, at most size bytes are examined, so text does
   * not have to be NUL-terminated within that range.
   */
  static int
  str_8bit_length2 (const char *text, int size)
  {
      size_t limit;
      size_t count = 0;

      if (size < 0)
          return (int) strlen (text);

      limit = (size_t) size;
      /* bounded scan: never touch bytes at or beyond text[size] */
      while (count < limit && text[count] != '\0')
          count++;

      return (int) count;
  }
  164. static gchar *
  165. str_8bit_conv_gerror_message (GError * error, const char *def_msg)
  166. {
  167. GIConv conv;
  168. gchar *ret;
  169. /* glib messages are in UTF-8 charset */
  170. conv = str_crt_conv_from ("UTF-8");
  171. if (conv == INVALID_CONV)
  172. ret = g_strdup (def_msg != NULL ? def_msg : "");
  173. else
  174. {
  175. GString *buf;
  176. buf = g_string_new ("");
  177. if (str_convert (conv, error->message, buf) != ESTR_FAILURE)
  178. ret = g_string_free (buf, FALSE);
  179. else
  180. {
  181. ret = g_strdup (def_msg != NULL ? def_msg : "");
  182. g_string_free (buf, TRUE);
  183. }
  184. str_close_conv (conv);
  185. }
  186. return ret;
  187. }
  188. static estr_t
  189. str_8bit_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer)
  190. {
  191. estr_t result = ESTR_SUCCESS;
  192. if (coder == str_cnv_not_convert)
  193. g_string_append_len (buffer, string, size);
  194. else
  195. result = str_nconvert (coder, (char *) string, size, buffer);
  196. return result;
  197. }
  198. static const char *
  199. str_8bit_term_form (const char *text)
  200. {
  201. static char result[BUF_MEDIUM];
  202. char *actual;
  203. size_t remain;
  204. size_t length;
  205. size_t pos = 0;
  206. actual = result;
  207. remain = sizeof (result);
  208. length = strlen (text);
  209. for (; pos < length && remain > 1; pos++, actual++, remain--)
  210. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  211. actual[0] = '\0';
  212. return result;
  213. }
  214. static const char *
  215. str_8bit_fit_to_term (const char *text, int width, align_crt_t just_mode)
  216. {
  217. static char result[BUF_MEDIUM];
  218. char *actual;
  219. size_t remain;
  220. int ident = 0;
  221. size_t length;
  222. size_t pos = 0;
  223. length = strlen (text);
  224. actual = result;
  225. remain = sizeof (result);
  226. if ((int) length <= width)
  227. {
  228. switch (HIDE_FIT (just_mode))
  229. {
  230. case J_CENTER_LEFT:
  231. case J_CENTER:
  232. ident = (width - length) / 2;
  233. break;
  234. case J_RIGHT:
  235. ident = width - length;
  236. break;
  237. }
  238. if ((int) remain <= ident)
  239. goto finally;
  240. memset (actual, ' ', ident);
  241. actual += ident;
  242. remain -= ident;
  243. for (; pos < length && remain > 1; pos++, actual++, remain--)
  244. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  245. if (width - length - ident > 0)
  246. {
  247. if (remain <= width - length - ident)
  248. goto finally;
  249. memset (actual, ' ', width - length - ident);
  250. actual += width - length - ident;
  251. }
  252. }
  253. else if (IS_FIT (just_mode))
  254. {
  255. for (; pos + 1 <= (gsize) width / 2 && remain > 1; actual++, pos++, remain--)
  256. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  257. if (remain <= 1)
  258. goto finally;
  259. actual[0] = '~';
  260. actual++;
  261. remain--;
  262. pos += length - width + 1;
  263. for (; pos < length && remain > 1; pos++, actual++, remain--)
  264. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  265. }
  266. else
  267. {
  268. switch (HIDE_FIT (just_mode))
  269. {
  270. case J_CENTER:
  271. ident = (length - width) / 2;
  272. break;
  273. case J_RIGHT:
  274. ident = length - width;
  275. break;
  276. }
  277. pos += ident;
  278. for (; pos < (gsize) (ident + width) && remain > 1; pos++, actual++, remain--)
  279. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  280. }
  281. finally:
  282. actual[0] = '\0';
  283. return result;
  284. }
  285. static const char *
  286. str_8bit_term_trim (const char *text, int width)
  287. {
  288. static char result[BUF_MEDIUM];
  289. size_t remain;
  290. char *actual;
  291. size_t length;
  292. length = strlen (text);
  293. actual = result;
  294. remain = sizeof (result);
  295. if (width > 0)
  296. {
  297. size_t pos;
  298. if (width >= (int) length)
  299. {
  300. for (pos = 0; pos < length && remain > 1; pos++, actual++, remain--)
  301. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  302. }
  303. else if (width <= 3)
  304. {
  305. memset (actual, '.', width);
  306. actual += width;
  307. }
  308. else
  309. {
  310. memset (actual, '.', 3);
  311. actual += 3;
  312. remain -= 3;
  313. for (pos = length - width + 3; pos < length && remain > 1; pos++, actual++, remain--)
  314. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  315. }
  316. }
  317. actual[0] = '\0';
  318. return result;
  319. }
  /*
   * Width of text when at most length bytes of it are considered.
   *
   * text:   string to measure
   * length: maximum number of bytes to look at; (size_t) (-1) means
   *         "no limit" and the whole NUL-terminated string is measured
   *
   * Returns the number of bytes before the terminating NUL, but never more
   * than length when a limit is given.
   *
   * With a limit, at most length bytes are examined, so text does not have
   * to be NUL-terminated within that range.
   */
  static int
  str_8bit_term_width2 (const char *text, size_t length)
  {
      size_t count = 0;

      if (length == (size_t) (-1))
          return (int) strlen (text);

      /* bounded scan: never touch bytes at or beyond text[length] */
      while (count < length && text[count] != '\0')
          count++;

      return (int) count;
  }
  /*
   * Width of the whole string: str_8bit_term_width2() called with the
   * "no limit" value (size_t) (-1).
   */
  static int
  str_8bit_term_width1 (const char *text)
  {
      const size_t no_limit = (size_t) (-1);

      return str_8bit_term_width2 (text, no_limit);
  }
  /*
   * Width of one character.  The argument is not inspected; the function
   * unconditionally returns 1.
   */
  static int
  str_8bit_term_char_width (const char *text)
  {
      const int width = 1;

      (void) text;
      return width;
  }
  336. static const char *
  337. str_8bit_term_substring (const char *text, int start, int width)
  338. {
  339. static char result[BUF_MEDIUM];
  340. size_t remain;
  341. char *actual;
  342. size_t length;
  343. actual = result;
  344. remain = sizeof (result);
  345. length = strlen (text);
  346. if (start < (int) length)
  347. {
  348. size_t pos;
  349. for (pos = start; pos < length && width > 0 && remain > 1;
  350. pos++, width--, actual++, remain--)
  351. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  352. }
  353. for (; width > 0 && remain > 1; actual++, remain--, width--)
  354. actual[0] = ' ';
  355. actual[0] = '\0';
  356. return result;
  357. }
  358. static const char *
  359. str_8bit_trunc (const char *text, int width)
  360. {
  361. static char result[MC_MAXPATHLEN];
  362. int remain;
  363. char *actual;
  364. size_t pos = 0;
  365. size_t length;
  366. actual = result;
  367. remain = sizeof (result);
  368. length = strlen (text);
  369. if ((int) length > width)
  370. {
  371. for (; pos + 1 <= (gsize) width / 2 && remain > 1; actual++, pos++, remain--)
  372. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  373. if (remain <= 1)
  374. goto finally;
  375. actual[0] = '~';
  376. actual++;
  377. remain--;
  378. pos += length - width + 1;
  379. for (; pos < length && remain > 1; pos++, actual++, remain--)
  380. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  381. }
  382. else
  383. {
  384. for (; pos < length && remain > 1; pos++, actual++, remain--)
  385. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  386. }
  387. finally:
  388. actual[0] = '\0';
  389. return result;
  390. }
  /*
   * Return length converted to int.  text is not inspected.
   */
  static int
  str_8bit_offset_to_pos (const char *text, size_t length)
  {
      const int position = (int) length;

      (void) text;
      return position;
  }
  /*
   * Return pos converted to int.  text is not inspected.
   */
  static int
  str_8bit_column_to_pos (const char *text, size_t pos)
  {
      const int position = (int) pos;

      (void) text;
      return position;
  }
  /*
   * Return needle itself with the const qualifier cast away; no copy is
   * made.  case_sen is ignored.
   */
  static char *
  str_8bit_create_search_needle (const char *needle, int case_sen)
  {
      char *same = (char *) needle;

      (void) case_sen;
      return same;
  }
  /*
   * Does nothing: both arguments are ignored.
   */
  static void
  str_8bit_release_search_needle (char *needle, int case_sen)
  {
      (void) needle;
      (void) case_sen;
  }
  /*
   * Return a newly allocated (g_strdup) copy of str in which every byte
   * has been passed through char_tolower().
   *
   * Returns NULL when str is NULL.
   */
  static char *
  str_8bit_strdown (const char *str)
  {
      char *lowered;
      size_t i;

      if (str == NULL)
          return NULL;

      lowered = g_strdup (str);
      for (i = 0; lowered[i] != '\0'; i++)
          lowered[i] = char_tolower (lowered[i]);

      return lowered;
  }
  /*
   * Find the first occurrence of search in text (g_strstr_len).
   *
   * When case_sen is zero, the search runs on lower-cased copies of both
   * strings (str_8bit_strdown) and the copies are freed afterwards.
   *
   * Returns a pointer into text at the start of the match, or NULL when
   * there is none.
   */
  static const char *
  str_8bit_search_first (const char *text, const char *search, int case_sen)
  {
      char *haystack;
      char *needle;
      const char *hit;
      const char *found = NULL;

      if (case_sen)
      {
          haystack = (char *) text;
          needle = (char *) search;
      }
      else
      {
          haystack = str_8bit_strdown (text);
          needle = str_8bit_strdown (search);
      }

      hit = g_strstr_len (haystack, -1, needle);
      if (hit != NULL)
      {
          /* translate the match position back into the original string */
          const size_t offset = hit - haystack;

          found = text + offset;
      }

      if (!case_sen)
      {
          g_free (haystack);
          g_free (needle);
      }

      return found;
  }
  /*
   * Find the last occurrence of search in text (g_strrstr_len).
   *
   * When case_sen is zero, the search runs on lower-cased copies of both
   * strings (str_8bit_strdown) and the copies are freed afterwards.
   *
   * Returns a pointer into text at the start of the match, or NULL when
   * there is none.
   */
  static const char *
  str_8bit_search_last (const char *text, const char *search, int case_sen)
  {
      char *haystack;
      char *needle;
      const char *hit;
      const char *found = NULL;

      if (case_sen)
      {
          haystack = (char *) text;
          needle = (char *) search;
      }
      else
      {
          haystack = str_8bit_strdown (text);
          needle = str_8bit_strdown (search);
      }

      hit = g_strrstr_len (haystack, -1, needle);
      if (hit != NULL)
      {
          /* translate the match position back into the original string */
          const size_t offset = hit - haystack;

          found = text + offset;
      }

      if (!case_sen)
      {
          g_free (haystack);
          g_free (needle);
      }

      return found;
  }
  /*
   * Compare t1 and t2 with strcmp() and return its result.
   */
  static int
  str_8bit_compare (const char *t1, const char *t2)
  {
      const int order = strcmp (t1, t2);

      return order;
  }
  /*
   * Compare t1 and t2 with strncmp() over the length of the shorter
   * string, i.e. the result is 0 when one string is a prefix of the other.
   */
  static int
  str_8bit_ncompare (const char *t1, const char *t2)
  {
      const size_t len1 = strlen (t1);
      const size_t len2 = strlen (t2);
      const size_t common = (len1 < len2) ? len1 : len2;

      return strncmp (t1, t2, common);
  }
  480. static int
  481. str_8bit_casecmp (const char *s1, const char *s2)
  482. {
  483. /* code from GLib */
  484. #ifdef HAVE_STRCASECMP
  485. g_return_val_if_fail (s1 != NULL, 0);
  486. g_return_val_if_fail (s2 != NULL, 0);
  487. return strcasecmp (s1, s2);
  488. #else
  489. gint c1, c2;
  490. g_return_val_if_fail (s1 != NULL, 0);
  491. g_return_val_if_fail (s2 != NULL, 0);
  492. while (*s1 != '\0' && *s2 != '\0')
  493. {
  494. /* According to A. Cox, some platforms have islower's that
  495. * don't work right on non-uppercase
  496. */
  497. c1 = isupper ((guchar) * s1) ? tolower ((guchar) * s1) : *s1;
  498. c2 = isupper ((guchar) * s2) ? tolower ((guchar) * s2) : *s2;
  499. if (c1 != c2)
  500. return (c1 - c2);
  501. s1++;
  502. s2++;
  503. }
  504. return (((gint) (guchar) * s1) - ((gint) (guchar) * s2));
  505. #endif
  506. }
  507. static int
  508. str_8bit_ncasecmp (const char *s1, const char *s2)
  509. {
  510. size_t n;
  511. g_return_val_if_fail (s1 != NULL, 0);
  512. g_return_val_if_fail (s2 != NULL, 0);
  513. n = min (strlen (s1), strlen (s2));
  514. /* code from GLib */
  515. #ifdef HAVE_STRNCASECMP
  516. return strncasecmp (s1, s2, n);
  517. #else
  518. gint c1, c2;
  519. while (n != 0 && *s1 != '\0' && *s2 != '\0')
  520. {
  521. n -= 1;
  522. /* According to A. Cox, some platforms have islower's that
  523. * don't work right on non-uppercase
  524. */
  525. c1 = isupper ((guchar) * s1) ? tolower ((guchar) * s1) : *s1;
  526. c2 = isupper ((guchar) * s2) ? tolower ((guchar) * s2) : *s2;
  527. if (c1 != c2)
  528. return (c1 - c2);
  529. s1++;
  530. s2++;
  531. }
  532. if (n == 0)
  533. return 0;
  534. return (((gint) (guchar) * s1) - ((gint) (guchar) * s2));
  535. #endif
  536. }
  /*
   * Return the number of leading bytes that text and prefix have in
   * common (exact, case-sensitive comparison).
   */
  static int
  str_8bit_prefix (const char *text, const char *prefix)
  {
      int matched = 0;

      while (text[matched] != '\0' && prefix[matched] != '\0'
             && text[matched] == prefix[matched])
          matched++;

      return matched;
  }
  /*
   * Return the number of leading bytes that text and prefix have in
   * common when both are compared through char_toupper().
   */
  static int
  str_8bit_caseprefix (const char *text, const char *prefix)
  {
      int matched = 0;

      while (text[matched] != '\0' && prefix[matched] != '\0'
             && char_toupper (text[matched]) == char_toupper (prefix[matched]))
          matched++;

      return matched;
  }
  /*
   * Does nothing: text is left untouched.
   */
  static void
  str_8bit_fix_string (char *text)
  {
      (void) text;
      return;
  }
  /*
   * Produce a comparison key for text.
   *
   * With case_sen set, text itself is returned (const cast away, no copy).
   * Otherwise the result of str_8bit_strdown (text) is returned.
   */
  static char *
  str_8bit_create_key (const char *text, int case_sen)
  {
      if (case_sen)
          return (char *) text;

      return str_8bit_strdown (text);
  }
  /*
   * Compare two keys: strcmp() when case_sen is set, strcoll() otherwise.
   */
  static int
  str_8bit_key_collate (const char *t1, const char *t2, int case_sen)
  {
      return case_sen ? strcmp (t1, t2) : strcoll (t1, t2);
  }
  /*
   * Release a key: it is passed to g_free() only when case_sen is zero;
   * with case_sen set nothing is done.
   */
  static void
  str_8bit_release_key (char *key, int case_sen)
  {
      if (case_sen)
          return;

      g_free (key);
  }
  577. struct str_class
  578. str_8bit_init (void)
  579. {
  580. struct str_class result;
  581. result.conv_gerror_message = str_8bit_conv_gerror_message;
  582. result.vfs_convert_to = str_8bit_vfs_convert_to;
  583. result.insert_replace_char = str_8bit_insert_replace_char;
  584. result.is_valid_string = str_8bit_is_valid_string;
  585. result.is_valid_char = str_8bit_is_valid_char;
  586. result.cnext_char = str_8bit_cnext_char;
  587. result.cprev_char = str_8bit_cprev_char;
  588. result.cnext_char_safe = str_8bit_cnext_char;
  589. result.cprev_char_safe = str_8bit_cprev_char;
  590. result.cnext_noncomb_char = str_8bit_cnext_noncomb_char;
  591. result.cprev_noncomb_char = str_8bit_cprev_noncomb_char;
  592. result.char_isspace = str_8bit_isspace;
  593. result.char_ispunct = str_8bit_ispunct;
  594. result.char_isalnum = str_8bit_isalnum;
  595. result.char_isdigit = str_8bit_isdigit;
  596. result.char_isprint = str_8bit_isprint;
  597. result.char_iscombiningmark = str_8bit_iscombiningmark;
  598. result.char_toupper = str_8bit_toupper;
  599. result.char_tolower = str_8bit_tolower;
  600. result.length = str_8bit_length;
  601. result.length2 = str_8bit_length2;
  602. result.length_noncomb = str_8bit_length;
  603. result.fix_string = str_8bit_fix_string;
  604. result.term_form = str_8bit_term_form;
  605. result.fit_to_term = str_8bit_fit_to_term;
  606. result.term_trim = str_8bit_term_trim;
  607. result.term_width2 = str_8bit_term_width2;
  608. result.term_width1 = str_8bit_term_width1;
  609. result.term_char_width = str_8bit_term_char_width;
  610. result.term_substring = str_8bit_term_substring;
  611. result.trunc = str_8bit_trunc;
  612. result.offset_to_pos = str_8bit_offset_to_pos;
  613. result.column_to_pos = str_8bit_column_to_pos;
  614. result.create_search_needle = str_8bit_create_search_needle;
  615. result.release_search_needle = str_8bit_release_search_needle;
  616. result.search_first = str_8bit_search_first;
  617. result.search_last = str_8bit_search_last;
  618. result.compare = str_8bit_compare;
  619. result.ncompare = str_8bit_ncompare;
  620. result.casecmp = str_8bit_casecmp;
  621. result.ncasecmp = str_8bit_ncasecmp;
  622. result.prefix = str_8bit_prefix;
  623. result.caseprefix = str_8bit_caseprefix;
  624. result.create_key = str_8bit_create_key;
  625. result.create_key_for_filename = str_8bit_create_key;
  626. result.key_collate = str_8bit_key_collate;
  627. result.release_key = str_8bit_release_key;
  628. return result;
  629. }