strutil8bit.c 17 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763
  /*
     8bit strings utilities

     Copyright (C) 2007-2015
     Free Software Foundation, Inc.

     Written by:
     Rostislav Benes, 2007

     This file is part of the Midnight Commander.

     The Midnight Commander is free software: you can redistribute it
     and/or modify it under the terms of the GNU General Public License as
     published by the Free Software Foundation, either version 3 of the License,
     or (at your option) any later version.

     The Midnight Commander is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     GNU General Public License for more details.

     You should have received a copy of the GNU General Public License
     along with this program.  If not, see <http://www.gnu.org/licenses/>.
   */
  19. #include <config.h>
  20. #include <ctype.h>
  21. #include <stdlib.h>
  22. #include "lib/global.h"
  23. #include "lib/strutil.h"
  /*
   * Functions for single-byte encodings: every character has width 1 and
   * the standard system functions are used.
   * There are only small differences between the functions in strutil8bit.c
   * and those in strutilascii.c.
   */
  /* Character appended by str_8bit_insert_replace_char(). */
  static const char replch = '?';

  /*
   * Wrappers that equalize 'char' signedness for single-byte encodings.
   *
   * Passing a negative 'char' to a <ctype.h> function is undefined, so
   * instead of writing
   *     isspace ((unsigned char) c);
   * you can write
   *     char_isspace (c);
   *
   * DECLARE_CTYPE_WRAPPER (foo) defines 'static inline int char_foo (char c)'
   * which forwards to foo() with the argument converted through
   * 'unsigned char'.
   */
  #define DECLARE_CTYPE_WRAPPER(func_name)                \
  static inline int char_##func_name (char c)             \
  {                                                       \
      return func_name ((int) (unsigned char) c);         \
  }

  /* *INDENT-OFF* */
  DECLARE_CTYPE_WRAPPER (isalnum)
  DECLARE_CTYPE_WRAPPER (isalpha)
  DECLARE_CTYPE_WRAPPER (isblank)
  DECLARE_CTYPE_WRAPPER (iscntrl)
  DECLARE_CTYPE_WRAPPER (isdigit)
  DECLARE_CTYPE_WRAPPER (isgraph)
  DECLARE_CTYPE_WRAPPER (islower)
  DECLARE_CTYPE_WRAPPER (isprint)
  DECLARE_CTYPE_WRAPPER (ispunct)
  DECLARE_CTYPE_WRAPPER (isspace)
  DECLARE_CTYPE_WRAPPER (isupper)
  DECLARE_CTYPE_WRAPPER (isxdigit)
  DECLARE_CTYPE_WRAPPER (toupper)
  DECLARE_CTYPE_WRAPPER (tolower)
  /* *INDENT-ON* */

  /*
   * isascii() is an obsolescent XSI extension rather than ISO C, so it may be
   * undeclared when compiling in a strict standard mode.  The test is spelled
   * out here instead of going through DECLARE_CTYPE_WRAPPER.
   *
   * Returns 1 when c is a 7-bit character, 0 otherwise.
   */
  static inline int
  char_isascii (char c)
  {
      return ((unsigned char) c <= 0x7F) ? 1 : 0;
  }
  59. static void
  60. str_8bit_insert_replace_char (GString * buffer)
  61. {
  62. g_string_append_c (buffer, replch);
  63. }
  /*
   * Validity check for a whole string.
   * The argument is not inspected; the answer is always 1.
   */
  static int
  str_8bit_is_valid_string (const char *text)
  {
      (void) text;              /* unused */

      return 1;
  }
  /*
   * Validity check for a single character.
   * Neither argument is inspected; the answer is always 1.
   */
  static int
  str_8bit_is_valid_char (const char *ch, size_t size)
  {
      (void) size;              /* unused */
      (void) ch;                /* unused */

      return 1;
  }
  /*
   * Advance *text by one character (one byte).
   * No check for the end of the string is made.
   */
  static void
  str_8bit_cnext_char (const char **text)
  {
      *text += 1;
  }
  /*
   * Move *text back by one character (one byte).
   * No check for the beginning of the string is made.
   */
  static void
  str_8bit_cprev_char (const char **text)
  {
      *text -= 1;
  }
  /*
   * Advance *text by one character unless it already points at the
   * terminating NUL.
   *
   * Returns 1 if the pointer was moved, 0 if it was left at the terminator.
   */
  static int
  str_8bit_cnext_noncomb_char (const char **text)
  {
      const char *cur = *text;

      if (*cur == '\0')
          return 0;

      *text = cur + 1;
      return 1;
  }
  /*
   * Move *text back by one character unless it already equals begin.
   *
   * Returns 1 if the pointer was moved, 0 if it was already at begin.
   */
  static int
  str_8bit_cprev_noncomb_char (const char **text, const char *begin)
  {
      const char *cur = *text;

      if (cur == begin)
          return 0;

      *text = cur - 1;
      return 1;
  }
  /*
   * Classify the first byte of text with char_isspace().
   * Returns that wrapper's result unchanged.
   */
  static int
  str_8bit_isspace (const char *text)
  {
      const char first = *text;

      return char_isspace (first);
  }
  /*
   * Classify the first byte of text with char_ispunct().
   * Returns that wrapper's result unchanged.
   */
  static int
  str_8bit_ispunct (const char *text)
  {
      const char first = *text;

      return char_ispunct (first);
  }
  /*
   * Classify the first byte of text with char_isalnum().
   * Returns that wrapper's result unchanged.
   */
  static int
  str_8bit_isalnum (const char *text)
  {
      const char first = *text;

      return char_isalnum (first);
  }
  /*
   * Classify the first byte of text with char_isdigit().
   * Returns that wrapper's result unchanged.
   */
  static int
  str_8bit_isdigit (const char *text)
  {
      const char first = *text;

      return char_isdigit (first);
  }
  /*
   * Classify the first byte of text with char_isprint().
   * Returns that wrapper's result unchanged.
   */
  static int
  str_8bit_isprint (const char *text)
  {
      const char first = *text;

      return char_isprint (first);
  }
  128. static gboolean
  129. str_8bit_iscombiningmark (const char *text)
  130. {
  131. (void) text;
  132. return FALSE;
  133. }
  /*
   * Write the upper-case form of the first byte of text to **out.
   *
   * text:   source character.
   * out:    write cursor; advanced by one on success.
   * remain: space left at the cursor; decremented by one on success.
   *
   * Returns 1 on success.  Returns 0, touching nothing, when *remain is
   * 1 or less.
   */
  static int
  str_8bit_toupper (const char *text, char **out, size_t * remain)
  {
      char *dst;

      if (*remain <= 1)
          return 0;

      dst = *out;
      dst[0] = char_toupper (text[0]);
      *out = dst + 1;
      *remain -= 1;

      return 1;
  }
  /*
   * Write the lower-case form of the first byte of text to **out.
   *
   * text:   source character.
   * out:    write cursor; advanced by one on success.
   * remain: space left at the cursor; decremented by one on success.
   *
   * Returns 1 on success.  Returns 0, touching nothing, when *remain is
   * 1 or less.
   */
  static int
  str_8bit_tolower (const char *text, char **out, size_t * remain)
  {
      char *dst;

      if (*remain <= 1)
          return 0;

      dst = *out;
      dst[0] = char_tolower (text[0]);
      *out = dst + 1;
      *remain -= 1;

      return 1;
  }
  /*
   * Number of characters in text, which for a single-byte encoding is the
   * strlen() of the string converted to int.
   */
  static int
  str_8bit_length (const char *text)
  {
      const size_t bytes = strlen (text);

      return (int) bytes;
  }
  /*
   * Number of characters in text, limited to at most size characters.
   *
   * text: NUL-terminated string.
   * size: upper limit for the result; a negative value means "no limit".
   *
   * Returns the smaller of strlen (text) and size, or strlen (text) when
   * size is negative.
   *
   * The length is computed once; the function-like min() macro used
   * previously evaluated strlen (text) more than once.
   */
  static int
  str_8bit_length2 (const char *text, int size)
  {
      const size_t len = strlen (text);

      if (size >= 0 && (size_t) size < len)
          return size;

      return (int) len;
  }
  164. static gchar *
  165. str_8bit_conv_gerror_message (GError * mcerror, const char *def_msg)
  166. {
  167. GIConv conv;
  168. gchar *ret;
  169. /* glib messages are in UTF-8 charset */
  170. conv = str_crt_conv_from ("UTF-8");
  171. if (conv == INVALID_CONV)
  172. ret = g_strdup (def_msg != NULL ? def_msg : "");
  173. else
  174. {
  175. GString *buf;
  176. buf = g_string_new ("");
  177. if (str_convert (conv, mcerror->message, buf) != ESTR_FAILURE)
  178. ret = g_string_free (buf, FALSE);
  179. else
  180. {
  181. ret = g_strdup (def_msg != NULL ? def_msg : "");
  182. g_string_free (buf, TRUE);
  183. }
  184. str_close_conv (conv);
  185. }
  186. return ret;
  187. }
  188. static estr_t
  189. str_8bit_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer)
  190. {
  191. estr_t result = ESTR_SUCCESS;
  192. if (coder == str_cnv_not_convert)
  193. g_string_append_len (buffer, string, size);
  194. else
  195. result = str_nconvert (coder, (char *) string, size, buffer);
  196. return result;
  197. }
  198. static const char *
  199. str_8bit_term_form (const char *text)
  200. {
  201. static char result[BUF_MEDIUM];
  202. char *actual;
  203. size_t remain;
  204. size_t length;
  205. size_t pos = 0;
  206. actual = result;
  207. remain = sizeof (result);
  208. length = strlen (text);
  209. for (; pos < length && remain > 1; pos++, actual++, remain--)
  210. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  211. actual[0] = '\0';
  212. return result;
  213. }
  214. static const char *
  215. str_8bit_fit_to_term (const char *text, int width, align_crt_t just_mode)
  216. {
  217. static char result[BUF_MEDIUM];
  218. char *actual;
  219. size_t remain;
  220. int ident = 0;
  221. size_t length;
  222. size_t pos = 0;
  223. length = strlen (text);
  224. actual = result;
  225. remain = sizeof (result);
  226. if ((int) length <= width)
  227. {
  228. switch (HIDE_FIT (just_mode))
  229. {
  230. case J_CENTER_LEFT:
  231. case J_CENTER:
  232. ident = (width - length) / 2;
  233. break;
  234. case J_RIGHT:
  235. ident = width - length;
  236. break;
  237. }
  238. if ((int) remain <= ident)
  239. goto finally;
  240. memset (actual, ' ', ident);
  241. actual += ident;
  242. remain -= ident;
  243. for (; pos < length && remain > 1; pos++, actual++, remain--)
  244. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  245. if (width - length - ident > 0)
  246. {
  247. if (remain <= width - length - ident)
  248. goto finally;
  249. memset (actual, ' ', width - length - ident);
  250. actual += width - length - ident;
  251. }
  252. }
  253. else if (IS_FIT (just_mode))
  254. {
  255. for (; pos + 1 <= (gsize) width / 2 && remain > 1; actual++, pos++, remain--)
  256. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  257. if (remain <= 1)
  258. goto finally;
  259. actual[0] = '~';
  260. actual++;
  261. remain--;
  262. pos += length - width + 1;
  263. for (; pos < length && remain > 1; pos++, actual++, remain--)
  264. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  265. }
  266. else
  267. {
  268. switch (HIDE_FIT (just_mode))
  269. {
  270. case J_CENTER:
  271. ident = (length - width) / 2;
  272. break;
  273. case J_RIGHT:
  274. ident = length - width;
  275. break;
  276. }
  277. pos += ident;
  278. for (; pos < (gsize) (ident + width) && remain > 1; pos++, actual++, remain--)
  279. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  280. }
  281. finally:
  282. if (actual >= result + sizeof (result))
  283. actual = result + sizeof (result) - 1;
  284. actual[0] = '\0';
  285. return result;
  286. }
  287. static const char *
  288. str_8bit_term_trim (const char *text, int width)
  289. {
  290. static char result[BUF_MEDIUM];
  291. size_t remain;
  292. char *actual;
  293. size_t length;
  294. length = strlen (text);
  295. actual = result;
  296. remain = sizeof (result);
  297. if (width > 0)
  298. {
  299. size_t pos;
  300. if (width >= (int) length)
  301. {
  302. for (pos = 0; pos < length && remain > 1; pos++, actual++, remain--)
  303. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  304. }
  305. else if (width <= 3)
  306. {
  307. memset (actual, '.', width);
  308. actual += width;
  309. }
  310. else
  311. {
  312. memset (actual, '.', 3);
  313. actual += 3;
  314. remain -= 3;
  315. for (pos = length - width + 3; pos < length && remain > 1; pos++, actual++, remain--)
  316. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  317. }
  318. }
  319. actual[0] = '\0';
  320. return result;
  321. }
  /*
   * Width of text on the terminal, looking at no more than length characters.
   *
   * text:   NUL-terminated string.
   * length: upper limit for the result; (size_t) (-1) means "no limit".
   *
   * Returns the smaller of strlen (text) and length.
   *
   * The string length is computed once; the function-like min() macro used
   * previously evaluated strlen (text) more than once.
   */
  static int
  str_8bit_term_width2 (const char *text, size_t length)
  {
      const size_t len = strlen (text);

      if (length != (size_t) (-1) && length < len)
          return (int) length;

      return (int) len;
  }
  /*
   * Width of the whole of text on the terminal: str_8bit_term_width2()
   * called with the "no limit" length (size_t) (-1).
   */
  static int
  str_8bit_term_width1 (const char *text)
  {
      const size_t no_limit = (size_t) (-1);

      return str_8bit_term_width2 (text, no_limit);
  }
  /*
   * Width of a single character on the terminal.
   * The argument is not inspected; the answer is always 1.
   */
  static int
  str_8bit_term_char_width (const char *text)
  {
      (void) text;              /* unused */

      return 1;
  }
  338. static const char *
  339. str_8bit_term_substring (const char *text, int start, int width)
  340. {
  341. static char result[BUF_MEDIUM];
  342. size_t remain;
  343. char *actual;
  344. size_t length;
  345. actual = result;
  346. remain = sizeof (result);
  347. length = strlen (text);
  348. if (start < (int) length)
  349. {
  350. size_t pos;
  351. for (pos = start; pos < length && width > 0 && remain > 1;
  352. pos++, width--, actual++, remain--)
  353. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  354. }
  355. for (; width > 0 && remain > 1; actual++, remain--, width--)
  356. actual[0] = ' ';
  357. actual[0] = '\0';
  358. return result;
  359. }
  360. static const char *
  361. str_8bit_trunc (const char *text, int width)
  362. {
  363. static char result[MC_MAXPATHLEN];
  364. int remain;
  365. char *actual;
  366. size_t pos = 0;
  367. size_t length;
  368. actual = result;
  369. remain = sizeof (result);
  370. length = strlen (text);
  371. if ((int) length > width)
  372. {
  373. for (; pos + 1 <= (gsize) width / 2 && remain > 1; actual++, pos++, remain--)
  374. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  375. if (remain <= 1)
  376. goto finally;
  377. actual[0] = '~';
  378. actual++;
  379. remain--;
  380. pos += length - width + 1;
  381. for (; pos < length && remain > 1; pos++, actual++, remain--)
  382. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  383. }
  384. else
  385. {
  386. for (; pos < length && remain > 1; pos++, actual++, remain--)
  387. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  388. }
  389. finally:
  390. actual[0] = '\0';
  391. return result;
  392. }
  /*
   * Translate a character offset into a byte position.
   * With one byte per character the two are equal, so length is returned
   * converted to int; text is not inspected.
   */
  static int
  str_8bit_offset_to_pos (const char *text, size_t length)
  {
      const int position = (int) length;

      (void) text;              /* unused */

      return position;
  }
  /*
   * Translate a terminal column into a byte position.
   * With one byte and one column per character the two are equal, so pos is
   * returned converted to int; text is not inspected.
   */
  static int
  str_8bit_column_to_pos (const char *text, size_t pos)
  {
      const int position = (int) pos;

      (void) text;              /* unused */

      return position;
  }
  /*
   * Prepare a needle for searching.
   * Nothing is allocated: the very same pointer is handed back (with its
   * const qualifier cast away) and case_sen is ignored.
   */
  static char *
  str_8bit_create_search_needle (const char *needle, int case_sen)
  {
      char *same = (char *) needle;

      (void) case_sen;          /* unused */

      return same;
  }
  /*
   * Counterpart of str_8bit_create_search_needle().
   * That function allocates nothing, so there is nothing to release: both
   * arguments are ignored.
   */
  static void
  str_8bit_release_search_needle (char *needle, int case_sen)
  {
      (void) needle;            /* unused */
      (void) case_sen;          /* unused */
  }
  417. static char *
  418. str_8bit_strdown (const char *str)
  419. {
  420. char *rets, *p;
  421. if (str == NULL)
  422. return NULL;
  423. rets = g_strdup (str);
  424. for (p = rets; *p != '\0'; p++)
  425. *p = char_tolower (*p);
  426. return rets;
  427. }
  428. static const char *
  429. str_8bit_search_first (const char *text, const char *search, int case_sen)
  430. {
  431. char *fold_text;
  432. char *fold_search;
  433. const char *match;
  434. fold_text = (case_sen) ? (char *) text : str_8bit_strdown (text);
  435. fold_search = (case_sen) ? (char *) search : str_8bit_strdown (search);
  436. match = g_strstr_len (fold_text, -1, fold_search);
  437. if (match != NULL)
  438. {
  439. size_t offset;
  440. offset = match - fold_text;
  441. match = text + offset;
  442. }
  443. if (!case_sen)
  444. {
  445. g_free (fold_text);
  446. g_free (fold_search);
  447. }
  448. return match;
  449. }
  450. static const char *
  451. str_8bit_search_last (const char *text, const char *search, int case_sen)
  452. {
  453. char *fold_text;
  454. char *fold_search;
  455. const char *match;
  456. fold_text = (case_sen) ? (char *) text : str_8bit_strdown (text);
  457. fold_search = (case_sen) ? (char *) search : str_8bit_strdown (search);
  458. match = g_strrstr_len (fold_text, -1, fold_search);
  459. if (match != NULL)
  460. {
  461. size_t offset;
  462. offset = match - fold_text;
  463. match = text + offset;
  464. }
  465. if (!case_sen)
  466. {
  467. g_free (fold_text);
  468. g_free (fold_search);
  469. }
  470. return match;
  471. }
  /*
   * Byte-wise comparison of two strings; the result is that of strcmp().
   */
  static int
  str_8bit_compare (const char *t1, const char *t2)
  {
      const int order = strcmp (t1, t2);

      return order;
  }
  /*
   * Compare t1 and t2 over the length of the shorter of the two, so the
   * result is 0 whenever one string is a prefix of the other.
   *
   * Returns the strncmp() result for that common length.
   *
   * Each length is computed exactly once; the function-like min() macro used
   * previously evaluated one of the strlen() calls a second time.
   */
  static int
  str_8bit_ncompare (const char *t1, const char *t2)
  {
      size_t n = strlen (t1);
      const size_t len2 = strlen (t2);

      if (len2 < n)
          n = len2;

      return strncmp (t1, t2, n);
  }
  482. static int
  483. str_8bit_casecmp (const char *s1, const char *s2)
  484. {
  485. /* code from GLib */
  486. #ifdef HAVE_STRCASECMP
  487. g_return_val_if_fail (s1 != NULL, 0);
  488. g_return_val_if_fail (s2 != NULL, 0);
  489. return strcasecmp (s1, s2);
  490. #else
  491. gint c1, c2;
  492. g_return_val_if_fail (s1 != NULL, 0);
  493. g_return_val_if_fail (s2 != NULL, 0);
  494. while (*s1 != '\0' && *s2 != '\0')
  495. {
  496. /* According to A. Cox, some platforms have islower's that
  497. * don't work right on non-uppercase
  498. */
  499. c1 = isupper ((guchar) * s1) ? tolower ((guchar) * s1) : *s1;
  500. c2 = isupper ((guchar) * s2) ? tolower ((guchar) * s2) : *s2;
  501. if (c1 != c2)
  502. return (c1 - c2);
  503. s1++;
  504. s2++;
  505. }
  506. return (((gint) (guchar) * s1) - ((gint) (guchar) * s2));
  507. #endif
  508. }
  509. static int
  510. str_8bit_ncasecmp (const char *s1, const char *s2)
  511. {
  512. size_t n;
  513. g_return_val_if_fail (s1 != NULL, 0);
  514. g_return_val_if_fail (s2 != NULL, 0);
  515. n = min (strlen (s1), strlen (s2));
  516. /* code from GLib */
  517. #ifdef HAVE_STRNCASECMP
  518. return strncasecmp (s1, s2, n);
  519. #else
  520. gint c1, c2;
  521. while (n != 0 && *s1 != '\0' && *s2 != '\0')
  522. {
  523. n -= 1;
  524. /* According to A. Cox, some platforms have islower's that
  525. * don't work right on non-uppercase
  526. */
  527. c1 = isupper ((guchar) * s1) ? tolower ((guchar) * s1) : *s1;
  528. c2 = isupper ((guchar) * s2) ? tolower ((guchar) * s2) : *s2;
  529. if (c1 != c2)
  530. return (c1 - c2);
  531. s1++;
  532. s2++;
  533. }
  534. if (n == 0)
  535. return 0;
  536. return (((gint) (guchar) * s1) - ((gint) (guchar) * s2));
  537. #endif
  538. }
  /*
   * Length of the common leading part of text and prefix, compared byte by
   * byte.  Counting stops at the first difference or at the end of either
   * string.
   */
  static int
  str_8bit_prefix (const char *text, const char *prefix)
  {
      int matched = 0;

      while (text[matched] != '\0' && prefix[matched] != '\0'
             && text[matched] == prefix[matched])
          matched++;

      return matched;
  }
  /*
   * Length of the common leading part of text and prefix, ignoring case:
   * bytes are compared after char_toupper().  Counting stops at the first
   * difference or at the end of either string.
   */
  static int
  str_8bit_caseprefix (const char *text, const char *prefix)
  {
      int matched = 0;

      while (text[matched] != '\0' && prefix[matched] != '\0'
             && char_toupper (text[matched]) == char_toupper (prefix[matched]))
          matched++;

      return matched;
  }
  /*
   * Repair invalid sequences in text.
   * This implementation does nothing: text is left untouched.
   */
  static void
  str_8bit_fix_string (char *text)
  {
      (void) text;              /* unused */
  }
  /*
   * Build the key used for sorting text.
   *
   * case_sen non-zero: text itself is returned (const cast away, nothing
   *                    allocated).
   * case_sen zero:     a newly allocated lower-cased copy made by
   *                    str_8bit_strdown() is returned.
   *
   * The key must be handed to str_8bit_release_key() with the same case_sen.
   */
  static char *
  str_8bit_create_key (const char *text, int case_sen)
  {
      if (case_sen)
          return (char *) text;

      return str_8bit_strdown (text);
  }
  /*
   * Compare two sort keys.
   *
   * case_sen non-zero: byte-wise comparison with strcmp().
   * case_sen zero:     locale-aware comparison with strcoll().
   */
  static int
  str_8bit_key_collate (const char *t1, const char *t2, int case_sen)
  {
      return case_sen ? strcmp (t1, t2) : strcoll (t1, t2);
  }
  /*
   * Release a key obtained from str_8bit_create_key().
   * Only keys made with case_sen == 0 were allocated, so only those are
   * passed to g_free().
   */
  static void
  str_8bit_release_key (char *key, int case_sen)
  {
      if (case_sen)
          return;

      g_free (key);
  }
  579. struct str_class
  580. str_8bit_init (void)
  581. {
  582. struct str_class result;
  583. result.conv_gerror_message = str_8bit_conv_gerror_message;
  584. result.vfs_convert_to = str_8bit_vfs_convert_to;
  585. result.insert_replace_char = str_8bit_insert_replace_char;
  586. result.is_valid_string = str_8bit_is_valid_string;
  587. result.is_valid_char = str_8bit_is_valid_char;
  588. result.cnext_char = str_8bit_cnext_char;
  589. result.cprev_char = str_8bit_cprev_char;
  590. result.cnext_char_safe = str_8bit_cnext_char;
  591. result.cprev_char_safe = str_8bit_cprev_char;
  592. result.cnext_noncomb_char = str_8bit_cnext_noncomb_char;
  593. result.cprev_noncomb_char = str_8bit_cprev_noncomb_char;
  594. result.char_isspace = str_8bit_isspace;
  595. result.char_ispunct = str_8bit_ispunct;
  596. result.char_isalnum = str_8bit_isalnum;
  597. result.char_isdigit = str_8bit_isdigit;
  598. result.char_isprint = str_8bit_isprint;
  599. result.char_iscombiningmark = str_8bit_iscombiningmark;
  600. result.char_toupper = str_8bit_toupper;
  601. result.char_tolower = str_8bit_tolower;
  602. result.length = str_8bit_length;
  603. result.length2 = str_8bit_length2;
  604. result.length_noncomb = str_8bit_length;
  605. result.fix_string = str_8bit_fix_string;
  606. result.term_form = str_8bit_term_form;
  607. result.fit_to_term = str_8bit_fit_to_term;
  608. result.term_trim = str_8bit_term_trim;
  609. result.term_width2 = str_8bit_term_width2;
  610. result.term_width1 = str_8bit_term_width1;
  611. result.term_char_width = str_8bit_term_char_width;
  612. result.term_substring = str_8bit_term_substring;
  613. result.trunc = str_8bit_trunc;
  614. result.offset_to_pos = str_8bit_offset_to_pos;
  615. result.column_to_pos = str_8bit_column_to_pos;
  616. result.create_search_needle = str_8bit_create_search_needle;
  617. result.release_search_needle = str_8bit_release_search_needle;
  618. result.search_first = str_8bit_search_first;
  619. result.search_last = str_8bit_search_last;
  620. result.compare = str_8bit_compare;
  621. result.ncompare = str_8bit_ncompare;
  622. result.casecmp = str_8bit_casecmp;
  623. result.ncasecmp = str_8bit_ncasecmp;
  624. result.prefix = str_8bit_prefix;
  625. result.caseprefix = str_8bit_caseprefix;
  626. result.create_key = str_8bit_create_key;
  627. result.create_key_for_filename = str_8bit_create_key;
  628. result.key_collate = str_8bit_key_collate;
  629. result.release_key = str_8bit_release_key;
  630. return result;
  631. }