strutil8bit.c 16 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758
  1. /* 8bit strings utilities
  2. Copyright (C) 2007 Free Software Foundation, Inc.
  3. Written 2007 by:
  4. Rostislav Benes
  5. The file_date routine is mostly from GNU's fileutils package,
  6. written by Richard Stallman and David MacKenzie.
  7. This program is free software; you can redistribute it and/or modify
  8. it under the terms of the GNU General Public License as published by
  9. the Free Software Foundation; either version 2 of the License, or
  10. (at your option) any later version.
  11. This program is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. GNU General Public License for more details.
  15. You should have received a copy of the GNU General Public License
  16. along with this program; if not, write to the Free Software
  17. Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. */
  19. #include <config.h>
  20. #include <stdio.h>
  21. #include <ctype.h>
  22. #include <errno.h>
  23. #include "lib/global.h"
  24. #include "lib/strutil.h"
  /* Functions for single-byte encodings: every character has width 1 and
   * the standard system functions are used.
   * There are only small differences between the functions in strutil8bit.c
   * and those in strutilascii.c.
   */
  /* Character appended in place of something that cannot be represented. */
  static const char replch = '?';

  /*
   * The <ctype.h> functions take an int; a plain 'char' is first converted
   * to 'unsigned char' so that its signedness does not matter.
   * Each DECLARE_CTYPE_WRAPPER(f) line below defines
   *     static inline int char_f (char c);
   * so that, instead of
   *     isspace ((unsigned char) c);
   * one can write
   *     char_isspace (c);
   */
  #define DECLARE_CTYPE_WRAPPER(fn) \
  static inline int \
  char_##fn (char ch) \
  { \
      const unsigned char uch = (unsigned char) ch; \
      return fn ((int) uch); \
  }

  /* classification */
  DECLARE_CTYPE_WRAPPER (isalnum)
  DECLARE_CTYPE_WRAPPER (isalpha)
  DECLARE_CTYPE_WRAPPER (isascii)
  DECLARE_CTYPE_WRAPPER (isblank)
  DECLARE_CTYPE_WRAPPER (iscntrl)
  DECLARE_CTYPE_WRAPPER (isdigit)
  DECLARE_CTYPE_WRAPPER (isgraph)
  DECLARE_CTYPE_WRAPPER (islower)
  DECLARE_CTYPE_WRAPPER (isprint)
  DECLARE_CTYPE_WRAPPER (ispunct)
  DECLARE_CTYPE_WRAPPER (isspace)
  DECLARE_CTYPE_WRAPPER (isupper)
  DECLARE_CTYPE_WRAPPER (isxdigit)
  /* case conversion */
  DECLARE_CTYPE_WRAPPER (toupper)
  DECLARE_CTYPE_WRAPPER (tolower)
  58. static void
  59. str_8bit_insert_replace_char (GString * buffer)
  60. {
  61. g_string_append_c (buffer, replch);
  62. }
  /* String validity check: the argument is not inspected and the answer
   * is always 1. */
  static int
  str_8bit_is_valid_string (const char *text)
  {
      const int always_valid = 1;

      (void) text;                /* intentionally unused */
      return always_valid;
  }
  /* Character validity check: neither argument is inspected and the answer
   * is always 1. */
  static int
  str_8bit_is_valid_char (const char *ch, size_t size)
  {
      const int always_valid = 1;

      (void) size;                /* intentionally unused */
      (void) ch;                  /* intentionally unused */
      return always_valid;
  }
  /* Advance *text by one byte.  No end-of-string check is made. */
  static void
  str_8bit_cnext_char (const char **text)
  {
      *text = *text + 1;
  }
  /* Move *text back by one byte.  No start-of-string check is made. */
  static void
  str_8bit_cprev_char (const char **text)
  {
      *text = *text - 1;
  }
  /* Step *text forward by one byte unless it already points at the
   * terminating NUL.
   * Returns 1 when the pointer was advanced, 0 otherwise. */
  static int
  str_8bit_cnext_noncomb_char (const char **text)
  {
      const char *cur = *text;

      if (cur[0] == '\0')
          return 0;

      *text = cur + 1;
      return 1;
  }
  /* Step *text back by one byte unless it is already equal to begin.
   * Returns 1 when the pointer was moved, 0 otherwise. */
  static int
  str_8bit_cprev_noncomb_char (const char **text, const char *begin)
  {
      const char *cur = *text;

      if (cur == begin)
          return 0;

      *text = cur - 1;
      return 1;
  }
  /* Return the result of char_isspace () for the first byte of text. */
  static int
  str_8bit_isspace (const char *text)
  {
      const char first = text[0];

      return char_isspace (first);
  }
  /* Return the result of char_ispunct () for the first byte of text. */
  static int
  str_8bit_ispunct (const char *text)
  {
      const char first = text[0];

      return char_ispunct (first);
  }
  /* Return the result of char_isalnum () for the first byte of text. */
  static int
  str_8bit_isalnum (const char *text)
  {
      const char first = text[0];

      return char_isalnum (first);
  }
  /* Return the result of char_isdigit () for the first byte of text. */
  static int
  str_8bit_isdigit (const char *text)
  {
      const char first = text[0];

      return char_isdigit (first);
  }
  /* Return the result of char_isprint () for the first byte of text. */
  static int
  str_8bit_isprint (const char *text)
  {
      const char first = text[0];

      return char_isprint (first);
  }
  /* Combining-mark test: the argument is not inspected and the answer is
   * always 0. */
  static int
  str_8bit_iscombiningmark (const char *text)
  {
      const int never = 0;

      (void) text;                /* intentionally unused */
      return never;
  }
  /* Write the upper-case form of text[0] to **out, then advance *out by one
   * and decrease *remain by one.
   * Nothing is written and 0 is returned when *remain is 1 or less;
   * otherwise 1 is returned. */
  static int
  str_8bit_toupper (const char *text, char **out, size_t * remain)
  {
      char *dst;

      if (*remain <= 1)
          return 0;

      dst = *out;
      dst[0] = char_toupper (text[0]);
      *out = dst + 1;
      *remain -= 1;
      return 1;
  }
  /* Write the lower-case form of text[0] to **out, then advance *out by one
   * and decrease *remain by one.
   * Nothing is written and 0 is returned when *remain is 1 or less;
   * otherwise 1 is returned. */
  static int
  str_8bit_tolower (const char *text, char **out, size_t * remain)
  {
      char *dst;

      if (*remain <= 1)
          return 0;

      dst = *out;
      dst[0] = char_tolower (text[0]);
      *out = dst + 1;
      *remain -= 1;
      return 1;
  }
  /* Number of bytes in text before the terminating NUL, as an int. */
  static int
  str_8bit_length (const char *text)
  {
      const size_t bytes = strlen (text);

      return bytes;
  }
  /*
   * Length of text, limited to size bytes.
   *
   * text: NUL-terminated string.
   * size: maximum number of bytes to count; a negative value means
   *       "no limit" and the full strlen () is returned.
   *
   * Returns the smaller of strlen (text) and size (for size >= 0).
   * When a limit is given the scan stops at that limit, so the string is
   * walked at most once and never beyond the first size bytes.
   */
  static int
  str_8bit_length2 (const char *text, int size)
  {
      size_t limit;
      size_t count = 0;

      if (size < 0)
          return (int) strlen (text);

      limit = (size_t) size;
      while (count < limit && text[count] != '\0')
          count++;

      return (int) count;
  }
  169. static gchar *
  170. str_8bit_conv_gerror_message (GError *error, const char *def_msg)
  171. {
  172. GIConv conv;
  173. gchar *ret;
  174. /* glib messages are in UTF-8 charset */
  175. conv = str_crt_conv_from ("UTF-8");
  176. if (conv == INVALID_CONV)
  177. ret = g_strdup (def_msg != NULL ? def_msg : "");
  178. else {
  179. GString *buf;
  180. buf = g_string_new ("");
  181. if (str_convert (conv, error->message, buf) != ESTR_FAILURE) {
  182. ret = buf->str;
  183. g_string_free (buf, FALSE);
  184. } else {
  185. ret = g_strdup (def_msg != NULL ? def_msg : "");
  186. g_string_free (buf, TRUE);
  187. }
  188. str_close_conv (conv);
  189. }
  190. return ret;
  191. }
  192. static estr_t
  193. str_8bit_vfs_convert_to (GIConv coder, const char *string,
  194. int size, GString * buffer)
  195. {
  196. estr_t result;
  197. if (coder == str_cnv_not_convert)
  198. {
  199. g_string_append_len (buffer, string, size);
  200. result = ESTR_SUCCESS;
  201. }
  202. else
  203. result = str_nconvert (coder, (char *) string, size, buffer);
  204. return result;
  205. }
  206. static const char *
  207. str_8bit_term_form (const char *text)
  208. {
  209. static char result[BUF_MEDIUM];
  210. char *actual;
  211. size_t remain;
  212. size_t length;
  213. size_t pos = 0;
  214. actual = result;
  215. remain = sizeof (result);
  216. length = strlen (text);
  217. for (; pos < length && remain > 1; pos++, actual++, remain--)
  218. {
  219. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  220. }
  221. actual[0] = '\0';
  222. return result;
  223. }
  224. static const char *
  225. str_8bit_fit_to_term (const char *text, int width, align_crt_t just_mode)
  226. {
  227. static char result[BUF_MEDIUM];
  228. char *actual;
  229. size_t remain;
  230. int ident;
  231. size_t length;
  232. size_t pos = 0;
  233. length = strlen (text);
  234. actual = result;
  235. remain = sizeof (result);
  236. if ((int)length <= width)
  237. {
  238. ident = 0;
  239. switch (HIDE_FIT (just_mode))
  240. {
  241. case J_CENTER_LEFT:
  242. case J_CENTER:
  243. ident = (width - length) / 2;
  244. break;
  245. case J_RIGHT:
  246. ident = width - length;
  247. break;
  248. }
  249. if ((int)remain <= ident)
  250. goto finally;
  251. memset (actual, ' ', ident);
  252. actual += ident;
  253. remain -= ident;
  254. for (; pos < length && remain > 1; pos++, actual++, remain--)
  255. {
  256. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  257. }
  258. if (width - length - ident > 0)
  259. {
  260. if (remain <= width - length - ident)
  261. goto finally;
  262. memset (actual, ' ', width - length - ident);
  263. actual += width - length - ident;
  264. remain -= width - length - ident;
  265. }
  266. }
  267. else
  268. {
  269. if (IS_FIT (just_mode))
  270. {
  271. for (; pos + 1 <= (gsize)width / 2 && remain > 1;
  272. actual++, pos++, remain--)
  273. {
  274. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  275. }
  276. if (remain <= 1)
  277. goto finally;
  278. actual[0] = '~';
  279. actual++;
  280. remain--;
  281. pos += length - width + 1;
  282. for (; pos < length && remain > 1; pos++, actual++, remain--)
  283. {
  284. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  285. }
  286. }
  287. else
  288. {
  289. ident = 0;
  290. switch (HIDE_FIT (just_mode))
  291. {
  292. case J_CENTER:
  293. ident = (length - width) / 2;
  294. break;
  295. case J_RIGHT:
  296. ident = length - width;
  297. break;
  298. }
  299. pos += ident;
  300. for (; pos < (gsize)(ident + width) && remain > 1;
  301. pos++, actual++, remain--)
  302. {
  303. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  304. }
  305. }
  306. }
  307. finally:
  308. actual[0] = '\0';
  309. return result;
  310. }
  311. static const char *
  312. str_8bit_term_trim (const char *text, int width)
  313. {
  314. static char result[BUF_MEDIUM];
  315. size_t remain;
  316. char *actual;
  317. size_t pos = 0;
  318. size_t length;
  319. length = strlen (text);
  320. actual = result;
  321. remain = sizeof (result);
  322. if (width < (int)length)
  323. {
  324. if (width <= 3)
  325. {
  326. memset (actual, '.', width);
  327. actual += width;
  328. remain -= width;
  329. }
  330. else
  331. {
  332. memset (actual, '.', 3);
  333. actual += 3;
  334. remain -= 3;
  335. pos += length - width + 3;
  336. for (; pos < length && remain > 1; pos++, actual++, remain--)
  337. {
  338. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  339. }
  340. }
  341. }
  342. else
  343. {
  344. for (; pos < length && remain > 1; pos++, actual++, remain--)
  345. {
  346. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  347. }
  348. }
  349. actual[0] = '\0';
  350. return result;
  351. }
  /*
   * Width of text, limited to length bytes.
   *
   * text:   NUL-terminated string.
   * length: maximum number of bytes to count; (size_t) (-1) means
   *         "no limit" and the full strlen () is returned.
   *
   * Returns the smaller of strlen (text) and length.  When a limit is
   * given the scan stops at that limit, so the string is walked at most
   * once and never beyond the first length bytes.
   */
  static int
  str_8bit_term_width2 (const char *text, size_t length)
  {
      size_t count = 0;

      if (length == (size_t) (-1))
          return (int) strlen (text);

      while (count < length && text[count] != '\0')
          count++;

      return (int) count;
  }
  /* Width of the whole text: str_8bit_term_width2 () called with the
   * (size_t) (-1) length. */
  static int
  str_8bit_term_width1 (const char *text)
  {
      const size_t whole = (size_t) (-1);

      return str_8bit_term_width2 (text, whole);
  }
  /* Width of one character: the argument is not inspected and the answer
   * is always 1. */
  static int
  str_8bit_term_char_width (const char *text)
  {
      const int one_column = 1;

      (void) text;                /* intentionally unused */
      return one_column;
  }
  /*
   * Measure a multi-line message.
   *
   * On return *lines is 1 plus the number of '\n' characters in text and
   * *columns is the largest str_8bit_term_width1 () of any line.
   * The work is done on a private copy of text, which is freed before
   * returning.
   */
  static void
  str_8bit_msg_term_size (const char *text, int *lines, int *columns)
  {
      char *copy;
      char *line;

      *lines = 1;
      *columns = 0;

      copy = g_strdup ((char *) text);
      line = copy;

      for (;;)
      {
          char *eol = strchr (line, '\n');
          char saved = '\0';
          int w;

          /* cut the copy at the end of the current line while measuring */
          if (eol != NULL)
          {
              saved = eol[0];
              eol[0] = '\0';
          }

          w = str_8bit_term_width1 (line);
          if (*columns < w)
              *columns = w;

          if (eol == NULL)
              break;

          eol[0] = saved;
          line = eol + 1;
          *lines += 1;
      }

      g_free (copy);
  }
  399. static const char *
  400. str_8bit_term_substring (const char *text, int start, int width)
  401. {
  402. static char result[BUF_MEDIUM];
  403. size_t remain;
  404. char *actual;
  405. size_t pos = 0;
  406. size_t length;
  407. actual = result;
  408. remain = sizeof (result);
  409. length = strlen (text);
  410. if (start < (int)length)
  411. {
  412. pos += start;
  413. for (; pos < length && width > 0 && remain > 1;
  414. pos++, width--, actual++, remain--)
  415. {
  416. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  417. }
  418. }
  419. for (; width > 0 && remain > 1; actual++, remain--, width--)
  420. {
  421. actual[0] = ' ';
  422. }
  423. actual[0] = '\0';
  424. return result;
  425. }
  426. static const char *
  427. str_8bit_trunc (const char *text, int width)
  428. {
  429. static char result[MC_MAXPATHLEN];
  430. int remain;
  431. char *actual;
  432. size_t pos = 0;
  433. size_t length;
  434. actual = result;
  435. remain = sizeof (result);
  436. length = strlen (text);
  437. if ((int)length > width)
  438. {
  439. for (; pos + 1 <= (gsize)width / 2 && remain > 1; actual++, pos++, remain--)
  440. {
  441. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  442. }
  443. if (remain <= 1)
  444. goto finally;
  445. actual[0] = '~';
  446. actual++;
  447. remain--;
  448. pos += length - width + 1;
  449. for (; pos < length && remain > 1; pos++, actual++, remain--)
  450. {
  451. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  452. }
  453. }
  454. else
  455. {
  456. for (; pos < length && remain > 1; pos++, actual++, remain--)
  457. {
  458. actual[0] = char_isprint (text[pos]) ? text[pos] : '.';
  459. }
  460. }
  461. finally:
  462. actual[0] = '\0';
  463. return result;
  464. }
  /* Return length converted to int; text is not inspected. */
  static int
  str_8bit_offset_to_pos (const char *text, size_t length)
  {
      const int as_pos = (int) length;

      (void) text;                /* intentionally unused */
      return as_pos;
  }
  /* Return pos converted to int; text is not inspected. */
  static int
  str_8bit_column_to_pos (const char *text, size_t pos)
  {
      const int as_pos = (int) pos;

      (void) text;                /* intentionally unused */
      return as_pos;
  }
  /* Return needle itself, with the const qualifier cast away; no copy is
   * made and case_sen is ignored. */
  static char *
  str_8bit_create_search_needle (const char *needle, int case_sen)
  {
      char *same = (char *) needle;

      (void) case_sen;            /* intentionally unused */
      return same;
  }
  /* Does nothing: both arguments are ignored. */
  static void
  str_8bit_release_search_needle (char *needle, int case_sen)
  {
      (void) needle;              /* intentionally unused */
      (void) case_sen;            /* intentionally unused */
  }
  /*
   * Find the first occurrence of search in text.
   *
   * With case_sen non-zero the strings are searched as they are.  Otherwise
   * g_strdown () copies of both strings are searched and the offset of the
   * match is applied to the original text.
   * Returns a pointer into text, or NULL when there is no match.
   */
  static const char *
  str_8bit_search_first (const char *text, const char *search, int case_sen)
  {
      char *haystack;
      char *needle;
      const char *hit;

      if (case_sen)
          return g_strstr_len ((char *) text, -1, (char *) search);

      haystack = g_strdown (g_strdup (text));
      needle = g_strdown (g_strdup (search));

      hit = g_strstr_len (haystack, -1, needle);
      if (hit != NULL)
      {
          /* translate the match from the folded copy back to text */
          const size_t offset = hit - haystack;

          hit = text + offset;
      }

      g_free (haystack);
      g_free (needle);
      return hit;
  }
  /*
   * Find the last occurrence of search in text.
   *
   * With case_sen non-zero the strings are searched as they are.  Otherwise
   * g_strdown () copies of both strings are searched and the offset of the
   * match is applied to the original text.
   * Returns a pointer into text, or NULL when there is no match.
   */
  static const char *
  str_8bit_search_last (const char *text, const char *search, int case_sen)
  {
      char *haystack;
      char *needle;
      const char *hit;

      if (case_sen)
          return g_strrstr_len ((char *) text, -1, (char *) search);

      haystack = g_strdown (g_strdup (text));
      needle = g_strdown (g_strdup (search));

      hit = g_strrstr_len (haystack, -1, needle);
      if (hit != NULL)
      {
          /* translate the match from the folded copy back to text */
          const size_t offset = hit - haystack;

          hit = text + offset;
      }

      g_free (haystack);
      g_free (needle);
      return hit;
  }
  /* Compare t1 and t2 with strcmp () and return its result. */
  static int
  str_8bit_compare (const char *t1, const char *t2)
  {
      const int order = strcmp (t1, t2);

      return order;
  }
  /*
   * Compare t1 and t2 over the length of the shorter one.
   *
   * Returns the strncmp () result for the first
   * min (strlen (t1), strlen (t2)) bytes; in particular 0 when one string
   * is a prefix of the other.
   * Each length is computed exactly once.
   */
  static int
  str_8bit_ncompare (const char *t1, const char *t2)
  {
      const size_t len1 = strlen (t1);
      const size_t len2 = strlen (t2);
      const size_t common = (len1 < len2) ? len1 : len2;

      return strncmp (t1, t2, common);
  }
  /* Compare t1 and t2 with g_strcasecmp () and return its result. */
  static int
  str_8bit_casecmp (const char *t1, const char *t2)
  {
      const int order = g_strcasecmp (t1, t2);

      return order;
  }
  /* Compare t1 and t2 with g_strncasecmp () over the length of the shorter
   * string and return its result. */
  static int
  str_8bit_ncasecmp (const char *t1, const char *t2)
  {
      const size_t len1 = strlen (t1);
      const size_t len2 = strlen (t2);
      const size_t common = (len1 < len2) ? len1 : len2;

      return g_strncasecmp (t1, t2, common);
  }
  /* Return the number of leading bytes that text and prefix have in common
   * (counting stops at the first difference or at the end of either
   * string). */
  static int
  str_8bit_prefix (const char *text, const char *prefix)
  {
      int matched = 0;

      while (text[matched] != '\0' && prefix[matched] != '\0'
             && text[matched] == prefix[matched])
          matched++;

      return matched;
  }
  /* Return the number of leading bytes of text and prefix that are equal
   * after char_toupper () (counting stops at the first difference or at the
   * end of either string). */
  static int
  str_8bit_caseprefix (const char *text, const char *prefix)
  {
      int matched = 0;

      while (text[matched] != '\0' && prefix[matched] != '\0'
             && char_toupper (text[matched]) == char_toupper (prefix[matched]))
          matched++;

      return matched;
  }
  /* Does nothing: text is left untouched. */
  static void
  str_8bit_fix_string (char *text)
  {
      (void) text;                /* intentionally unused */
  }
  /*
   * Create a comparison key for text.
   * With case_sen non-zero, text itself is returned (const cast away, no
   * copy).  Otherwise a newly allocated copy passed through g_strdown () is
   * returned.
   */
  static char *
  str_8bit_create_key (const char *text, int case_sen)
  {
      if (case_sen)
          return (char *) text;

      return g_strdown (g_strdup (text));
  }
  /* Compare two keys: strcmp () when case_sen is non-zero, strcoll ()
   * otherwise.  Returns the result of the function used. */
  static int
  str_8bit_key_collate (const char *t1, const char *t2, int case_sen)
  {
      return case_sen ? strcmp (t1, t2) : strcoll (t1, t2);
  }
  /* Release a key: g_free () it when case_sen is zero, do nothing
   * otherwise. */
  static void
  str_8bit_release_key (char *key, int case_sen)
  {
      if (case_sen)
          return;

      g_free (key);
  }
  593. struct str_class
  594. str_8bit_init (void)
  595. {
  596. struct str_class result;
  597. result.conv_gerror_message = str_8bit_conv_gerror_message;
  598. result.vfs_convert_to = str_8bit_vfs_convert_to;
  599. result.insert_replace_char = str_8bit_insert_replace_char;
  600. result.is_valid_string = str_8bit_is_valid_string;
  601. result.is_valid_char = str_8bit_is_valid_char;
  602. result.cnext_char = str_8bit_cnext_char;
  603. result.cprev_char = str_8bit_cprev_char;
  604. result.cnext_char_safe = str_8bit_cnext_char;
  605. result.cprev_char_safe = str_8bit_cprev_char;
  606. result.cnext_noncomb_char = str_8bit_cnext_noncomb_char;
  607. result.cprev_noncomb_char = str_8bit_cprev_noncomb_char;
  608. result.isspace = str_8bit_isspace;
  609. result.ispunct = str_8bit_ispunct;
  610. result.isalnum = str_8bit_isalnum;
  611. result.isdigit = str_8bit_isdigit;
  612. result.isprint = str_8bit_isprint;
  613. result.iscombiningmark = str_8bit_iscombiningmark;
  614. result.toupper = str_8bit_toupper;
  615. result.tolower = str_8bit_tolower;
  616. result.length = str_8bit_length;
  617. result.length2 = str_8bit_length2;
  618. result.length_noncomb = str_8bit_length;
  619. result.fix_string = str_8bit_fix_string;
  620. result.term_form = str_8bit_term_form;
  621. result.fit_to_term = str_8bit_fit_to_term;
  622. result.term_trim = str_8bit_term_trim;
  623. result.term_width2 = str_8bit_term_width2;
  624. result.term_width1 = str_8bit_term_width1;
  625. result.term_char_width = str_8bit_term_char_width;
  626. result.msg_term_size = str_8bit_msg_term_size;
  627. result.term_substring = str_8bit_term_substring;
  628. result.trunc = str_8bit_trunc;
  629. result.offset_to_pos = str_8bit_offset_to_pos;
  630. result.column_to_pos = str_8bit_column_to_pos;
  631. result.create_search_needle = str_8bit_create_search_needle;
  632. result.release_search_needle = str_8bit_release_search_needle;
  633. result.search_first = str_8bit_search_first;
  634. result.search_last = str_8bit_search_last;
  635. result.compare = str_8bit_compare;
  636. result.ncompare = str_8bit_ncompare;
  637. result.casecmp = str_8bit_casecmp;
  638. result.ncasecmp = str_8bit_ncasecmp;
  639. result.prefix = str_8bit_prefix;
  640. result.caseprefix = str_8bit_caseprefix;
  641. result.create_key = str_8bit_create_key;
  642. result.create_key_for_filename = str_8bit_create_key;
  643. result.key_collate = str_8bit_key_collate;
  644. result.release_key = str_8bit_release_key;
  645. return result;
  646. }