strutil.h 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610
  1. #ifndef MC_STRUTIL_H
  2. #define MC_STRUTIL_H
  3. #include "lib/global.h" /* include glib.h */
  4. #include <sys/types.h>
  5. #include <inttypes.h>
  6. #include <string.h>
  7. /* Header file for strutil.c, strutilascii.c, strutil8bit.c, strutilutf8.c.
  8. * There are two sorts of functions:
  9. * 1. functions for working with growing strings and converting strings between
  10. * different encodings.
  11. * (implemented directly in strutil.c)
  12. * 2. functions that hide differences between encodings derived from ASCII.
  13. * (implemented separately in strutilascii.c, strutil8bit.c, strutilutf8.c)
  14. * documentation is written for the UTF-8 version of the functions.
  15. */
  16. /* invalid strings
  17. * functions that work with invalid strings are marked with "I"
  18. * in the documentation
  19. * invalid bytes of a string are handled as one-byte characters with width 1; they
  20. * are displayed as question marks. I-marked comparing functions try to keep
  21. * the original value of these bytes.
  22. */
  23. /* combining characters
  24. * displaying: all are handled as zero-width characters, except a combining character
  25. * at the beginning of a string; this character has width one (a space is added before it),
  26. * so str_term_width is not good for computing the width of single characters
  27. * (it never returns zero, except for an empty string)
  28. * for compatibility, strings are composed before displaying
  29. * comparing: comparing decomposes all strings before comparing; n-compare
  30. * functions do not work as usual, because equal strings do not have to be
  31. * the same length in UTF-8. So they return 0 if one string is a prefix of the other
  32. * one.
  33. * str_prefix is used to determine how many characters from one string are
  34. * a prefix of the second string. However, str_prefix returns the number of characters in
  35. * decomposed form. (used in do_search (screen.c))
  36. */
  37. /*** typedefs(not structures) and defined constants **********************************************/
  /* The "fit" flag is bit 0x0010 of an align_crt_t value: J_LEFT_FIT (0x11) is
   * J_LEFT (0x01) with that bit set, and likewise for the other J_*_FIT values. */
  /* non-zero when the fit bit is set in x */
  38. #define IS_FIT(x) ((x) & 0x0010)
  /* x with the fit bit set */
  39. #define MAKE_FIT(x) ((x) | 0x0010)
  /* keeps only the low four bits of x, which drops the fit bit */
  40. #define HIDE_FIT(x) ((x) & 0x000f)
  /* sentinel GIConv value; the comments on str_crt_conv_from () and
   * str_crt_conv_to () name it as their result for an unsupported encoding */
  41. #define INVALID_CONV ((GIConv) (-1))
  42. /*** enums ***************************************************************************************/
  43. /* results of the conversion functions (estr_t is the return type of
  44. * str_convert, str_nconvert, str_vfs_convert_from, str_vfs_convert_to) */
  45. typedef enum
  46. {
  47. /* success means that the conversion finished successfully
  48. */
  49. ESTR_SUCCESS = 0,
  50. /* problem means that not every character was successfully converted (they are
  51. * replaced with question marks), so it is impossible to convert the string back.
  52. */
  53. ESTR_PROBLEM = 1,
  54. /* failure means that the conversion is not possible (example: wrong encoding
  55. * of the input string)
  56. */
  57. ESTR_FAILURE = 2
  58. } estr_t;
  59. /* alignment of strings on the terminal; every J_*_FIT value is the plain
  60. * value with bit 0x10 set (see IS_FIT, MAKE_FIT, HIDE_FIT) */
  61. typedef enum
  62. {
  63. J_LEFT = 0x01,
  64. J_RIGHT = 0x02,
  65. J_CENTER = 0x03,
  66. /* if there is enough space for the string on the terminal, the string
  67. * is centered, otherwise it is aligned to the left */
  68. J_CENTER_LEFT = 0x04,
  69. /* fit alignment: if the string is too long, it is truncated with '~' */
  70. J_LEFT_FIT = 0x11,
  71. J_RIGHT_FIT = 0x12,
  72. J_CENTER_FIT = 0x13,
  73. J_CENTER_LEFT_FIT = 0x14
  74. } align_crt_t;
  75. /* string-to-integer parsing results (the return type of xstrtoumax)
  76. */
  77. typedef enum
  78. {
  79. LONGINT_OK = 0,
  80. /* These two values can be ORed together, to indicate that both errors occurred. */
  81. LONGINT_OVERFLOW = 1,
  82. LONGINT_INVALID_SUFFIX_CHAR = 2,
  /* the OR of the two flags above, given its own enumerator name */
  83. LONGINT_INVALID_SUFFIX_CHAR_WITH_OVERFLOW = (LONGINT_INVALID_SUFFIX_CHAR | LONGINT_OVERFLOW),
  84. LONGINT_INVALID = 4
  85. } strtol_error_t;
  86. /*** structures declarations (and typedefs of structures)*****************************************/
  87. /* Table of encoding-specific implementations behind the public str_*
   * functions; all functions in str_class must be defined for every encoding.
   * str_utf8_init (), str_8bit_init () and str_ascii_init () each return one
   * such table.
   * NOTE(review): every I marker below appears to annotate the member on the
   * line ABOVE it, not the member it precedes -- read that way the markers match
   * the "I" flags in the comments of the public str_* functions (for example
   * str_is_valid_string is documented I while str_cnext_char is not).
   * Confirm before relying on a marker.
   */
  88. struct str_class
  89. {
  90. /* *INDENT-OFF* */
  91. gchar *(*conv_gerror_message) (GError * error, const char *def_msg);
  92. /*I*/ estr_t (*vfs_convert_to) (GIConv coder, const char *string, int size, GString * buffer);
  93. /*I*/ void (*insert_replace_char) (GString * buffer);
  94. gboolean (*is_valid_string) (const char *);
  95. /*I*/ int (*is_valid_char) (const char *, size_t);
  96. /*I*/ void (*cnext_char) (const char **);
  97. void (*cprev_char) (const char **);
  98. void (*cnext_char_safe) (const char **);
  99. /*I*/ void (*cprev_char_safe) (const char **);
  100. /*I*/ int (*cnext_noncomb_char) (const char **text);
  101. /*I*/ int (*cprev_noncomb_char) (const char **text, const char *begin);
  102. /*I*/ gboolean (*char_isspace) (const char *);
  103. /*I*/ gboolean (*char_ispunct) (const char *);
  104. /*I*/ gboolean (*char_isalnum) (const char *);
  105. /*I*/ gboolean (*char_isdigit) (const char *);
  106. /*I*/ gboolean (*char_isprint) (const char *);
  107. /*I*/ gboolean (*char_iscombiningmark) (const char *);
  108. /*I*/ int (*length) (const char *);
  109. /*I*/ int (*length2) (const char *, int);
  110. /*I*/ int (*length_noncomb) (const char *);
  111. /*I*/ gboolean (*char_toupper) (const char *, char **, size_t *);
  112. gboolean (*char_tolower) (const char *, char **, size_t *);
  113. void (*fix_string) (char *);
  114. /*I*/ const char *(*term_form) (const char *);
  115. /*I*/ const char *(*fit_to_term) (const char *, int, align_crt_t);
  116. /*I*/ const char *(*term_trim) (const char *text, int width);
  117. /*I*/ const char *(*term_substring) (const char *, int, int);
  118. /*I*/ int (*term_width1) (const char *);
  119. /*I*/ int (*term_width2) (const char *, size_t);
  120. /*I*/ int (*term_char_width) (const char *);
  121. /*I*/ const char *(*trunc) (const char *, int);
  122. /*I*/ int (*offset_to_pos) (const char *, size_t);
  123. /*I*/ int (*column_to_pos) (const char *, size_t);
  124. /*I*/ char *(*create_search_needle) (const char *, gboolean);
  125. void (*release_search_needle) (char *, gboolean);
  126. const char *(*search_first) (const char *, const char *, gboolean);
  127. const char *(*search_last) (const char *, const char *, gboolean);
  128. int (*compare) (const char *, const char *);
  129. /*I*/ int (*ncompare) (const char *, const char *);
  130. /*I*/ int (*casecmp) (const char *, const char *);
  131. /*I*/ int (*ncasecmp) (const char *, const char *);
  132. /*I*/ int (*prefix) (const char *, const char *);
  133. /*I*/ int (*caseprefix) (const char *, const char *);
  134. /*I*/ char *(*create_key) (const char *text, gboolean case_sen);
  135. /*I*/ char *(*create_key_for_filename) (const char *text, gboolean case_sen);
  136. /*I*/ int (*key_collate) (const char *t1, const char *t2, gboolean case_sen);
  137. /*I*/ void (*release_key) (char *key, gboolean case_sen);
  138. /* *INDENT-ON* */
  139. };
  140. /*** global variables defined in .c file *********************************************************/
  141. /* standard convertors */
  142. extern GIConv str_cnv_to_term;
  143. extern GIConv str_cnv_from_term;
  144. /* from terminal encoding to terminal encoding */
  145. extern GIConv str_cnv_not_convert;
  146. /*** declarations of public functions ************************************************************/
  147. struct str_class str_utf8_init (void);
  148. struct str_class str_8bit_init (void);
  149. struct str_class str_ascii_init (void);
  150. /* create convertor from "from_enc" to terminal encoding
  151. * if "from_enc" is not supported return INVALID_CONV
  152. */
  153. GIConv str_crt_conv_from (const char *);
  154. /* create convertor from terminal encoding to "to_enc"
  155. * if "to_enc" is not supported return INVALID_CONV
  156. */
  157. GIConv str_crt_conv_to (const char *);
  158. /* close convertor, do not close str_cnv_to_term, str_cnv_from_term,
  159. * str_cnv_not_convert
  160. */
  161. void str_close_conv (GIConv);
  162. /* return on of not used buffers (.used == 0) or create new
  163. * returned buffer has set .used to 1
  164. */
  165. /* convert string using coder, result of conversion is appended at end of buffer
  166. * return ESTR_SUCCESS if there was no problem.
  167. * otherwise return ESTR_PROBLEM or ESTR_FAILURE
  168. */
  169. estr_t str_convert (GIConv, const char *, GString *);
  170. estr_t str_nconvert (GIConv, const char *, int, GString *);
  171. /* convert GError message (which is in UTF-8) to terminal charset
  172. * def_msg is used if the result of the error message conversion is ESTR_FAILURE
  173. * return newly allocated null-terminated string, which needs to be freed
  174. * I
  175. */
  176. gchar *str_conv_gerror_message (GError * error, const char *def_msg);
  177. /* return only ESTR_SUCCESS or ESTR_FAILURE, because vfs must be able to convert
  178. * result to original string. (so no replace with questionmark)
  179. * if coder is str_cnv_from_term or str_cnv_not_convert, string is only copied,
  180. * so is possible to show file, that is not valid in terminal encoding
  181. */
  182. estr_t str_vfs_convert_from (GIConv, const char *, GString *);
  183. /* if coder is str_cnv_to_term or str_cnv_not_convert, string is only copied,
  184. * does replace with questionmark
  185. * I
  186. */
  187. estr_t str_vfs_convert_to (GIConv, const char *, int, GString *);
  188. /* printf function for str_buffer, append result of printf at the end of buffer
  189. */
  190. /* *INDENT-OFF* */
  191. void str_printf (GString * buffer, const char *format, ...) G_GNUC_PRINTF (2, 3);
  192. /* *INDENT-ON* */
  193. /* add standard replacement character in terminal encoding
  194. */
  195. void str_insert_replace_char (GString *);
  196. /* init strings and set terminal encoding,
  197. * if is termenc NULL, detect terminal encoding
  198. * create all str_cnv_* and set functions for terminal encoding
  199. */
  200. void str_init_strings (const char *termenc);
  201. /* free all str_buffer and all str_cnv_*
  202. */
  203. void str_uninit_strings (void);
  204. /* try to convert characters in ch to output using conv
  205. * ch_size is the size of ch, can be (size_t)(-1) (-1 only for ASCII
  206. * compatible encodings, for others it must be set)
  207. * return ESTR_SUCCESS if conversion was successful,
  208. * ESTR_PROBLEM if ch contains only part of a character,
  209. * ESTR_FAILURE if conversion is not possible
  210. */
  211. estr_t str_translate_char (GIConv conv, const char *ch, size_t ch_size,
  212. char *output, size_t out_size);
  213. /* test, if text is valid in terminal encoding
  214. * I
  215. */
  216. gboolean str_is_valid_string (const char *text);
  217. /* test, if first char of ch is valid
  218. * size, how many bytes characters occupied, could be (size_t)(-1)
  219. * return 1 if it is valid, -1 if it is invalid or -2 if it is only part of
  220. * multibyte character
  221. * I
  222. */
  223. int str_is_valid_char (const char *ch, size_t size);
  224. /* return next characters after text, do not call on the end of string
  225. */
  226. char *str_get_next_char (char *text);
  227. const char *str_cget_next_char (const char *text);
  228. /* return previous characters before text, do not call on the start of strings
  229. */
  230. char *str_get_prev_char (char *text);
  231. const char *str_cget_prev_char (const char *text);
  232. /* set text to next characters, do not call on the end of string
  233. */
  234. void str_next_char (char **text);
  235. void str_cnext_char (const char **text);
  236. /* set text to previous characters, do not call on the start of strings
  237. */
  238. void str_prev_char (char **text);
  239. void str_cprev_char (const char **text);
  240. /* return next characters after text, do not call on the end of string
  241. * works with invalid string
  242. * I
  243. */
  244. char *str_get_next_char_safe (char *text);
  245. const char *str_cget_next_char_safe (const char *text);
  246. /* return previous characters before text, do not call on the start of strings
  247. * works with invalid string
  248. * I
  249. */
  250. char *str_get_prev_char_safe (char *text);
  251. const char *str_cget_prev_char_safe (const char *text);
  252. /* set text to next characters, do not call on the end of string
  253. * works with invalid string
  254. * I
  255. */
  256. void str_next_char_safe (char **text);
  257. void str_cnext_char_safe (const char **text);
  258. /* set text to previous characters, do not call on the start of strings
  259. * works with invalid string
  260. * I
  261. */
  262. void str_prev_char_safe (char **text);
  263. void str_cprev_char_safe (const char **text);
  264. /* set text to next noncombining characters, check the end of text
  265. * return how many characters was skipped
  266. * works with invalid string
  267. * I
  268. */
  269. int str_next_noncomb_char (char **text);
  270. int str_cnext_noncomb_char (const char **text);
  271. /* set text to previous noncombining characters, search stop at begin
  272. * return how many characters was skipped
  273. * works with invalid string
  274. * I
  275. */
  276. int str_prev_noncomb_char (char **text, const char *begin);
  277. int str_cprev_noncomb_char (const char **text, const char *begin);
  278. /* if first characters in ch is space, tabulator or new lines
  279. * I
  280. */
  281. gboolean str_isspace (const char *ch);
  282. /* if first characters in ch is punctuation or symbol
  283. * I
  284. */
  285. gboolean str_ispunct (const char *ch);
  286. /* if first characters in ch is alphanum
  287. * I
  288. */
  289. gboolean str_isalnum (const char *ch);
  290. /* if first characters in ch is digit
  291. * I
  292. */
  293. gboolean str_isdigit (const char *ch);
  294. /* if first characters in ch is printable
  295. * I
  296. */
  297. gboolean str_isprint (const char *ch);
  298. /* if first character in ch is a combining mark (only in utf-8)
  299. * combining marks are assumed to be zero width
  300. * I
  301. */
  302. gboolean str_iscombiningmark (const char *ch);
  303. /* write upper form of the first character in ch into out
  304. * decrease remain by the size of the written character
  305. * if out is not big enough, do nothing
  306. */
  307. gboolean str_toupper (const char *ch, char **out, size_t * remain);
  308. /* write lower form of the first character in ch into out
  309. * decrease remain by the size of the written character
  310. * if out is not big enough, do nothing
  311. */
  312. gboolean str_tolower (const char *ch, char **out, size_t * remain);
  313. /* return length of text in characters
  314. * I
  315. */
  316. int str_length (const char *text);
  317. /* return length of text in characters, limit to size
  318. * I
  319. */
  320. int str_length2 (const char *text, int size);
  321. /* return length of one char
  322. * I
  323. */
  324. int str_length_char (const char *);
  325. /* return length of text in characters, count only noncombining characters
  326. * I
  327. */
  328. int str_length_noncomb (const char *text);
  329. /* replace all invalid characters in text with questionmark
  330. * after return, text is valid string in terminal encoding
  331. * I
  332. */
  333. void str_fix_string (char *text);
  334. /* replace all invalid characters in text with questionmark
  335. * replace all unprintable characters with '.'
  336. * return static allocated string, "text" is not changed
  337. * returned string do not need to be freed
  338. * I
  339. */
  340. const char *str_term_form (const char *text);
  341. /* like str_term_form, but text can be alignment to width
  342. * alignment is specified in just_mode (J_LEFT, J_LEFT_FIT, ...)
  343. * result is completed with spaces to width
  344. * I
  345. */
  346. const char *str_fit_to_term (const char *text, int width, align_crt_t just_mode);
  347. /* like str_term_form, but when text is wider than width, three dots are
  348. * inserted at begin and result is completed with suffix of text
  349. * no additional spaces are inserted
  350. * I
  351. */
  352. const char *str_term_trim (const char *text, int width);
  353. /* like str_term_form, but return only specified substring
  354. * start - column (position) on terminal, where substring begin
  355. * result is completed with spaces to width
  356. * I
  357. */
  358. const char *str_term_substring (const char *text, int start, int width);
  359. /* return width, that will be text occupied on terminal
  360. * I
  361. */
  362. int str_term_width1 (const char *text);
  363. /* return width, that will be text occupied on terminal
  364. * text is limited by length in characters
  365. * I
  366. */
  367. int str_term_width2 (const char *text, size_t length);
  368. /* return width, that will be character occupied on terminal
  369. * combining characters are always zero width
  370. * I
  371. */
  372. int str_term_char_width (const char *text);
  373. /* convert position in characters to position in bytes
  374. * I
  375. */
  376. int str_offset_to_pos (const char *text, size_t length);
  377. /* convert position on terminal to position in characters
  378. * I
  379. */
  380. int str_column_to_pos (const char *text, size_t pos);
  381. /* like str_fit_to_term with just_mode = J_LEFT_FIT,
  382. * but do not insert additional spaces
  383. * I
  384. */
  385. const char *str_trunc (const char *text, int width);
  386. /* create needle that will be searched for in str_search_first/last,
  387. * so the needle can be reused
  388. * in UTF-8 return normalized form of needle
  389. */
  390. char *str_create_search_needle (const char *needle, gboolean case_sen);
  391. /* free needle returned by str_create_search_needle
  392. */
  393. void str_release_search_needle (char *needle, gboolean case_sen);
  394. /* search for first occurrence of search in text
  395. */
  396. const char *str_search_first (const char *text, const char *needle, gboolean case_sen);
  397. /* search for last occurrence of search in text
  398. */
  399. const char *str_search_last (const char *text, const char *needle, gboolean case_sen);
  400. /* case sensitive compare two strings
  401. * I
  402. */
  403. int str_compare (const char *t1, const char *t2);
  404. /* case sensitive compare two strings
  405. * if one string is prefix of the other string, return 0
  406. * I
  407. */
  408. int str_ncompare (const char *t1, const char *t2);
  409. /* case insensitive compare two strings
  410. * I
  411. */
  412. int str_casecmp (const char *t1, const char *t2);
  413. /* case insensitive compare two strings
  414. * if one string is prefix of the other string, return 0
  415. * I
  416. */
  417. int str_ncasecmp (const char *t1, const char *t2);
  418. /* return how many bytes are the same from the start in text and prefix
  419. * both strings are decomposed before comparing and the return value is counted
  420. * in decomposed form, too. calling with prefix, prefix, you get the size in bytes
  421. * of prefix in decomposed form.
  422. * I
  423. */
  424. int str_prefix (const char *text, const char *prefix);
  425. /* case insensitive version of str_prefix
  426. * I
  427. */
  428. int str_caseprefix (const char *text, const char *prefix);
  429. /* create a key that is used by str_key_collate
  430. * I
  431. */
  432. char *str_create_key (const char *text, gboolean case_sen);
  433. /* create a key that is used by str_key_collate
  434. * should aware dot '.' in text
  435. * I
  436. */
  437. char *str_create_key_for_filename (const char *text, gboolean case_sen);
  438. /* compare two string using LC_COLLATE, if is possible
  439. * if case_sen is set, comparing is case sensitive,
  440. * case_sen must be same for str_create_key, str_key_collate and str_release_key
  441. * I
  442. */
  443. int str_key_collate (const char *t1, const char *t2, gboolean case_sen);
  444. /* release key created by str_create_key, the only right way to release a key
  445. * I
  446. */
  447. void str_release_key (char *key, gboolean case_sen);
  448. /* return TRUE if codeset_name is utf8 or utf-8
  449. * I
  450. */
  451. gboolean str_isutf8 (const char *codeset_name);
  452. const char *str_detect_termencoding (void);
  453. int str_verscmp (const char *s1, const char *s2);
  454. /* return how many lines and columns will text occupy on terminal
  455. */
  456. void str_msg_term_size (const char *text, int *lines, int *columns);
  457. /**
  458. * skip the first occurrences of needle in haystack
  459. *
  460. * @param haystack pointer to string
  461. * @param needle pointer to string
  462. * @param skip_count number of needle occurrences to skip
  463. *
  464. * @return pointer to skip_count+1 needle (or NULL if not found).
  465. */
  466. char *strrstr_skip_count (const char *haystack, const char *needle, size_t skip_count);
  467. char *str_replace_all (const char *haystack, const char *needle, const char *replacement);
  468. strtol_error_t xstrtoumax (const char *s, char **ptr, int base, uintmax_t * val,
  469. const char *valid_suffixes);
  470. uintmax_t parse_integer (const char *str, gboolean * invalid);
  471. /* --------------------------------------------------------------------------------------------- */
  472. /*** inline functions ****************************************************************************/
  473. /* --------------------------------------------------------------------------------------------- */
  /*
   * Replace, in place, every byte of the NUL-terminated string s that equals
   * `from` with `to`.
   *
   * @param s string to modify
   * @param from byte value to look for
   * @param to byte value written over each match
   */
  static inline void
  str_replace (char *s, char from, char to)
  {
      char *cursor = s;

      while (*cursor != '\0')
      {
          if (*cursor == from)
              *cursor = to;
          cursor++;
      }
  }
  483. /* --------------------------------------------------------------------------------------------- */
  /*
   * strcpy is unsafe on overlapping memory areas, so this is a memmove-based
   * string copy that tolerates overlap.
   * It only makes sense when:
   *  * dest <= src
   *  AND
   *  * dest and src point into one object (as Roland Illig pointed out).
   *
   * The str*cpy functions cannot be used here:
   * http://kerneltrap.org/mailarchive/openbsd-misc/2008/5/27/1951294
   *
   * @param dest pointer to string
   * @param src pointer to string
   *
   * @return dest (the result of memmove); nothing is allocated
   */
  static inline char *
  str_move (char *dest, const char *src)
  {
      g_assert (dest <= src);

      /* the + 1 carries the terminating '\0' along with the characters */
      return (char *) memmove (dest, src, strlen (src) + 1);
  }
  509. /* --------------------------------------------------------------------------------------------- */
  510. #endif /* MC_STRUTIL_H */