/* strutil.h */
  1. #ifndef MC_STRUTIL_H
  2. #define MC_STRUTIL_H
  3. #include "lib/global.h" /* include glib.h */
/* Header file for strutil.c, strutilascii.c, strutil8bit.c, strutilutf8.c.
 * There are two sorts of functions:
 * 1. functions for working with growing strings and for converting strings
 *    between different encodings
 *    (implemented directly in strutil.c)
 * 2. functions that hide differences between encodings derived from ASCII
 *    (implemented separately in strutilascii.c, strutil8bit.c, strutilutf8.c)
 * The documentation describes the UTF-8 version of the functions.
 */
/* invalid strings
 * Functions that work with invalid strings are marked with "I"
 * in the documentation.
 * Invalid bytes of a string are handled as one-byte characters with width 1;
 * they are displayed as question marks. I-marked comparing functions try to
 * keep the original value of these bytes.
 */
/* combining characters
 * displaying: all are handled as zero-width characters, except a combining
 * character at the beginning of a string; that character has width one (a
 * space is added before it), so str_term_width is not good for computing the
 * width of single characters (it never returns zero, except for an empty
 * string).
 * For compatibility, strings are composed before displaying.
 * comparing: comparing decomposes all strings before comparing. n-compare
 * functions do not work as usual, because equal strings do not have to have
 * the same length in UTF-8. So they return 0 if one string is a prefix of the
 * other one.
 * str_prefix is used to determine how many characters from one string are
 * a prefix of the second string. However, str_prefix returns the number of
 * characters in decomposed form. (used in do_search (screen.c))
 */
  34. /* results of conversion function
  35. */
  36. typedef enum {
  37. /* success means, that convertion has been finished successully
  38. */
  39. ESTR_SUCCESS = 0,
  40. /* problem means, that not every characters was successfully converted (They are
  41. * replaced with questionmark). So is impossible convert string back.
  42. */
  43. ESTR_PROBLEM = 1,
  44. /* failure means, that conversion is not possible (example: wrong encoding
  45. * of input string)
  46. */
  47. ESTR_FAILURE = 2
  48. } estr_t;
  49. /* alignment strings on terminal
  50. */
  51. typedef enum {
  52. J_LEFT = 0x01,
  53. J_RIGHT = 0x02,
  54. J_CENTER = 0x03,
  55. /* if there is enough space for string on terminal,
  56. * string is centered otherwise is aligned to left */
  57. J_CENTER_LEFT = 0x04,
  58. /* fit alignment, if string is to long, is truncated with '~' */
  59. J_LEFT_FIT = 0x11,
  60. J_RIGHT_FIT = 0x12,
  61. J_CENTER_FIT = 0x13,
  62. J_CENTER_LEFT_FIT = 0x14
  63. } align_crt_t;
  64. #define IS_FIT(x) ((x) & 0x0010)
  65. #define MAKE_FIT(x) ((x) | 0x0010)
  66. #define HIDE_FIT(x) ((x) & 0x000f)
  67. #define INVALID_CONV ((GIConv) (-1))
  68. /* standard convertors */
  69. extern GIConv str_cnv_to_term;
  70. extern GIConv str_cnv_from_term;
  71. /* from terminal encoding to terminal encoding */
  72. extern GIConv str_cnv_not_convert;
  73. /* all functions in str_class must be defined for every encoding */
  74. struct str_class {
  75. gchar *(*conv_gerror_message) (GError *error, const char *def_msg); /*I*/
  76. estr_t (*vfs_convert_to) (GIConv coder, const char *string,
  77. int size, GString *buffer); /*I*/
  78. void (*insert_replace_char) (GString *buffer);
  79. int (*is_valid_string) (const char *); /*I*/
  80. int (*is_valid_char) (const char *, size_t); /*I*/
  81. void (*cnext_char) (const char **);
  82. void (*cprev_char) (const char **);
  83. void (*cnext_char_safe) (const char **); /*I*/
  84. void (*cprev_char_safe) (const char **); /*I*/
  85. int (*cnext_noncomb_char) (const char **text); /*I*/
  86. int (*cprev_noncomb_char) (const char **text, const char *begin); /*I*/
  87. int (*isspace) (const char *); /*I*/
  88. int (*ispunct) (const char *); /*I*/
  89. int (*isalnum) (const char *); /*I*/
  90. int (*isdigit) (const char *); /*I*/
  91. int (*isprint) (const char *); /*I*/
  92. int (*iscombiningmark) (const char *); /*I*/
  93. int (*length) (const char *); /*I*/
  94. int (*length2) (const char *, int); /*I*/
  95. int (*length_noncomb) (const char *); /*I*/
  96. int (*toupper) (const char *, char **, size_t *);
  97. int (*tolower) (const char *, char **, size_t *);
  98. void (*fix_string) (char *); /*I*/
  99. const char *(*term_form) (const char *); /*I*/
  100. const char *(*fit_to_term) (const char *, int, align_crt_t); /*I*/
  101. const char *(*term_trim) (const char *text, int width); /*I*/
  102. void (*msg_term_size) (const char *, int *, int *); /*I*/
  103. const char *(*term_substring) (const char *, int, int); /*I*/
  104. int (*term_width1) (const char *); /*I*/
  105. int (*term_width2) (const char *, size_t); /*I*/
  106. int (*term_char_width) (const char *); /*I*/
  107. const char *(*trunc) (const char *, int); /*I*/
  108. int (*offset_to_pos) (const char *, size_t); /*I*/
  109. int (*column_to_pos) (const char *, size_t); /*I*/
  110. char *(*create_search_needle) (const char *, int);
  111. void (*release_search_needle) (char *, int);
  112. const char *(*search_first) (const char *, const char *, int);
  113. const char *(*search_last) (const char *, const char *, int);
  114. int (*compare) (const char *, const char *); /*I*/
  115. int (*ncompare) (const char *, const char *); /*I*/
  116. int (*casecmp) (const char *, const char *); /*I*/
  117. int (*ncasecmp) (const char *, const char *); /*I*/
  118. int (*prefix) (const char *, const char *); /*I*/
  119. int (*caseprefix) (const char *, const char *); /*I*/
  120. char *(*create_key) (const char *text, int case_sen); /*I*/
  121. char *(*create_key_for_filename) (const char *text, int case_sen); /*I*/
  122. int (*key_collate) (const char *t1, const char *t2, int case_sen); /*I*/
  123. void (*release_key) (char *key, int case_sen); /*I*/
  124. };
  125. struct str_class str_utf8_init (void);
  126. struct str_class str_8bit_init (void);
  127. struct str_class str_ascii_init (void);
  128. /* create convertor from "from_enc" to terminal encoding
  129. * if "from_enc" is not supported return INVALID_CONV
  130. */
  131. GIConv str_crt_conv_from (const char *);
  132. /* create convertor from terminal encoding to "to_enc"
  133. * if "to_enc" is not supported return INVALID_CONV
  134. */
  135. GIConv str_crt_conv_to (const char *);
  136. /* close convertor, do not close str_cnv_to_term, str_cnv_from_term,
  137. * str_cnv_not_convert
  138. */
  139. void str_close_conv (GIConv);
  140. /* return on of not used buffers (.used == 0) or create new
  141. * returned buffer has set .used to 1
  142. */
  143. /* convert string using coder, result of conversion is appended at end of buffer
  144. * return ESTR_SUCCESS if there was no problem.
  145. * otherwise return ESTR_PROBLEM or ESTR_FAILURE
  146. */
  147. estr_t str_convert (GIConv, const char *, GString *);
  148. estr_t str_nconvert (GIConv, const char *, int, GString *);
  149. /* convert GError message (which in UTF-8) to terminal charset
  150. * def_char is used if result of error->str conversion if ESTR_FAILURE
  151. * return new allocated null-terminated string, which is need to be freed
  152. * I
  153. */
  154. gchar *str_conv_gerror_message (GError *error, const char *def_msg);
  155. /* return only ESTR_SUCCESS or ESTR_FAILURE, because vfs must be able to convert
  156. * result to original string. (so no replace with questionmark)
  157. * if coder is str_cnv_from_term or str_cnv_not_convert, string is only copied,
  158. * so is possible to show file, that is not valid in terminal encoding
  159. */
  160. estr_t str_vfs_convert_from (GIConv, const char *, GString *);
  161. /* if coder is str_cnv_to_term or str_cnv_not_convert, string is only copied,
  162. * does replace with questionmark
  163. * I
  164. */
  165. estr_t str_vfs_convert_to (GIConv, const char *, int, GString *);
  166. /* printf functin for str_buffer, append result of printf at the end of buffer
  167. */
  168. void
  169. str_printf (GString *, const char *, ...);
  170. /* add standard replacement character in terminal encoding
  171. */
  172. void str_insert_replace_char (GString *);
  173. /* init strings and set terminal encoding,
  174. * if is termenc NULL, detect terminal encoding
  175. * create all str_cnv_* and set functions for terminal encoding
  176. */
  177. void str_init_strings (const char *termenc);
  178. /* free all str_buffer and all str_cnv_*
  179. */
  180. void str_uninit_strings (void);
  181. /* try convert characters in ch to output using conv
  182. * ch_size is size of ch, can by (size_t)(-1) (-1 only for ASCII
  183. * compatible encoding, for other must be set)
  184. * return ESTR_SUCCESS if conversion was successfully,
  185. * ESTR_PROBLEM if ch contains only part of characters,
  186. * ESTR_FAILURE if conversion is not possible
  187. */
  188. estr_t str_translate_char (GIConv conv, const char *ch, size_t ch_size,
  189. char *output, size_t out_size);
  190. /* test, if text is valid in terminal encoding
  191. * I
  192. */
  193. int str_is_valid_string (const char *text);
  194. /* test, if first char of ch is valid
  195. * size, how many bytes characters occupied, could be (size_t)(-1)
  196. * return 1 if it is valid, -1 if it is invalid or -2 if it is only part of
  197. * multibyte character
  198. * I
  199. */
  200. int str_is_valid_char (const char *ch, size_t size);
  201. /* return next characters after text, do not call on the end of string
  202. */
  203. char *str_get_next_char (char *text);
  204. const char *str_cget_next_char (const char *text);
  205. /* return previous characters before text, do not call on the start of strings
  206. */
  207. char *str_get_prev_char (char *text);
  208. const char *str_cget_prev_char (const char *text);
  209. /* set text to next characters, do not call on the end of string
  210. */
  211. void str_next_char (char **text);
  212. void str_cnext_char (const char **text);
  213. /* set text to previous characters, do not call on the start of strings
  214. */
  215. void str_prev_char (char **text);
  216. void str_cprev_char (const char **text);
  217. /* return next characters after text, do not call on the end of string
  218. * works with invalid string
  219. * I
  220. */
  221. char *str_get_next_char_safe (char *text);
  222. const char *str_cget_next_char_safe (const char *text);
  223. /* return previous characters before text, do not call on the start of strings
  224. * works with invalid string
  225. * I
  226. */
  227. char *str_get_prev_char_safe (char *text);
  228. const char *str_cget_prev_char_safe (const char *text);
  229. /* set text to next characters, do not call on the end of string
  230. * works with invalid string
  231. * I
  232. */
  233. void str_next_char_safe (char **text);
  234. void str_cnext_char_safe (const char **text);
  235. /* set text to previous characters, do not call on the start of strings
  236. * works with invalid string
  237. * I
  238. */
  239. void str_prev_char_safe (char **text);
  240. void str_cprev_char_safe (const char **text);
  241. /* set text to next noncombining characters, check the end of text
  242. * return how many characters was skipped
  243. * works with invalid string
  244. * I
  245. */
  246. int str_next_noncomb_char (char **text);
  247. int str_cnext_noncomb_char (const char **text);
  248. /* set text to previous noncombining characters, search stop at begin
  249. * return how many characters was skipped
  250. * works with invalid string
  251. * I
  252. */
  253. int str_prev_noncomb_char (char **text, const char *begin);
  254. int str_cprev_noncomb_char (const char **text, const char *begin);
  255. /* if first characters in ch is space, tabulator or new lines
  256. * I
  257. */
  258. int str_isspace (const char *ch);
  259. /* if first characters in ch is punctuation or symbol
  260. * I
  261. */
  262. int str_ispunct (const char *ch);
  263. /* if first characters in ch is alphanum
  264. * I
  265. */
  266. int str_isalnum (const char *ch);
  267. /* if first characters in ch is digit
  268. * I
  269. */
  270. int str_isdigit (const char *ch);
  271. /* if first characters in ch is printable
  272. * I
  273. */
  274. int str_isprint (const char *ch);
  275. /* if first characters in ch is a combining mark (only in utf-8)
  276. * combining makrs are assumed to be zero width
  277. * I
  278. */
  279. int str_iscombiningmark (const char *ch);
  280. /* write lower from of fisrt characters in ch into out
  281. * decrase remain by size of returned characters
  282. * if out is not big enough, do nothing
  283. */
  284. int str_toupper (const char *ch, char **out, size_t *remain);
  285. /* write upper from of fisrt characters in ch into out
  286. * decrase remain by size of returned characters
  287. * if out is not big enough, do nothing
  288. */
  289. int str_tolower (const char *ch, char **out, size_t *remain);
  290. /* return length of text in characters
  291. * I
  292. */
  293. int str_length (const char* text);
  294. /* return length of text in characters, limit to size
  295. * I
  296. */
  297. int str_length2 (const char* text, int size);
  298. /* return length of one char
  299. * I
  300. */
  301. int str_length_char (const char *);
  302. /* return length of text in characters, count only noncombining characters
  303. * I
  304. */
  305. int str_length_noncomb (const char* text);
  306. /* replace all invalid characters in text with questionmark
  307. * after return, text is valid string in terminal encoding
  308. * I
  309. */
  310. void str_fix_string (char* text);
  311. /* replace all invalid characters in text with questionmark
  312. * replace all unprintable characters with '.'
  313. * return static allocated string, "text" is not changed
  314. * returned string do not need to be freed
  315. * I
  316. */
  317. const char *str_term_form (const char *text);
  318. /* like str_term_form, but text can be alignment to width
  319. * alignment is specified in just_mode (J_LEFT, J_LEFT_FIT, ...)
  320. * result is completed with spaces to width
  321. * I
  322. */
  323. const char *str_fit_to_term (const char *text, int width, align_crt_t just_mode);
  324. /* like str_term_form, but when text is wider than width, three dots are
  325. * inserted at begin and result is completed with suffix of text
  326. * no additional spaces are inserted
  327. * I
  328. */
  329. const char *str_term_trim (const char *text, int width);
  330. /* return how many lines and columns will text occupy on terminal
  331. * I
  332. */
  333. void str_msg_term_size (const char *text, int *lines, int *columns);
  334. /* like str_term_form, but return only specified substring
  335. * start - column (position) on terminal, where substring begin
  336. * result is completed with spaces to width
  337. * I
  338. */
  339. const char *str_term_substring (const char *text, int start, int width);
  340. /* return width, that will be text occupied on terminal
  341. * I
  342. */
  343. int str_term_width1 (const char *text);
  344. /* return width, that will be text occupied on terminal
  345. * text is limited by length in characters
  346. * I
  347. */
  348. int str_term_width2 (const char *text, size_t length);
  349. /* return width, that will be character occupied on terminal
  350. * combining characters are always zero width
  351. * I
  352. */
  353. int str_term_char_width (const char *text);
  354. /* convert position in characters to position in bytes
  355. * I
  356. */
  357. int str_offset_to_pos (const char* text, size_t length);
  358. /* convert position on terminal to position in characters
  359. * I
  360. */
  361. int str_column_to_pos (const char *text, size_t pos);
  362. /* like str_fit_to_term width just_mode = J_LEFT_FIT,
  363. * but do not insert additional spaces
  364. * I
  365. */
  366. const char *str_trunc (const char *text, int width);
  367. /* create needle, that will be searched in str_search_fist/last,
  368. * so needle can be reused
  369. * in UTF-8 return normalized form of needle
  370. */
  371. char *str_create_search_needle (const char *needle, int case_sen);
  372. /* free needle returned by str_create_search_needle
  373. */
  374. void str_release_search_needle (char *needle, int case_sen);
  375. /* search for first occurrence of search in text
  376. */
  377. const char *str_search_first (const char *text, const char *needle, int case_sen);
  378. /* search for last occurrence of search in text
  379. */
  380. const char *str_search_last (const char *text, const char *needle, int case_sen);
  381. /* case sensitive compare two strings
  382. * I
  383. */
  384. int str_compare (const char *t1, const char *t2);
  385. /* case sensitive compare two strings
  386. * if one string is prefix of the other string, return 0
  387. * I
  388. */
  389. int str_ncompare (const char *t1, const char *t2);
  390. /* case insensitive compare two strings
  391. * I
  392. */
  393. int str_casecmp (const char *t1, const char *t2);
  394. /* case insensitive compare two strings
  395. * if one string is prefix of the other string, return 0
  396. * I
  397. */
  398. int str_ncasecmp (const char *t1, const char *t2);
  399. /* return, how many bytes are are same from start in text and prefix
  400. * both strings are decomposed befor comapring and return value is counted
  401. * in decomposed form, too. caling with prefix, prefix, you get size in bytes
  402. * of prefix in decomposed form,
  403. * I
  404. */
  405. int str_prefix (const char *text, const char *prefix);
  406. /* case insensitive version of str_prefix
  407. * I
  408. */
  409. int str_caseprefix (const char *text, const char *prefix);
  410. /* create a key that is used by str_key_collate
  411. * I
  412. */
  413. char *str_create_key (const char *text, int case_sen);
  414. /* create a key that is used by str_key_collate
  415. * should aware dot '.' in text
  416. * I
  417. */
  418. char *str_create_key_for_filename (const char *text, int case_sen);
  419. /* compare two string using LC_COLLATE, if is possible
  420. * if case_sen is set, comparing is case sensitive,
  421. * case_sen must be same for str_create_key, str_key_collate and str_release_key
  422. * I
  423. */
  424. int str_key_collate (const char *t1, const char *t2, int case_sen);
  425. /* release_key created by str_create_key, only rigth way to release key
  426. * I
  427. */
  428. void str_release_key (char *key, int case_sen);
  429. /* return 1 if codeset_name is utf8 or utf-8
  430. * I
  431. */
  432. int str_isutf8 (const char *codeset_name);
  433. const char *str_detect_termencoding (void);
  434. int str_verscmp(const char *s1, const char *s2);
  435. #endif /* MC_STRUTIL_H*/