12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847 |
- /*
- Common strings utilities
- Copyright (C) 2007-2015
- Free Software Foundation, Inc.
- Written by:
- Rostislav Benes, 2007
- This file is part of the Midnight Commander.
- The Midnight Commander is free software: you can redistribute it
- and/or modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation, either version 3 of the License,
- or (at your option) any later version.
- The Midnight Commander is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
- #include <config.h>
- #include <stdlib.h>
- #include <langinfo.h>
- #include <string.h>
- #include <errno.h>
- #include "lib/global.h"
- #include "lib/strutil.h"
/* Encoding names recognised as UTF-8; NULL-terminated.
 * str_test_encoding_class() compares each entry as a case-insensitive prefix. */
static const char *str_utf8_encodings[] = {
    "utf-8",
    "utf8",
    NULL
};
/* Standard 8-bit encodings (no wide or multibyte characters); NULL-terminated.
 * str_test_encoding_class() compares each entry as a case-insensitive prefix,
 * so e.g. "iso-8859" covers every name that starts with it. */
static const char *str_8bit_encodings[] = {
    "cp-1251",
    "cp1251",
    "cp-1250",
    "cp1250",
    "cp-866",
    "cp866",
    "ibm-866",
    "ibm866",
    "cp-850",
    "cp850",
    "cp-852",
    "cp852",
    "iso-8859",
    "iso8859",
    "koi8",
    NULL
};
- /* terminal encoding */
- static char *codeset = NULL;
- static char *term_encoding = NULL;
- /* function for encoding specific operations */
- static struct str_class used_class;
- GIConv str_cnv_to_term;
- GIConv str_cnv_from_term;
- GIConv str_cnv_not_convert = INVALID_CONV;
- /* if enc is same encoding like on terminal */
- static int
- str_test_not_convert (const char *enc)
- {
- return g_ascii_strcasecmp (enc, codeset) == 0;
- }
- GIConv
- str_crt_conv_to (const char *to_enc)
- {
- return (!str_test_not_convert (to_enc)) ? g_iconv_open (to_enc, codeset) : str_cnv_not_convert;
- }
- GIConv
- str_crt_conv_from (const char *from_enc)
- {
- return (!str_test_not_convert (from_enc))
- ? g_iconv_open (codeset, from_enc) : str_cnv_not_convert;
- }
- void
- str_close_conv (GIConv conv)
- {
- if (conv != str_cnv_not_convert)
- g_iconv_close (conv);
- }
- static estr_t
- _str_convert (GIConv coder, const char *string, int size, GString * buffer)
- {
- estr_t state = ESTR_SUCCESS;
- gssize left;
- gsize bytes_read = 0;
- gsize bytes_written = 0;
- errno = 0; /* FIXME: is it really needed? */
- if (coder == INVALID_CONV)
- return ESTR_FAILURE;
- if (string == NULL || buffer == NULL)
- return ESTR_FAILURE;
- /*
- if (! used_class.is_valid_string (string))
- {
- return ESTR_FAILURE;
- }
- */
- if (size < 0)
- size = strlen (string);
- else
- {
- left = strlen (string);
- if (left < size)
- size = left;
- }
- left = size;
- g_iconv (coder, NULL, NULL, NULL, NULL);
- while (left != 0)
- {
- gchar *tmp_buff;
- GError *mcerror = NULL;
- tmp_buff = g_convert_with_iconv ((const gchar *) string,
- left, coder, &bytes_read, &bytes_written, &mcerror);
- if (mcerror != NULL)
- {
- int code = mcerror->code;
- g_error_free (mcerror);
- mcerror = NULL;
- switch (code)
- {
- case G_CONVERT_ERROR_NO_CONVERSION:
- /* Conversion between the requested character sets is not supported. */
- tmp_buff = g_strnfill (strlen (string), '?');
- g_string_append (buffer, tmp_buff);
- g_free (tmp_buff);
- return ESTR_FAILURE;
- case G_CONVERT_ERROR_ILLEGAL_SEQUENCE:
- /* Invalid byte sequence in conversion input. */
- if ((tmp_buff == NULL) && (bytes_read != 0))
- /* recode valid byte sequence */
- tmp_buff = g_convert_with_iconv ((const gchar *) string,
- bytes_read, coder, NULL, NULL, NULL);
- if (tmp_buff != NULL)
- {
- g_string_append (buffer, tmp_buff);
- g_free (tmp_buff);
- }
- if ((int) bytes_read >= left)
- return ESTR_PROBLEM;
- string += bytes_read + 1;
- size -= (bytes_read + 1);
- left -= (bytes_read + 1);
- g_string_append_c (buffer, *(string - 1));
- state = ESTR_PROBLEM;
- break;
- case G_CONVERT_ERROR_PARTIAL_INPUT:
- /* Partial character sequence at end of input. */
- g_string_append (buffer, tmp_buff);
- g_free (tmp_buff);
- if ((int) bytes_read < left)
- {
- left = left - bytes_read;
- tmp_buff = g_strnfill (left, '?');
- g_string_append (buffer, tmp_buff);
- g_free (tmp_buff);
- }
- return ESTR_PROBLEM;
- case G_CONVERT_ERROR_BAD_URI: /* Don't know how handle this error :( */
- case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH: /* Don't know how handle this error :( */
- case G_CONVERT_ERROR_FAILED: /* Conversion failed for some reason. */
- default:
- g_free (tmp_buff);
- return ESTR_FAILURE;
- }
- }
- else if (tmp_buff == NULL)
- {
- g_string_append (buffer, string);
- return ESTR_PROBLEM;
- }
- else if (*tmp_buff == '\0')
- {
- g_free (tmp_buff);
- g_string_append (buffer, string);
- return state;
- }
- else
- {
- g_string_append (buffer, tmp_buff);
- g_free (tmp_buff);
- string += bytes_read;
- left -= bytes_read;
- }
- }
- return state;
- }
- estr_t
- str_convert (GIConv coder, const char *string, GString * buffer)
- {
- return _str_convert (coder, string, -1, buffer);
- }
- estr_t
- str_nconvert (GIConv coder, const char *string, int size, GString * buffer)
- {
- return _str_convert (coder, string, size, buffer);
- }
- gchar *
- str_conv_gerror_message (GError * mcerror, const char *def_msg)
- {
- return used_class.conv_gerror_message (mcerror, def_msg);
- }
- estr_t
- str_vfs_convert_from (GIConv coder, const char *string, GString * buffer)
- {
- estr_t result = ESTR_SUCCESS;
- if (coder == str_cnv_not_convert)
- g_string_append (buffer, string != NULL ? string : "");
- else
- result = _str_convert (coder, string, -1, buffer);
- return result;
- }
- estr_t
- str_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer)
- {
- return used_class.vfs_convert_to (coder, string, size, buffer);
- }
- void
- str_printf (GString * buffer, const char *format, ...)
- {
- va_list ap;
- va_start (ap, format);
- g_string_append_vprintf (buffer, format, ap);
- va_end (ap);
- }
- void
- str_insert_replace_char (GString * buffer)
- {
- used_class.insert_replace_char (buffer);
- }
- estr_t
- str_translate_char (GIConv conv, const char *keys, size_t ch_size, char *output, size_t out_size)
- {
- size_t left;
- size_t cnv;
- g_iconv (conv, NULL, NULL, NULL, NULL);
- left = (ch_size == (size_t) (-1)) ? strlen (keys) : ch_size;
- cnv = g_iconv (conv, (gchar **) & keys, &left, &output, &out_size);
- if (cnv == (size_t) (-1))
- return (errno == EINVAL) ? ESTR_PROBLEM : ESTR_FAILURE;
- output[0] = '\0';
- return ESTR_SUCCESS;
- }
- const char *
- str_detect_termencoding (void)
- {
- if (term_encoding == NULL)
- {
- /* On Linux, nl_langinfo (CODESET) returns upper case UTF-8 whether the LANG is set
- to utf-8 or UTF-8.
- On Mac OS X, it returns the same case as the LANG input.
- So let tranform result of nl_langinfo (CODESET) to upper case unconditionally. */
- term_encoding = g_ascii_strup (nl_langinfo (CODESET), -1);
- }
- return term_encoding;
- }
/* Count the entries of the NULL-terminated @table that are a case-insensitive
 * (ASCII) prefix of @encoding. A NULL @encoding yields 0. */
static int
str_test_encoding_class (const char *encoding, const char **table)
{
    const char **entry;
    int matches = 0;

    if (encoding == NULL)
        return 0;

    for (entry = table; *entry != NULL; entry++)
    {
        const size_t prefix_len = strlen (*entry);

        if (g_ascii_strncasecmp (encoding, *entry, prefix_len) == 0)
            matches++;
    }

    return matches;
}
- static void
- str_choose_str_functions (void)
- {
- if (str_test_encoding_class (codeset, str_utf8_encodings))
- used_class = str_utf8_init ();
- else if (str_test_encoding_class (codeset, str_8bit_encodings))
- used_class = str_8bit_init ();
- else
- used_class = str_ascii_init ();
- }
- gboolean
- str_isutf8 (const char *codeset_name)
- {
- return (str_test_encoding_class (codeset_name, str_utf8_encodings) != 0);
- }
- void
- str_init_strings (const char *termenc)
- {
- codeset = termenc != NULL ? g_ascii_strup (termenc, -1) : g_strdup (str_detect_termencoding ());
- str_cnv_not_convert = g_iconv_open (codeset, codeset);
- if (str_cnv_not_convert == INVALID_CONV)
- {
- if (termenc != NULL)
- {
- g_free (codeset);
- codeset = g_strdup (str_detect_termencoding ());
- str_cnv_not_convert = g_iconv_open (codeset, codeset);
- }
- if (str_cnv_not_convert == INVALID_CONV)
- {
- g_free (codeset);
- codeset = g_strdup (DEFAULT_CHARSET);
- str_cnv_not_convert = g_iconv_open (codeset, codeset);
- }
- }
- str_cnv_to_term = str_cnv_not_convert;
- str_cnv_from_term = str_cnv_not_convert;
- str_choose_str_functions ();
- }
- void
- str_uninit_strings (void)
- {
- if (str_cnv_not_convert != INVALID_CONV)
- g_iconv_close (str_cnv_not_convert);
- g_free (term_encoding);
- g_free (codeset);
- }
- const char *
- str_term_form (const char *text)
- {
- return used_class.term_form (text);
- }
- const char *
- str_fit_to_term (const char *text, int width, align_crt_t just_mode)
- {
- return used_class.fit_to_term (text, width, just_mode);
- }
- const char *
- str_term_trim (const char *text, int width)
- {
- return used_class.term_trim (text, width);
- }
- const char *
- str_term_substring (const char *text, int start, int width)
- {
- return used_class.term_substring (text, start, width);
- }
- char *
- str_get_next_char (char *text)
- {
- used_class.cnext_char ((const char **) &text);
- return text;
- }
- const char *
- str_cget_next_char (const char *text)
- {
- used_class.cnext_char (&text);
- return text;
- }
- void
- str_next_char (char **text)
- {
- used_class.cnext_char ((const char **) text);
- }
- void
- str_cnext_char (const char **text)
- {
- used_class.cnext_char (text);
- }
- char *
- str_get_prev_char (char *text)
- {
- used_class.cprev_char ((const char **) &text);
- return text;
- }
- const char *
- str_cget_prev_char (const char *text)
- {
- used_class.cprev_char (&text);
- return text;
- }
- void
- str_prev_char (char **text)
- {
- used_class.cprev_char ((const char **) text);
- }
- void
- str_cprev_char (const char **text)
- {
- used_class.cprev_char (text);
- }
- char *
- str_get_next_char_safe (char *text)
- {
- used_class.cnext_char_safe ((const char **) &text);
- return text;
- }
- const char *
- str_cget_next_char_safe (const char *text)
- {
- used_class.cnext_char_safe (&text);
- return text;
- }
- void
- str_next_char_safe (char **text)
- {
- used_class.cnext_char_safe ((const char **) text);
- }
- void
- str_cnext_char_safe (const char **text)
- {
- used_class.cnext_char_safe (text);
- }
- char *
- str_get_prev_char_safe (char *text)
- {
- used_class.cprev_char_safe ((const char **) &text);
- return text;
- }
- const char *
- str_cget_prev_char_safe (const char *text)
- {
- used_class.cprev_char_safe (&text);
- return text;
- }
- void
- str_prev_char_safe (char **text)
- {
- used_class.cprev_char_safe ((const char **) text);
- }
- void
- str_cprev_char_safe (const char **text)
- {
- used_class.cprev_char_safe (text);
- }
- int
- str_next_noncomb_char (char **text)
- {
- return used_class.cnext_noncomb_char ((const char **) text);
- }
- int
- str_cnext_noncomb_char (const char **text)
- {
- return used_class.cnext_noncomb_char (text);
- }
- int
- str_prev_noncomb_char (char **text, const char *begin)
- {
- return used_class.cprev_noncomb_char ((const char **) text, begin);
- }
- int
- str_cprev_noncomb_char (const char **text, const char *begin)
- {
- return used_class.cprev_noncomb_char (text, begin);
- }
- int
- str_is_valid_char (const char *ch, size_t size)
- {
- return used_class.is_valid_char (ch, size);
- }
- int
- str_term_width1 (const char *text)
- {
- return used_class.term_width1 (text);
- }
- int
- str_term_width2 (const char *text, size_t length)
- {
- return used_class.term_width2 (text, length);
- }
- int
- str_term_char_width (const char *text)
- {
- return used_class.term_char_width (text);
- }
- int
- str_offset_to_pos (const char *text, size_t length)
- {
- return used_class.offset_to_pos (text, length);
- }
- int
- str_length (const char *text)
- {
- return used_class.length (text);
- }
/* Return the byte distance between @text and the position reported by
 * str_cget_next_char_safe() for it. */
int
str_length_char (const char *text)
{
    const char *next;

    next = str_cget_next_char_safe (text);
    return next - text;
}
- int
- str_length2 (const char *text, int size)
- {
- return used_class.length2 (text, size);
- }
- int
- str_length_noncomb (const char *text)
- {
- return used_class.length_noncomb (text);
- }
- int
- str_column_to_pos (const char *text, size_t pos)
- {
- return used_class.column_to_pos (text, pos);
- }
- int
- str_isspace (const char *ch)
- {
- return used_class.char_isspace (ch);
- }
- int
- str_ispunct (const char *ch)
- {
- return used_class.char_ispunct (ch);
- }
- int
- str_isalnum (const char *ch)
- {
- return used_class.char_isalnum (ch);
- }
- int
- str_isdigit (const char *ch)
- {
- return used_class.char_isdigit (ch);
- }
- int
- str_toupper (const char *ch, char **out, size_t * remain)
- {
- return used_class.char_toupper (ch, out, remain);
- }
- int
- str_tolower (const char *ch, char **out, size_t * remain)
- {
- return used_class.char_tolower (ch, out, remain);
- }
- int
- str_isprint (const char *ch)
- {
- return used_class.char_isprint (ch);
- }
- gboolean
- str_iscombiningmark (const char *ch)
- {
- return used_class.char_iscombiningmark (ch);
- }
- const char *
- str_trunc (const char *text, int width)
- {
- return used_class.trunc (text, width);
- }
- char *
- str_create_search_needle (const char *needle, int case_sen)
- {
- return used_class.create_search_needle (needle, case_sen);
- }
- void
- str_release_search_needle (char *needle, int case_sen)
- {
- used_class.release_search_needle (needle, case_sen);
- }
- const char *
- str_search_first (const char *text, const char *search, int case_sen)
- {
- return used_class.search_first (text, search, case_sen);
- }
- const char *
- str_search_last (const char *text, const char *search, int case_sen)
- {
- return used_class.search_last (text, search, case_sen);
- }
- int
- str_is_valid_string (const char *text)
- {
- return used_class.is_valid_string (text);
- }
- int
- str_compare (const char *t1, const char *t2)
- {
- return used_class.compare (t1, t2);
- }
- int
- str_ncompare (const char *t1, const char *t2)
- {
- return used_class.ncompare (t1, t2);
- }
- int
- str_casecmp (const char *t1, const char *t2)
- {
- return used_class.casecmp (t1, t2);
- }
- int
- str_ncasecmp (const char *t1, const char *t2)
- {
- return used_class.ncasecmp (t1, t2);
- }
- int
- str_prefix (const char *text, const char *prefix)
- {
- return used_class.prefix (text, prefix);
- }
- int
- str_caseprefix (const char *text, const char *prefix)
- {
- return used_class.caseprefix (text, prefix);
- }
- void
- str_fix_string (char *text)
- {
- used_class.fix_string (text);
- }
- char *
- str_create_key (const char *text, int case_sen)
- {
- return used_class.create_key (text, case_sen);
- }
- char *
- str_create_key_for_filename (const char *text, int case_sen)
- {
- return used_class.create_key_for_filename (text, case_sen);
- }
- int
- str_key_collate (const char *t1, const char *t2, int case_sen)
- {
- return used_class.key_collate (t1, t2, case_sen);
- }
- void
- str_release_key (char *key, int case_sen)
- {
- used_class.release_key (key, case_sen);
- }
/**
 * Measure a multi-line message.
 *
 * @param text    message; lines are separated by '\n'
 * @param lines   receives the number of lines (1 + number of '\n' characters)
 * @param columns receives the largest str_term_width1() value over all lines
 */
void
str_msg_term_size (const char *text, int *lines, int *columns)
{
    char *copy;
    char *line;
    char *eol;

    *lines = 1;
    *columns = 0;

    /* work on a private copy because each line is NUL-terminated temporarily */
    copy = g_strdup (text);

    for (line = copy;; line = eol + 1)
    {
        int width;

        eol = strchr (line, '\n');
        if (eol != NULL)
            *eol = '\0';

        width = str_term_width1 (line);
        if (*columns < width)
            *columns = width;

        if (eol == NULL)
            break;

        /* put the separator back and move on to the next line */
        *eol = '\n';
        (*lines)++;
    }

    g_free (copy);
}
- /* --------------------------------------------------------------------------------------------- */
/**
 * Search @haystack backwards for @needle, skipping the last @skip_count occurrences.
 *
 * After each hit the search window is shrunk to end one byte before that hit, so an
 * occurrence that ends immediately in front of the previous hit is not considered.
 * NOTE(review): that extra "- 1" is kept from the original implementation — confirm it
 * is intended.
 *
 * @param haystack   string to search in; NULL yields NULL
 * @param needle     string to look for; NULL yields NULL
 * @param skip_count number of occurrences, counted from the end, to skip
 *
 * @return pointer into @haystack at the selected occurrence, or NULL if there are not
 *         enough occurrences.
 */
char *
strrstr_skip_count (const char *haystack, const char *needle, size_t skip_count)
{
    char *semi;
    ssize_t len;

    if (haystack == NULL || needle == NULL)
        return NULL;

    len = strlen (haystack);

    do
    {
        /* A hit at offset 0 leaves len == -1. g_strrstr_len() interprets a negative
         * length as "search the whole string", which would find the last occurrence
         * again; there is nothing left to search, so report "not found". */
        if (len < 0)
            return NULL;

        semi = g_strrstr_len (haystack, len, needle);
        if (semi == NULL)
            return NULL;
        len = semi - haystack - 1;
    }
    while (skip_count-- != 0);

    return semi;
}
- /* --------------------------------------------------------------------------------------------- */
- /* Interprete string as a non-negative decimal integer, optionally multiplied by various values.
- *
- * @param str input value
- * @param invalid set to TRUE if "str" does not represent a number in this format
- *
- * @return non-integer representation of "str", 0 in case of error.
- */
- uintmax_t
- parse_integer (const char *str, gboolean * invalid)
- {
- uintmax_t n;
- char *suffix;
- strtol_error_t e;
- e = xstrtoumax (str, &suffix, 10, &n, "bcEGkKMPTwYZ0");
- if (e == LONGINT_INVALID_SUFFIX_CHAR && *suffix == 'x')
- {
- uintmax_t multiplier;
- multiplier = parse_integer (suffix + 1, invalid);
- if (multiplier != 0 && n * multiplier / multiplier != n)
- {
- *invalid = TRUE;
- return 0;
- }
- n *= multiplier;
- }
- else if (e != LONGINT_OK)
- {
- *invalid = TRUE;
- n = 0;
- }
- return n;
- }
- /* --------------------------------------------------------------------------------------------- */
|