123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362 |
- /* UTF-8 strings utilities
- Copyright (C) 2007 Free Software Foundation, Inc.
- Written 2007 by:
- Rostislav Benes
- The file_date routine is mostly from GNU's fileutils package,
- written by Richard Stallman and David MacKenzie.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
- #include <config.h>
- #include <stdlib.h>
- #include <stdio.h>
- #include <errno.h>
- #include <glib.h>
- #include <langinfo.h>
- #include <string.h>
- #include "lib/global.h"
- #include "lib/strutil.h"
- /* using function for utf-8 from glib */
- static const char replch[] = "\xEF\xBF\xBD";
- static int
- str_unichar_iscombiningmark (gunichar uni)
- {
- int type = g_unichar_type (uni);
- return (type == G_UNICODE_COMBINING_MARK)
- || (type == G_UNICODE_ENCLOSING_MARK)
- || (type == G_UNICODE_NON_SPACING_MARK);
- }
- static void
- str_utf8_insert_replace_char (GString * buffer)
- {
- g_string_append (buffer, replch);
- }
- static int
- str_utf8_is_valid_string (const char *text)
- {
- return g_utf8_validate (text, -1, NULL);
- }
- static int
- str_utf8_is_valid_char (const char *ch, size_t size)
- {
- switch (g_utf8_get_char_validated (ch, size))
- {
- case (gunichar) (-2):
- return -2;
- case (gunichar) (-1):
- return -1;
- default:
- return 1;
- }
- }
- static void
- str_utf8_cnext_char (const char **text)
- {
- (*text) = g_utf8_next_char (*text);
- }
- static void
- str_utf8_cprev_char (const char **text)
- {
- (*text) = g_utf8_prev_char (*text);
- }
- static void
- str_utf8_cnext_char_safe (const char **text)
- {
- if (str_utf8_is_valid_char (*text, -1) == 1)
- (*text) = g_utf8_next_char (*text);
- else
- (*text)++;
- }
- static void
- str_utf8_cprev_char_safe (const char **text)
- {
- const char *result = g_utf8_prev_char (*text);
- const char *t = result;
- str_utf8_cnext_char_safe (&t);
- if (t == *text)
- (*text) = result;
- else
- (*text)--;
- }
- static void
- str_utf8_fix_string (char *text)
- {
- gunichar uni;
- while (text[0] != '\0')
- {
- uni = g_utf8_get_char_validated (text, -1);
- if ((uni != (gunichar) (-1)) && (uni != (gunichar) (-2)))
- {
- text = g_utf8_next_char (text);
- }
- else
- {
- text[0] = '?';
- text++;
- }
- }
- }
- static int
- str_utf8_isspace (const char *text)
- {
- gunichar uni = g_utf8_get_char_validated (text, -1);
- return g_unichar_isspace (uni);
- }
- static int
- str_utf8_ispunct (const char *text)
- {
- gunichar uni = g_utf8_get_char_validated (text, -1);
- return g_unichar_ispunct (uni);
- }
- static int
- str_utf8_isalnum (const char *text)
- {
- gunichar uni = g_utf8_get_char_validated (text, -1);
- return g_unichar_isalnum (uni);
- }
- static int
- str_utf8_isdigit (const char *text)
- {
- gunichar uni = g_utf8_get_char_validated (text, -1);
- return g_unichar_isdigit (uni);
- }
- static int
- str_utf8_isprint (const char *ch)
- {
- gunichar uni = g_utf8_get_char_validated (ch, -1);
- return g_unichar_isprint (uni);
- }
- static int
- str_utf8_iscombiningmark (const char *ch)
- {
- gunichar uni = g_utf8_get_char_validated (ch, -1);
- return str_unichar_iscombiningmark (uni);
- }
- static int
- str_utf8_cnext_noncomb_char (const char **text)
- {
- int count = 0;
- while ((*text)[0] != '\0')
- {
- str_utf8_cnext_char_safe (text);
- count++;
- if (!str_utf8_iscombiningmark (*text))
- break;
- }
- return count;
- }
- static int
- str_utf8_cprev_noncomb_char (const char **text, const char *begin)
- {
- int count = 0;
- while ((*text) != begin)
- {
- str_utf8_cprev_char_safe (text);
- count++;
- if (!str_utf8_iscombiningmark (*text))
- break;
- }
- return count;
- }
- static int
- str_utf8_toupper (const char *text, char **out, size_t * remain)
- {
- gunichar uni;
- size_t left;
- uni = g_utf8_get_char_validated (text, -1);
- if (uni == (gunichar) (-1) || uni == (gunichar) (-2))
- return 0;
- uni = g_unichar_toupper (uni);
- left = g_unichar_to_utf8 (uni, NULL);
- if (left >= *remain)
- return 0;
- left = g_unichar_to_utf8 (uni, *out);
- (*out) += left;
- (*remain) -= left;
- return 1;
- }
- static int
- str_utf8_tolower (const char *text, char **out, size_t * remain)
- {
- gunichar uni;
- size_t left;
- uni = g_utf8_get_char_validated (text, -1);
- if (uni == (gunichar) (-1) || uni == (gunichar) (-2))
- return 0;
- uni = g_unichar_tolower (uni);
- left = g_unichar_to_utf8 (uni, NULL);
- if (left >= *remain)
- return 0;
- left = g_unichar_to_utf8 (uni, *out);
- (*out) += left;
- (*remain) -= left;
- return 1;
- }
- static int
- str_utf8_length (const char *text)
- {
- int result = 0;
- const char *start;
- const char *end;
- start = text;
- while (!g_utf8_validate (start, -1, &end) && start[0] != '\0')
- {
- if (start != end)
- {
- result += g_utf8_strlen (start, end - start);
- }
- result++;
- start = end + 1;
- }
- if (start == text)
- {
- result = g_utf8_strlen (text, -1);
- }
- else
- {
- if (start[0] != '\0' && start != end)
- {
- result += g_utf8_strlen (start, end - start);
- }
- }
- return result;
- }
- static int
- str_utf8_length2 (const char *text, int size)
- {
- int result = 0;
- const char *start;
- const char *end;
- start = text;
- while (!g_utf8_validate (start, -1, &end) && start[0] != '\0' && size > 0)
- {
- if (start != end)
- {
- result += g_utf8_strlen (start, min (end - start, size));
- size -= end - start;
- }
- result += (size > 0);
- size--;
- start = end + 1;
- }
- if (start == text)
- {
- result = g_utf8_strlen (text, size);
- }
- else
- {
- if (start[0] != '\0' && start != end && size > 0)
- {
- result += g_utf8_strlen (start, min (end - start, size));
- }
- }
- return result;
- }
- static int
- str_utf8_length_noncomb (const char *text)
- {
- int result = 0;
- const char *t = text;
- while (t[0] != '\0')
- {
- str_utf8_cnext_noncomb_char (&t);
- result++;
- }
- return result;
- }
- /*
- static void
- str_utf8_questmark_sustb (char **string, size_t * left, GString * buffer)
- {
- char *next = g_utf8_next_char (*string);
- (*left) -= next - (*string);
- (*string) = next;
- g_string_append_c (buffer, '?');
- }
- */
- static gchar *
- str_utf8_conv_gerror_message (GError *error, const char *def_msg)
- {
- if ((error != NULL) && (error->message != NULL))
- return g_strdup (error->message);
- return g_strdup (def_msg != NULL ? def_msg : "");
- }
- static estr_t
- str_utf8_vfs_convert_to (GIConv coder, const char *string,
- int size, GString * buffer)
- {
- estr_t result;
- if (coder == str_cnv_not_convert)
- {
- g_string_append_len (buffer, string, size);
- result = ESTR_SUCCESS;
- }
- else
- result = str_nconvert (coder, (char *) string, size, buffer);
- return result;
- }
- struct term_form
- {
- char text[BUF_MEDIUM * 6];
- size_t width;
- int compose;
- };
- /* utiliti function, that make string valid in utf8 and all characters printable
- * return width of string too*/
- static const struct term_form *
- str_utf8_make_make_term_form (const char *text, size_t length)
- {
- static struct term_form result;
- gunichar uni;
- size_t left;
- char *actual;
- result.text[0] = '\0';
- result.width = 0;
- result.compose = 0;
- actual = result.text;
- /* check if text start with combining character,
- * add space at begin in this case */
- if (length != 0 && text[0] != '\0')
- {
- uni = g_utf8_get_char_validated (text, -1);
- if ((uni != (gunichar) (-1)) && (uni != (gunichar) (-2)))
- {
- if (str_unichar_iscombiningmark (uni))
- {
- actual[0] = ' ';
- actual++;
- result.width++;
- result.compose = 1;
- }
- }
- }
-
- while (length != 0 && text[0] != '\0') {
- uni = g_utf8_get_char_validated (text, -1);
- if ((uni != (gunichar)(-1)) && (uni != (gunichar)(-2))) {
- if (g_unichar_isprint(uni)) {
- left = g_unichar_to_utf8 (uni, actual);
- actual+= left;
- if (!str_unichar_iscombiningmark (uni)) {
- result.width++;
- if (g_unichar_iswide(uni)) result.width++;
- } else result.compose = 1;
- } else {
- actual[0] = '.';
- actual++;
- result.width++;
- }
- text = g_utf8_next_char (text);
- } else {
- text++;
- /*actual[0] = '?';*/
- memcpy (actual, replch, strlen (replch));
- actual+= strlen (replch);
- result.width++;
- }
- if (length != (size_t) (-1)) length--; }
- actual[0] = '\0';
- return &result;
- }
- static const char *
- str_utf8_term_form (const char *text)
- {
- static char result[BUF_MEDIUM * 6];
- const struct term_form *pre_form;
- char *composed;
- pre_form = str_utf8_make_make_term_form (text, (size_t) (-1));
- if (pre_form->compose)
- {
- composed =
- g_utf8_normalize (pre_form->text, -1,
- G_NORMALIZE_DEFAULT_COMPOSE);
- g_strlcpy (result, composed, sizeof (result));
- g_free (composed);
- }
- else
- {
- g_strlcpy (result, pre_form->text, sizeof (result));
- }
- return result;
- }
- struct utf8_tool
- {
- char *actual;
- size_t remain;
- const char *cheked;
- int ident;
- int compose;
- };
- /* utiliti function, that copy all characters from cheked to actual */
- static int
- utf8_tool_copy_chars_to_end (struct utf8_tool *tool)
- {
- size_t left;
- gunichar uni;
- tool->compose = 0;
- while (tool->cheked[0] != '\0')
- {
- uni = g_utf8_get_char (tool->cheked);
- tool->compose |= str_unichar_iscombiningmark (uni);
- left = g_unichar_to_utf8 (uni, NULL);
- if (tool->remain <= left)
- return 0;
- left = g_unichar_to_utf8 (uni, tool->actual);
- tool->actual += left;
- tool->remain -= left;
- tool->cheked = g_utf8_next_char (tool->cheked);
- }
- return 1;
- }
- /* utiliti function, that copy characters from cheked to actual until ident is
- * smaller than to_ident */
- static int
- utf8_tool_copy_chars_to (struct utf8_tool *tool, int to_ident)
- {
- size_t left;
- gunichar uni;
- int w;
- tool->compose = 0;
- while (tool->cheked[0] != '\0')
- {
- uni = g_utf8_get_char (tool->cheked);
- if (!str_unichar_iscombiningmark (uni))
- {
- w = 1;
- if (g_unichar_iswide (uni))
- w++;
- if (tool->ident + w > to_ident)
- return 1;
- }
- else
- {
- w = 0;
- tool->compose = 1;
- }
- left = g_unichar_to_utf8 (uni, NULL);
- if (tool->remain <= left)
- return 0;
- left = g_unichar_to_utf8 (uni, tool->actual);
- tool->actual += left;
- tool->remain -= left;
- tool->cheked = g_utf8_next_char (tool->cheked);
- tool->ident += w;
- }
- return 1;
- }
- /* utiliti function, add count spaces to actual */
- static int
- utf8_tool_insert_space (struct utf8_tool *tool, int count)
- {
- if (count <= 0)
- return 1;
- if (tool->remain <= (gsize) count)
- return 0;
- memset (tool->actual, ' ', count);
- tool->actual += count;
- tool->remain -= count;
- return 1;
- }
- /* utiliti function, add one characters to actual */
- static int
- utf8_tool_insert_char (struct utf8_tool *tool, char ch)
- {
- if (tool->remain <= 1)
- return 0;
- tool->actual[0] = ch;
- tool->actual++;
- tool->remain--;
- return 1;
- }
- /* utiliti function, thah skip characters from cheked until ident is greater or
- * equal to to_ident */
- static int
- utf8_tool_skip_chars_to (struct utf8_tool *tool, int to_ident)
- {
- gunichar uni;
- while (to_ident > tool->ident && tool->cheked[0] != '\0')
- {
- uni = g_utf8_get_char (tool->cheked);
- if (!str_unichar_iscombiningmark (uni))
- {
- tool->ident++;
- if (g_unichar_iswide (uni))
- tool->ident++;
- }
- tool->cheked = g_utf8_next_char (tool->cheked);
- }
- uni = g_utf8_get_char (tool->cheked);
- while (str_unichar_iscombiningmark (uni))
- {
- tool->cheked = g_utf8_next_char (tool->cheked);
- uni = g_utf8_get_char (tool->cheked);
- }
- return 1;
- }
- static void
- utf8_tool_compose (char *buffer, size_t size)
- {
- char *composed =
- g_utf8_normalize (buffer, -1, G_NORMALIZE_DEFAULT_COMPOSE);
- g_strlcpy (buffer, composed, size);
- g_free (composed);
- }
- static const char *
- str_utf8_fit_to_term (const char *text, int width, align_crt_t just_mode)
- {
- static char result[BUF_MEDIUM * 6];
- const struct term_form *pre_form;
- struct utf8_tool tool;
- pre_form = str_utf8_make_make_term_form (text, (size_t) (-1));
- tool.cheked = pre_form->text;
- tool.actual = result;
- tool.remain = sizeof (result);
- tool.compose = 0;
- if (pre_form->width <= (gsize)width)
- {
- tool.ident = 0;
- switch (HIDE_FIT (just_mode))
- {
- case J_CENTER_LEFT:
- case J_CENTER:
- tool.ident = (width - pre_form->width) / 2;
- break;
- case J_RIGHT:
- tool.ident = width - pre_form->width;
- break;
- }
- utf8_tool_insert_space (&tool, tool.ident);
- utf8_tool_copy_chars_to_end (&tool);
- utf8_tool_insert_space (&tool, width - pre_form->width - tool.ident);
- }
- else
- {
- if (IS_FIT (just_mode))
- {
- tool.ident = 0;
- utf8_tool_copy_chars_to (&tool, width / 2);
- utf8_tool_insert_char (&tool, '~');
- tool.ident = 0;
- utf8_tool_skip_chars_to (&tool, pre_form->width - width + 1);
- utf8_tool_copy_chars_to_end (&tool);
- utf8_tool_insert_space (&tool,
- width - (pre_form->width - tool.ident +
- 1));
- }
- else
- {
- tool.ident = 0;
- switch (HIDE_FIT (just_mode))
- {
- case J_CENTER:
- tool.ident = (width - pre_form->width) / 2;
- break;
- case J_RIGHT:
- tool.ident = width - pre_form->width;
- break;
- }
- utf8_tool_skip_chars_to (&tool, 0);
- utf8_tool_insert_space (&tool, tool.ident);
- utf8_tool_copy_chars_to (&tool, width);
- utf8_tool_insert_space (&tool, width - tool.ident);
- }
- }
- tool.actual[0] = '\0';
- if (tool.compose)
- utf8_tool_compose (result, sizeof (result));
- return result;
- }
- static const char *
- str_utf8_term_trim (const char *text, int width)
- {
- static char result[BUF_MEDIUM * 6];
- const struct term_form *pre_form;
- struct utf8_tool tool;
- pre_form = str_utf8_make_make_term_form (text, (size_t) (-1));
- tool.cheked = pre_form->text;
- tool.actual = result;
- tool.remain = sizeof (result);
- tool.compose = 0;
- if ((gsize)width < pre_form->width)
- {
- if (width <= 3)
- {
- memset (tool.actual, '.', width);
- tool.actual += width;
- tool.remain -= width;
- }
- else
- {
- memset (tool.actual, '.', 3);
- tool.actual += 3;
- tool.remain -= 3;
- tool.ident = 0;
- utf8_tool_skip_chars_to (&tool, pre_form->width - width + 3);
- utf8_tool_copy_chars_to_end (&tool);
- }
- }
- else
- {
- utf8_tool_copy_chars_to_end (&tool);
- }
- tool.actual[0] = '\0';
- if (tool.compose)
- utf8_tool_compose (result, sizeof (result));
- return result;
- }
- static int
- str_utf8_term_width2 (const char *text, size_t length)
- {
- const struct term_form *result;
- result = str_utf8_make_make_term_form (text, length);
- return result->width;
- }
- static int
- str_utf8_term_width1 (const char *text)
- {
- return str_utf8_term_width2 (text, (size_t) (-1));
- }
- static int
- str_utf8_term_char_width (const char *text)
- {
- gunichar uni = g_utf8_get_char_validated (text, -1);
- return (str_unichar_iscombiningmark (uni)) ? 0
- : ((g_unichar_iswide (uni)) ? 2 : 1);
- }
- static void
- str_utf8_msg_term_size (const char *text, int *lines, int *columns)
- {
- char *p, *tmp;
- char *q;
- char c = '\0';
- int width;
- (*lines) = 1;
- (*columns) = 0;
- tmp = g_strdup (text);
- p = tmp;
- for (;;)
- {
- q = strchr (p, '\n');
- if (q != NULL)
- {
- c = q[0];
- q[0] = '\0';
- }
- width = str_utf8_term_width1 (p);
- if (width > (*columns))
- (*columns) = width;
- if (q == NULL)
- break;
- q[0] = c;
- p = q + 1;
- (*lines)++;
- }
- g_free (tmp);
- }
- static const char *
- str_utf8_term_substring (const char *text, int start, int width)
- {
- static char result[BUF_MEDIUM * 6];
- const struct term_form *pre_form;
- struct utf8_tool tool;
- pre_form = str_utf8_make_make_term_form (text, (size_t) (-1));
- tool.cheked = pre_form->text;
- tool.actual = result;
- tool.remain = sizeof (result);
- tool.compose = 0;
- tool.ident = -start;
- utf8_tool_skip_chars_to (&tool, 0);
- if (tool.ident < 0)
- tool.ident = 0;
- utf8_tool_insert_space (&tool, tool.ident);
- utf8_tool_copy_chars_to (&tool, width);
- utf8_tool_insert_space (&tool, width - tool.ident);
- tool.actual[0] = '\0';
- if (tool.compose)
- utf8_tool_compose (result, sizeof (result));
- return result;
- }
- static const char *
- str_utf8_trunc (const char *text, int width)
- {
- static char result[MC_MAXPATHLEN * 6 * 2];
- const struct term_form *pre_form;
- struct utf8_tool tool;
- pre_form = str_utf8_make_make_term_form (text, (size_t) (-1));
- tool.cheked = pre_form->text;
- tool.actual = result;
- tool.remain = sizeof (result);
- tool.compose = 0;
- if (pre_form->width > (gsize)width)
- {
- tool.ident = 0;
- utf8_tool_copy_chars_to (&tool, width / 2);
- utf8_tool_insert_char (&tool, '~');
- tool.ident = 0;
- utf8_tool_skip_chars_to (&tool, pre_form->width - width + 1);
- utf8_tool_copy_chars_to_end (&tool);
- }
- else
- {
- utf8_tool_copy_chars_to_end (&tool);
- }
- tool.actual[0] = '\0';
- if (tool.compose)
- utf8_tool_compose (result, sizeof (result));
- return result;
- }
- static int
- str_utf8_offset_to_pos (const char *text, size_t length)
- {
- if (str_utf8_is_valid_string (text))
- return g_utf8_offset_to_pointer (text, length) - text;
- else
- {
- int result;
- GString *buffer = g_string_new (text);
- str_utf8_fix_string (buffer->str);
- result = g_utf8_offset_to_pointer (buffer->str, length) - buffer->str;
- g_string_free (buffer, TRUE);
- return result;
- }
- }
- static int
- str_utf8_column_to_pos (const char *text, size_t pos)
- {
- static int result;
- gunichar uni;
- int width;
- width = 0;
- result = 0;
- while (text[0] != '\0')
- {
- uni = g_utf8_get_char_validated (text, 6);
- if ((uni != (gunichar) (-1)) && (uni != (gunichar) (-2)))
- {
- if (g_unichar_isprint (uni))
- {
- if (!str_unichar_iscombiningmark (uni))
- {
- width++;
- if (g_unichar_iswide (uni))
- width++;
- }
- }
- else
- {
- width++;
- }
- text = g_utf8_next_char (text);
- }
- else
- {
- text++;
- width++;
- }
- if ((gsize)width > pos)
- return result;
- result++;
- }
- return result;
- }
- static char *
- str_utf8_create_search_needle (const char *needle, int case_sen)
- {
- if (needle != NULL)
- {
- if (case_sen)
- {
- return g_utf8_normalize (needle, -1, G_NORMALIZE_ALL);
- }
- else
- {
- char *fold = g_utf8_casefold (needle, -1);
- char *result = g_utf8_normalize (fold, -1, G_NORMALIZE_ALL);
- g_free (fold);
- return result;
- }
- }
- else
- return NULL;
- }
- static void
- str_utf8_release_search_needle (char *needle, int case_sen)
- {
- (void) case_sen;
- if (needle != NULL)
- g_free (needle);
- }
- static const char *
- str_utf8_search_first (const char *text, const char *search, int case_sen)
- {
- char *fold_text;
- char *deco_text;
- const char *match;
- const char *result = NULL;
- const char *m;
- fold_text = (case_sen) ? (char *) text : g_utf8_casefold (text, -1);
- deco_text = g_utf8_normalize (fold_text, -1, G_NORMALIZE_ALL);
- match = deco_text;
- do
- {
- match = g_strstr_len (match, -1, search);
- if (match != NULL)
- {
- if ((!str_utf8_iscombiningmark (match) || (match == deco_text)) &&
- !str_utf8_iscombiningmark (match + strlen (search)))
- {
- result = text;
- m = deco_text;
- while (m < match)
- {
- str_utf8_cnext_noncomb_char (&m);
- str_utf8_cnext_noncomb_char (&result);
- }
- }
- else
- {
- str_utf8_cnext_char (&match);
- }
- }
- }
- while (match != NULL && result == NULL);
- g_free (deco_text);
- if (!case_sen)
- g_free (fold_text);
- return result;
- }
- static const char *
- str_utf8_search_last (const char *text, const char *search, int case_sen)
- {
- char *fold_text;
- char *deco_text;
- char *match;
- const char *result = NULL;
- const char *m;
- fold_text = (case_sen) ? (char *) text : g_utf8_casefold (text, -1);
- deco_text = g_utf8_normalize (fold_text, -1, G_NORMALIZE_ALL);
- do
- {
- match = g_strrstr_len (deco_text, -1, search);
- if (match != NULL)
- {
- if ((!str_utf8_iscombiningmark (match) || (match == deco_text)) &&
- !str_utf8_iscombiningmark (match + strlen (search)))
- {
- result = text;
- m = deco_text;
- while (m < match)
- {
- str_utf8_cnext_noncomb_char (&m);
- str_utf8_cnext_noncomb_char (&result);
- }
- }
- else
- {
- match[0] = '\0';
- }
- }
- }
- while (match != NULL && result == NULL);
- g_free (deco_text);
- if (!case_sen)
- g_free (fold_text);
- return result;
- }
- static char *
- str_utf8_normalize (const char *text)
- {
- GString *fixed = g_string_new ("");
- char *tmp;
- char *result;
- const char *start;
- const char *end;
- start = text;
- while (!g_utf8_validate (start, -1, &end) && start[0] != '\0')
- {
- if (start != end)
- {
- tmp = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL);
- g_string_append (fixed, tmp);
- g_free (tmp);
- }
- g_string_append_c (fixed, end[0]);
- start = end + 1;
- }
- if (start == text)
- {
- result = g_utf8_normalize (text, -1, G_NORMALIZE_ALL);
- }
- else
- {
- if (start[0] != '\0' && start != end)
- {
- tmp = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL);
- g_string_append (fixed, tmp);
- g_free (tmp);
- }
- result = g_strdup (fixed->str);
- }
- g_string_free (fixed, TRUE);
- return result;
- }
- static char *
- str_utf8_casefold_normalize (const char *text)
- {
- GString *fixed = g_string_new ("");
- char *tmp, *fold;
- char *result;
- const char *start;
- const char *end;
- start = text;
- while (!g_utf8_validate (start, -1, &end) && start[0] != '\0')
- {
- if (start != end)
- {
- fold = g_utf8_casefold (start, end - start);
- tmp = g_utf8_normalize (fold, -1, G_NORMALIZE_ALL);
- g_string_append (fixed, tmp);
- g_free (tmp);
- g_free (fold);
- }
- g_string_append_c (fixed, end[0]);
- start = end + 1;
- }
- if (start == text)
- {
- fold = g_utf8_casefold (text, -1);
- result = g_utf8_normalize (fold, -1, G_NORMALIZE_ALL);
- g_free (fold);
- }
- else
- {
- if (start[0] != '\0' && start != end)
- {
- fold = g_utf8_casefold (start, end - start);
- tmp = g_utf8_normalize (fold, -1, G_NORMALIZE_ALL);
- g_string_append (fixed, tmp);
- g_free (tmp);
- g_free (fold);
- }
- result = g_strdup (fixed->str);
- }
- g_string_free (fixed, TRUE);
- return result;
- }
- static int
- str_utf8_compare (const char *t1, const char *t2)
- {
- char *n1, *n2;
- int result;
- n1 = str_utf8_normalize (t1);
- n2 = str_utf8_normalize (t2);
- result = strcmp (n1, n2);
- g_free (n1);
- g_free (n2);
- return result;
- }
- static int
- str_utf8_ncompare (const char *t1, const char *t2)
- {
- char *n1, *n2;
- int result;
- n1 = str_utf8_normalize (t1);
- n2 = str_utf8_normalize (t2);
- result = strncmp (n1, n2, min (strlen (n1), strlen (n2)));
- g_free (n1);
- g_free (n2);
- return result;
- }
- static int
- str_utf8_casecmp (const char *t1, const char *t2)
- {
- char *n1, *n2;
- int result;
- n1 = str_utf8_casefold_normalize (t1);
- n2 = str_utf8_casefold_normalize (t2);
- result = strcmp (n1, n2);
- g_free (n1);
- g_free (n2);
- return result;
- }
- static int
- str_utf8_ncasecmp (const char *t1, const char *t2)
- {
- char *n1, *n2;
- int result;
- n1 = str_utf8_casefold_normalize (t1);
- n2 = str_utf8_casefold_normalize (t2);
- result = strncmp (n1, n2, min (strlen (n1), strlen (n2)));
- g_free (n1);
- g_free (n2);
- return result;
- }
- static int
- str_utf8_prefix (const char *text, const char *prefix)
- {
- char *t = str_utf8_normalize (text);
- char *p = str_utf8_normalize (prefix);
- const char *nt = t;
- const char *np = p;
- const char *nnt = t;
- const char *nnp = p;
- int result;
- while (nt[0] != '\0' && np[0] != '\0')
- {
- str_utf8_cnext_char_safe (&nnt);
- str_utf8_cnext_char_safe (&nnp);
- if (nnt - nt != nnp - np)
- break;
- if (strncmp (nt, np, nnt - nt) != 0)
- break;
- nt = nnt;
- np = nnp;
- }
- result = np - p;
- g_free (t);
- g_free (p);
- return result;
- }
- static int
- str_utf8_caseprefix (const char *text, const char *prefix)
- {
- char *t = str_utf8_casefold_normalize (text);
- char *p = str_utf8_casefold_normalize (prefix);
- const char *nt = t;
- const char *np = p;
- const char *nnt = t;
- const char *nnp = p;
- int result;
- while (nt[0] != '\0' && np[0] != '\0')
- {
- str_utf8_cnext_char_safe (&nnt);
- str_utf8_cnext_char_safe (&nnp);
- if (nnt - nt != nnp - np)
- break;
- if (strncmp (nt, np, nnt - nt) != 0)
- break;
- nt = nnt;
- np = nnp;
- }
- result = np - p;
- g_free (t);
- g_free (p);
- return result;
- }
- static char *
- str_utf8_create_key_gen (const char *text, int case_sen,
- gchar * (*keygen) (const gchar *, gssize size))
- {
- char *result;
-
- if (case_sen) {
- result = str_utf8_normalize (text);
- } else {
- const char *start, *end;
- char *fold, *key;
- GString *fixed = g_string_new ("");
- start = text;
- while (!g_utf8_validate (start, -1, &end) && start[0] != '\0')
- {
- if (start != end)
- {
- fold = g_utf8_casefold (start, end - start);
- key = keygen (fold, -1);
- g_string_append (fixed, key);
- g_free (key);
- g_free (fold);
- }
- g_string_append_c (fixed, end[0]);
- start = end + 1;
- }
- if (start == text)
- {
- fold = g_utf8_casefold (text, -1);
- result = keygen (fold, -1);
- g_free (fold);
- }
- else
- {
- if (start[0] != '\0' && start != end)
- {
- fold = g_utf8_casefold (start, end - start);
- key = keygen (fold, -1);
- g_string_append (fixed, key);
- g_free (key);
- g_free (fold);
- }
- result = g_strdup (fixed->str);
- }
- g_string_free (fixed, TRUE);
- }
- return result;
- }
- static char *
- str_utf8_create_key (const char *text, int case_sen)
- {
- return str_utf8_create_key_gen (text, case_sen, g_utf8_collate_key);
- }
- #ifdef MC__USE_STR_UTF8_CREATE_KEY_FOR_FILENAME
- static char *
- str_utf8_create_key_for_filename (const char *text, int case_sen)
- {
- return str_utf8_create_key_gen (text, case_sen,
- g_utf8_collate_key_for_filename);
- }
- #endif
- static int
- str_utf8_key_collate (const char *t1, const char *t2, int case_sen)
- {
- (void) case_sen;
- return strcmp (t1, t2);
- }
- static void
- str_utf8_release_key (char *key, int case_sen)
- {
- (void) case_sen;
- g_free (key);
- }
- struct str_class
- str_utf8_init (void)
- {
- struct str_class result;
- result.conv_gerror_message = str_utf8_conv_gerror_message;
- result.vfs_convert_to = str_utf8_vfs_convert_to;
- result.insert_replace_char = str_utf8_insert_replace_char;
- result.is_valid_string = str_utf8_is_valid_string;
- result.is_valid_char = str_utf8_is_valid_char;
- result.cnext_char = str_utf8_cnext_char;
- result.cprev_char = str_utf8_cprev_char;
- result.cnext_char_safe = str_utf8_cnext_char_safe;
- result.cprev_char_safe = str_utf8_cprev_char_safe;
- result.cnext_noncomb_char = str_utf8_cnext_noncomb_char;
- result.cprev_noncomb_char = str_utf8_cprev_noncomb_char;
- result.isspace = str_utf8_isspace;
- result.ispunct = str_utf8_ispunct;
- result.isalnum = str_utf8_isalnum;
- result.isdigit = str_utf8_isdigit;
- result.isprint = str_utf8_isprint;
- result.iscombiningmark = str_utf8_iscombiningmark;
- result.toupper = str_utf8_toupper;
- result.tolower = str_utf8_tolower;
- result.length = str_utf8_length;
- result.length2 = str_utf8_length2;
- result.length_noncomb = str_utf8_length_noncomb;
- result.fix_string = str_utf8_fix_string;
- result.term_form = str_utf8_term_form;
- result.fit_to_term = str_utf8_fit_to_term;
- result.term_trim = str_utf8_term_trim;
- result.term_width2 = str_utf8_term_width2;
- result.term_width1 = str_utf8_term_width1;
- result.term_char_width = str_utf8_term_char_width;
- result.msg_term_size = str_utf8_msg_term_size;
- result.term_substring = str_utf8_term_substring;
- result.trunc = str_utf8_trunc;
- result.offset_to_pos = str_utf8_offset_to_pos;
- result.column_to_pos = str_utf8_column_to_pos;
- result.create_search_needle = str_utf8_create_search_needle;
- result.release_search_needle = str_utf8_release_search_needle;
- result.search_first = str_utf8_search_first;
- result.search_last = str_utf8_search_last;
- result.compare = str_utf8_compare;
- result.ncompare = str_utf8_ncompare;
- result.casecmp = str_utf8_casecmp;
- result.ncasecmp = str_utf8_ncasecmp;
- result.prefix = str_utf8_prefix;
- result.caseprefix = str_utf8_caseprefix;
- result.create_key = str_utf8_create_key;
- #ifdef MC__USE_STR_UTF8_CREATE_KEY_FOR_FILENAME
- /* case insensitive sort files in "a1 a2 a10" order */
- result.create_key_for_filename = str_utf8_create_key_for_filename;
- #else
- /* case insensitive sort files in "a1 a10 a2" order */
- result.create_key_for_filename = str_utf8_create_key;
- #endif
- result.key_collate = str_utf8_key_collate;
- result.release_key = str_utf8_release_key;
- return result;
- }
|