Browse Source

Merge branch '2396_find_whole_words'

* 2396_find_whole_words:
  Some optimization of loops in translation functions.
  Create strings with small preallocated sizes.
  mc_search__cond_struct_new_regex_ci_str(): get rid of extra string duplication.
  Minor optimization of translation function arguments.
  Avoid extra allocation of a string while preparing for regexp search.
  Removed mc_search_cond_t->len (used mc_search_cond_t->str->len instead).
  Fixed bit operations in mc_search_regex__process_append_str()
  Code cleanup to avoid compiler warnings
  Ticket #2396 (Find File "Whole words" search bug)
Slava Zanko 14 years ago
parent
commit
b60f00df0d
8 changed files with 128 additions and 120 deletions
  1. 23 37
      lib/search/glob.c
  2. 25 17
      lib/search/hex.c
  3. 0 1
      lib/search/internal.h
  4. 23 19
      lib/search/normal.c
  5. 42 29
      lib/search/regex.c
  6. 4 5
      lib/search/search.c
  7. 1 5
      src/charsets.c
  8. 10 7
      src/selcodepage.c

+ 23 - 37
lib/search/glob.c

@@ -48,56 +48,42 @@
 /*** file scope functions ************************************************************************/
 
 static GString *
-mc_search__glob_translate_to_regex (gchar * str, gsize * len)
+mc_search__glob_translate_to_regex (const GString * astr)
 {
-    GString *buff = g_string_new ("");
-    gsize orig_len = *len;
-    gsize loop = 0;
+    const char *str = astr->str;
+    GString *buff;
+    gsize loop;
     gboolean inside_group = FALSE;
-    while (loop < orig_len)
-    {
+
+    buff = g_string_sized_new (32);
+
+    for (loop = 0; loop < astr->len; loop++)
         switch (str[loop])
         {
         case '*':
             if (!strutils_is_char_escaped (str, &(str[loop])))
-            {
-                g_string_append (buff, (inside_group) ? ".*" : "(.*)");
-                loop++;
-                continue;
-            }
+                g_string_append (buff, inside_group ? ".*" : "(.*)");
             break;
         case '?':
             if (!strutils_is_char_escaped (str, &(str[loop])))
-            {
-                g_string_append (buff, (inside_group) ? "." : "(.)");
-                loop++;
-                continue;
-            }
+                g_string_append (buff, inside_group ? "." : "(.)");
             break;
         case ',':
             if (!strutils_is_char_escaped (str, &(str[loop])))
-            {
-                g_string_append (buff, "|");
-                loop++;
-                continue;
-            }
+                g_string_append_c (buff, '|');
             break;
         case '{':
             if (!strutils_is_char_escaped (str, &(str[loop])))
             {
-                g_string_append (buff, "(");
+                g_string_append_c (buff, '(');
                 inside_group = TRUE;
-                loop++;
-                continue;
             }
             break;
         case '}':
             if (!strutils_is_char_escaped (str, &(str[loop])))
             {
-                g_string_append (buff, ")");
+                g_string_append_c (buff, ')');
                 inside_group = FALSE;
-                loop++;
-                continue;
             }
             break;
         case '+':
@@ -107,14 +93,12 @@ mc_search__glob_translate_to_regex (gchar * str, gsize * len)
         case ')':
         case '^':
             g_string_append_c (buff, '\\');
+            /* fall through */
+        default:
             g_string_append_c (buff, str[loop]);
-            loop++;
-            continue;
+            break;
         }
-        g_string_append_c (buff, str[loop]);
-        loop++;
-    }
-    *len = buff->len;
+
     return buff;
 }
 
@@ -123,9 +107,12 @@ mc_search__glob_translate_to_regex (gchar * str, gsize * len)
 static GString *
 mc_search__translate_replace_glob_to_regex (gchar * str)
 {
-    GString *buff = g_string_sized_new (32);
+    GString *buff;
     int cnt = '0';
     gboolean escaped_mode = FALSE;
+
+    buff = g_string_sized_new (32);
+
     while (*str)
     {
         char c = *str++;
@@ -162,9 +149,9 @@ void
 mc_search__cond_struct_new_init_glob (const char *charset, mc_search_t * lc_mc_search,
                                       mc_search_cond_t * mc_search_cond)
 {
-    GString *tmp =
-        mc_search__glob_translate_to_regex (mc_search_cond->str->str, &mc_search_cond->len);
+    GString *tmp;
 
+    tmp = mc_search__glob_translate_to_regex (mc_search_cond->str);
     g_string_free (mc_search_cond->str, TRUE);
 
     if (lc_mc_search->is_entire_line)
@@ -175,7 +162,6 @@ mc_search__cond_struct_new_init_glob (const char *charset, mc_search_t * lc_mc_s
     mc_search_cond->str = tmp;
 
     mc_search__cond_struct_new_init_regex (charset, lc_mc_search, mc_search_cond);
-
 }
 
 /* --------------------------------------------------------------------------------------------- */

+ 25 - 17
lib/search/hex.c

@@ -49,48 +49,57 @@
 /*** file scope functions ************************************************************************/
 
 static GString *
-mc_search__hex_translate_to_regex (gchar * str, gsize * len)
+mc_search__hex_translate_to_regex (const GString * astr)
 {
-    GString *buff = g_string_new ("");
-    gchar *tmp_str = g_strndup (str, *len);
-    gchar *tmp_str2;
+    const char *str = astr->str;
+    GString *buff;
+    gchar *tmp_str;
     gsize loop = 0;
     int val, ptr;
 
+    buff = g_string_sized_new (64);
+    tmp_str = g_strndup (str, astr->len);
     g_strchug (tmp_str);        /* trim leading whitespace */
 
-    while (loop < *len) {
-        if (sscanf (tmp_str + loop, "%i%n", &val, &ptr)) {
-            if (val < -128 || val > 255) {
+    while (loop < astr->len)
+    {
+        if (sscanf (tmp_str + loop, "%i%n", &val, &ptr))
+        {
+            gchar *tmp_str2;
+
+            if (val < -128 || val > 255)
+            {
                 loop++;
                 continue;
             }
+
             tmp_str2 = g_strdup_printf ("\\x%02X", (unsigned char) val);
             g_string_append (buff, tmp_str2);
             g_free (tmp_str2);
             loop += ptr;
-            continue;
         }
-
-        if (*(tmp_str + loop) == '"') {
+        else if (*(tmp_str + loop) == '"')
+        {
             gsize loop2 = 0;
+
             loop++;
-            while (loop + loop2 < *len) {
+            while (loop + loop2 < astr->len)
+            {
                 if (*(tmp_str + loop + loop2) == '"' &&
                     !strutils_is_char_escaped (tmp_str, tmp_str + loop + loop2))
                     break;
                 loop2++;
             }
+
             g_string_append_len (buff, tmp_str + loop, loop2 - 1);
             loop += loop2;
-            continue;
         }
-        loop++;
+        else
+            loop++;
     }
 
     g_free (tmp_str);
 
-    *len = buff->len;
     return buff;
 }
 
@@ -100,14 +109,13 @@ void
 mc_search__cond_struct_new_init_hex (const char *charset, mc_search_t * lc_mc_search,
                                      mc_search_cond_t * mc_search_cond)
 {
-    GString *tmp =
-        mc_search__hex_translate_to_regex (mc_search_cond->str->str, &mc_search_cond->len);
+    GString *tmp;
 
+    tmp = mc_search__hex_translate_to_regex (mc_search_cond->str);
     g_string_free (mc_search_cond->str, TRUE);
     mc_search_cond->str = tmp;
 
     mc_search__cond_struct_new_init_regex (charset, lc_mc_search, mc_search_cond);
-
 }
 
 /* --------------------------------------------------------------------------------------------- */

+ 0 - 1
lib/search/internal.h

@@ -27,7 +27,6 @@ typedef struct mc_search_cond_struct {
     GString *upper;
     GString *lower;
     mc_search_regex_t *regex_handle;
-    gsize len;
     gchar *charset;
 } mc_search_cond_t;
 

+ 23 - 19
lib/search/normal.c

@@ -46,14 +46,17 @@
 /*** file scope functions ************************************************************************/
 
 static GString *
-mc_search__normal_translate_to_regex (gchar * str, gsize * len)
+mc_search__normal_translate_to_regex (const GString * astr)
 {
-    GString *buff = g_string_new ("");
-    gsize orig_len = *len;
-    gsize loop = 0;
+    const char *str = astr->str;
+    GString *buff;
+    gsize loop;
 
-    while (loop < orig_len) {
-        switch (str[loop]) {
+    buff = g_string_sized_new (32);
+
+    for (loop = 0; loop < astr->len; loop++)
+        switch (str[loop])
+        {
         case '*':
         case '?':
         case ',':
@@ -71,14 +74,12 @@ mc_search__normal_translate_to_regex (gchar * str, gsize * len)
         case '-':
         case '|':
             g_string_append_c (buff, '\\');
+            /* fall through */
+        default:
             g_string_append_c (buff, str[loop]);
-            loop++;
-            continue;
+            break;
         }
-        g_string_append_c (buff, str[loop]);
-        loop++;
-    }
-    *len = buff->len;
+
     return buff;
 }
 
@@ -88,18 +89,21 @@ void
 mc_search__cond_struct_new_init_normal (const char *charset, mc_search_t * lc_mc_search,
                                         mc_search_cond_t * mc_search_cond)
 {
-    GString *tmp =
-        mc_search__normal_translate_to_regex (mc_search_cond->str->str, &mc_search_cond->len);
+    GString *tmp;
 
+    tmp = mc_search__normal_translate_to_regex (mc_search_cond->str);
     g_string_free (mc_search_cond->str, TRUE);
-    if (lc_mc_search->whole_words) {
-        g_string_prepend (tmp, "\\b");
-        g_string_append (tmp, "\\b");
+
+    if (lc_mc_search->whole_words)
+    {
+        /* NOTE: \b as a word boundary doesn't allow searching for
+         * whole words that contain non-ASCII symbols */
+        g_string_prepend (tmp, "(^|[^\\p{L}\\p{N}_])(");
+        g_string_append (tmp, ")([^\\p{L}\\p{N}_]|$)");
     }
-    mc_search_cond->str = tmp;
 
+    mc_search_cond->str = tmp;
     mc_search__cond_struct_new_init_regex (charset, lc_mc_search, mc_search_cond);
-
 }
 
 /* --------------------------------------------------------------------------------------------- */

+ 42 - 29
lib/search/regex.c

@@ -58,7 +58,7 @@ typedef enum
 /*** file scope functions ************************************************************************/
 
 static gboolean
-mc_search__regex_str_append_if_special (GString * copy_to, GString * regex_str, gsize * offset)
+mc_search__regex_str_append_if_special (GString * copy_to, const GString * regex_str, gsize * offset)
 {
     char *tmp_regex_str;
     gsize spec_chr_len;
@@ -194,22 +194,19 @@ mc_search__cond_struct_new_regex_accum_append (const char *charset, GString * st
 /* --------------------------------------------------------------------------------------------- */
 
 static GString *
-mc_search__cond_struct_new_regex_ci_str (const char *charset, const char *str, gsize str_len)
+mc_search__cond_struct_new_regex_ci_str (const char *charset, const GString *astr)
 {
     GString *accumulator, *spec_char, *ret_str;
     gsize loop;
-    GString *tmp;
-    tmp = g_string_new_len (str, str_len);
 
-
-    ret_str = g_string_new ("");
-    accumulator = g_string_new ("");
-    spec_char = g_string_new ("");
+    ret_str = g_string_sized_new (64);
+    accumulator = g_string_sized_new (64);
+    spec_char = g_string_sized_new (64);
     loop = 0;
 
-    while (loop <= str_len)
+    while (loop <= astr->len)
     {
-        if (mc_search__regex_str_append_if_special (spec_char, tmp, &loop))
+        if (mc_search__regex_str_append_if_special (spec_char, astr, &loop))
         {
             mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator);
             g_string_append_len (ret_str, spec_char->str, spec_char->len);
@@ -217,32 +214,32 @@ mc_search__cond_struct_new_regex_ci_str (const char *charset, const char *str, g
             continue;
         }
 
-        if (tmp->str[loop] == '[' && !strutils_is_char_escaped (tmp->str, &(tmp->str[loop])))
+        if (astr->str[loop] == '[' && !strutils_is_char_escaped (astr->str, &(astr->str[loop])))
         {
             mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator);
 
-            while (loop < str_len && !(tmp->str[loop] == ']'
-                                       && !strutils_is_char_escaped (tmp->str, &(tmp->str[loop]))))
+            while (loop < astr->len && !(astr->str[loop] == ']'
+                                       && !strutils_is_char_escaped (astr->str, &(astr->str[loop]))))
             {
-                g_string_append_c (ret_str, tmp->str[loop]);
+                g_string_append_c (ret_str, astr->str[loop]);
                 loop++;
-
             }
-            g_string_append_c (ret_str, tmp->str[loop]);
+
+            g_string_append_c (ret_str, astr->str[loop]);
             loop++;
             continue;
         }
         /*
            TODO: handle [ and ]
          */
-        g_string_append_c (accumulator, tmp->str[loop]);
+        g_string_append_c (accumulator, astr->str[loop]);
         loop++;
     }
     mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator);
 
     g_string_free (accumulator, TRUE);
     g_string_free (spec_char, TRUE);
-    g_string_free (tmp, TRUE);
+
     return ret_str;
 }
 
@@ -478,7 +475,7 @@ mc_search_regex__process_append_str (GString * dest_str, const char *from, gsize
         char_len = strlen (tmp_str);
         if (*replace_flags & REPLACE_T_UPP_TRANSFORM_CHAR)
         {
-            *replace_flags &= !REPLACE_T_UPP_TRANSFORM_CHAR;
+            *replace_flags &= ~REPLACE_T_UPP_TRANSFORM_CHAR;
             tmp_string = mc_search__toupper_case_str (NULL, tmp_str, char_len);
             g_string_append (dest_str, tmp_string->str);
             g_string_free (tmp_string, TRUE);
@@ -486,7 +483,7 @@ mc_search_regex__process_append_str (GString * dest_str, const char *from, gsize
         }
         else if (*replace_flags & REPLACE_T_LOW_TRANSFORM_CHAR)
         {
-            *replace_flags &= !REPLACE_T_LOW_TRANSFORM_CHAR;
+            *replace_flags &= ~REPLACE_T_LOW_TRANSFORM_CHAR;
             tmp_string = mc_search__toupper_case_str (NULL, tmp_str, char_len);
             g_string_append (dest_str, tmp_string->str);
             g_string_free (tmp_string, TRUE);
@@ -522,7 +519,6 @@ void
 mc_search__cond_struct_new_init_regex (const char *charset, mc_search_t * lc_mc_search,
                                        mc_search_cond_t * mc_search_cond)
 {
-    GString *tmp = NULL;
 #ifdef SEARCH_TYPE_GLIB
     GError *error = NULL;
 #else /* SEARCH_TYPE_GLIB */
@@ -532,9 +528,10 @@ mc_search__cond_struct_new_init_regex (const char *charset, mc_search_t * lc_mc_
 
     if (!lc_mc_search->is_case_sensitive)
     {
-        tmp = g_string_new_len (mc_search_cond->str->str, mc_search_cond->str->len);
-        g_string_free (mc_search_cond->str, TRUE);
-        mc_search_cond->str = mc_search__cond_struct_new_regex_ci_str (charset, tmp->str, tmp->len);
+        GString *tmp;
+
+        tmp = mc_search_cond->str;
+        mc_search_cond->str = mc_search__cond_struct_new_regex_ci_str (charset, tmp);
         g_string_free (tmp, TRUE);
     }
 #ifdef SEARCH_TYPE_GLIB
@@ -587,7 +584,7 @@ mc_search__run_regex (mc_search_t * lc_mc_search, const void *user_data,
     if (lc_mc_search->regex_buffer != NULL)
         g_string_free (lc_mc_search->regex_buffer, TRUE);
 
-    lc_mc_search->regex_buffer = g_string_new ("");
+    lc_mc_search->regex_buffer = g_string_sized_new (64);
 
     virtual_pos = current_pos = start_search;
     while (virtual_pos <= end_search)
@@ -622,10 +619,25 @@ mc_search__run_regex (mc_search_t * lc_mc_search, const void *user_data,
         {
         case COND__FOUND_OK:
 #ifdef SEARCH_TYPE_GLIB
-            g_match_info_fetch_pos (lc_mc_search->regex_match_info, 0, &start_pos, &end_pos);
+            if (lc_mc_search->whole_words)
+            {
+                g_match_info_fetch_pos (lc_mc_search->regex_match_info, 2, &start_pos, &end_pos);
+            }
+            else
+            {
+                g_match_info_fetch_pos (lc_mc_search->regex_match_info, 0, &start_pos, &end_pos);
+            }
 #else /* SEARCH_TYPE_GLIB */
-            start_pos = lc_mc_search->iovector[0];
-            end_pos = lc_mc_search->iovector[1];
+            if (lc_mc_search->whole_words)
+            {
+                start_pos = lc_mc_search->iovector[4];
+                end_pos = lc_mc_search->iovector[5];
+            }
+            else
+            {
+                start_pos = lc_mc_search->iovector[0];
+                end_pos = lc_mc_search->iovector[1];
+            }
 #endif /* SEARCH_TYPE_GLIB */
             if (found_len)
                 *found_len = end_pos - start_pos;
@@ -684,8 +696,9 @@ mc_search_regex_prepare_replace_str (mc_search_t * lc_mc_search, GString * repla
         return NULL;
     }
 
-    ret = g_string_new ("");
+    ret = g_string_sized_new (64);
     prev_str = replace_str->str;
+
     for (loop = 0; loop < replace_str->len - 1; loop++)
     {
         lc_index = mc_search_regex__process_replace_str (replace_str, loop, &len, &replace_flags);

+ 4 - 5
lib/search/search.c

@@ -64,7 +64,6 @@ mc_search__cond_struct_new (mc_search_t * lc_mc_search, const char *str,
     mc_search_cond = g_malloc0 (sizeof (mc_search_cond_t));
 
     mc_search_cond->str = g_string_new_len (str, str_len);
-    mc_search_cond->len = str_len;
     mc_search_cond->charset = g_strdup (charset);
 
     switch (lc_mc_search->search_type) {
@@ -340,12 +339,12 @@ char *
 mc_search_prepare_replace_str2 (mc_search_t * lc_mc_search, char *replace_str)
 {
     GString *ret;
-    GString *replace_str2 = g_string_new (replace_str);
+    GString *replace_str2;
+
+    replace_str2 = g_string_new (replace_str);
     ret = mc_search_prepare_replace_str (lc_mc_search, replace_str2);
     g_string_free (replace_str2, TRUE);
-    if (ret)
-        return g_string_free (ret, FALSE);
-    return NULL;
+    return (ret != NULL) ? g_string_free (ret, FALSE) : NULL;
 }
 
 /* --------------------------------------------------------------------------------------------- */

+ 1 - 5
src/charsets.c

@@ -114,9 +114,6 @@ load_codepages_list_from_file (GPtrArray **list, const char *fname)
             }
             else
             {
-                guint i;
-                codepage_desc *desc;
-
                 /* whether id is already present in list */
                 /* if yes, overwrite description */
                 for (i = 0; i < (*list)->len; i++)
@@ -154,7 +151,6 @@ load_codepages_list_from_file (GPtrArray **list, const char *fname)
 void
 load_codepages_list (void)
 {
-    int result = -1;
     char *fname;
 
     /* 1: try load /usr/share/mc/mc.charsets */
@@ -195,7 +191,7 @@ get_codepage_id (const int n)
 int
 get_codepage_index (const char *id)
 {
-    int i;
+    size_t i;
     if (strcmp (id, OTHER_8BIT) == 0)
 	return -1;
     if (codepages == NULL)

+ 10 - 7
src/selcodepage.c

@@ -60,7 +60,8 @@ get_hotkey (int n)
 int
 select_charset (int center_y, int center_x, int current_charset, gboolean seldisplay)
 {
-    int i;
+    size_t i;
+    int listbox_result;
     char buffer[255];
 
     /* Create listbox */
@@ -88,24 +89,26 @@ select_charset (int center_y, int center_x, int current_charset, gboolean seldis
 
     /* Select the default entry */
     i = (seldisplay)
-	? ((current_charset < 0) ? codepages->len : current_charset)
-	: (current_charset + 1);
+	? ((current_charset < 0) ? codepages->len : (size_t) current_charset)
+	: ((size_t)current_charset + 1);
 
     listbox_select_entry (listbox->list, i);
 
-    i = run_listbox (listbox);
+    listbox_result = run_listbox (listbox);
 
-    if (i < 0) {
+    if (listbox_result < 0) {
 	/* Cancel dialog */
 	return SELECT_CHARSET_CANCEL;
     } else {
 	/* some charset has been selected */
 	if (seldisplay) {
 	    /* charset list ends with the "Other 8 bit" item */
-	    return ((guint) i >= codepages->len) ? SELECT_CHARSET_OTHER_8BIT : i;
+	    return (listbox_result >= (int) codepages->len)
+	        ? SELECT_CHARSET_OTHER_8BIT
+	        : listbox_result;
 	} else {
 	    /* charset list begins with the "-  < No translation >" item */
-	    return (i - 1);
+	    return (listbox_result - 1);
 	}
     }
 }