hex.c 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. /*
  2. Search text engine.
  3. HEX-style pattern matching
  4. Copyright (C) 2009-2025
  5. Free Software Foundation, Inc.
  6. Written by:
  7. Slava Zanko <slavazanko@gmail.com>, 2009.
  8. This file is part of the Midnight Commander.
  9. The Midnight Commander is free software: you can redistribute it
  10. and/or modify it under the terms of the GNU General Public License as
  11. published by the Free Software Foundation, either version 3 of the License,
  12. or (at your option) any later version.
  13. The Midnight Commander is distributed in the hope that it will be useful,
  14. but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. GNU General Public License for more details.
  17. You should have received a copy of the GNU General Public License
  18. along with this program. If not, see <http://www.gnu.org/licenses/>.
  19. */
  #include <config.h>

  #include <stdio.h>
  #include <stdlib.h>

  #include "lib/global.h"
  #include "lib/strutil.h"
  #include "lib/search.h"

  #include "internal.h"
  26. /*** global variables ****************************************************************************/
  27. /*** file scope macro definitions ****************************************************************/
  /* Result codes produced while parsing a hex search pattern. */
  typedef enum
  {
      /* The pattern was parsed without problems. */
      MC_SEARCH_HEX_E_OK = 0,
      /* A hexadecimal number was larger than 0xFF. */
      MC_SEARCH_HEX_E_NUM_OUT_OF_RANGE = 1,
      /* A character that starts neither a hex number nor a quoted string was met. */
      MC_SEARCH_HEX_E_INVALID_CHARACTER = 2,
      /* A quoted string was opened but never closed. */
      MC_SEARCH_HEX_E_UNMATCHED_QUOTES = 3
  } mc_search_hex_parse_error_t;
  35. /*** file scope type declarations ****************************************************************/
  36. /*** forward declarations (file scope functions) *************************************************/
  37. /*** file scope variables ************************************************************************/
  38. /* --------------------------------------------------------------------------------------------- */
  39. /*** file scope functions ************************************************************************/
  40. /* --------------------------------------------------------------------------------------------- */
  41. static GString *
  42. mc_search__hex_translate_to_regex (const GString *astr, mc_search_hex_parse_error_t *error_ptr,
  43. int *error_pos_ptr)
  44. {
  45. GString *buff;
  46. const char *str;
  47. gsize str_len;
  48. gsize loop = 0;
  49. mc_search_hex_parse_error_t error = MC_SEARCH_HEX_E_OK;
  50. buff = g_string_sized_new (64);
  51. str = astr->str;
  52. str_len = astr->len;
  53. while (loop < str_len && error == MC_SEARCH_HEX_E_OK)
  54. {
  55. unsigned int val;
  56. int ptr;
  57. if (g_ascii_isspace (str[loop]))
  58. {
  59. /* Eat-up whitespace between tokens. */
  60. while (g_ascii_isspace (str[loop]))
  61. loop++;
  62. }
  63. /* cppcheck-suppress invalidscanf */
  64. else if (sscanf (str + loop, "%x%n", &val, &ptr) == 1)
  65. {
  66. if (val > 255)
  67. error = MC_SEARCH_HEX_E_NUM_OUT_OF_RANGE;
  68. else
  69. {
  70. g_string_append_printf (buff, "\\x%02X", val);
  71. loop += ptr;
  72. }
  73. }
  74. else if (str[loop] == '"')
  75. {
  76. gsize loop2;
  77. loop2 = loop + 1;
  78. while (loop2 < str_len)
  79. {
  80. if (str[loop2] == '"')
  81. break;
  82. if (str[loop2] == '\\' && loop2 + 1 < str_len)
  83. loop2++;
  84. g_string_append_c (buff, str[loop2]);
  85. loop2++;
  86. }
  87. if (str[loop2] == '\0')
  88. error = MC_SEARCH_HEX_E_UNMATCHED_QUOTES;
  89. else
  90. loop = loop2 + 1;
  91. }
  92. else
  93. error = MC_SEARCH_HEX_E_INVALID_CHARACTER;
  94. }
  95. if (error != MC_SEARCH_HEX_E_OK)
  96. {
  97. g_string_free (buff, TRUE);
  98. if (error_ptr != NULL)
  99. *error_ptr = error;
  100. if (error_pos_ptr != NULL)
  101. *error_pos_ptr = loop;
  102. return NULL;
  103. }
  104. return buff;
  105. }
  106. /* --------------------------------------------------------------------------------------------- */
  107. /*** public functions ****************************************************************************/
  108. /* --------------------------------------------------------------------------------------------- */
  109. void
  110. mc_search__cond_struct_new_init_hex (const char *charset, mc_search_t *lc_mc_search,
  111. mc_search_cond_t *mc_search_cond)
  112. {
  113. GString *tmp;
  114. mc_search_hex_parse_error_t error = MC_SEARCH_HEX_E_OK;
  115. int error_pos = 0;
  116. /*
  117. * We may be searching in binary data, which is often invalid UTF-8.
  118. *
  119. * We have to create a non UTF-8 regex (that is, G_REGEX_RAW) or else, as
  120. * the data is invalid UTF-8, both GLib's PCRE and our
  121. * mc_search__g_regex_match_full_safe() are going to fail us. The former by
  122. * not finding all bytes, the latter by overwriting the supposedly invalid
  123. * UTF-8 with NULs.
  124. *
  125. * To do this, we specify "ASCII" as the charset.
  126. *
  127. * In fact, we can specify any charset other than "UTF-8": any such charset
  128. * will trigger G_REGEX_RAW (see [1]). The output of [2] will be the same
  129. * for all charsets because it skips the \xXX symbols
  130. * mc_search__hex_translate_to_regex() outputs.
  131. *
  132. * But "ASCII" is the best choice because a hex pattern may contain a
  133. * quoted string: this way we know [2] will ignore any characters outside
  134. * ASCII letters range (these ignored chars will be copied verbatim to the
  135. * output and will match as-is; in other words, in a case-sensitive manner;
  136. * If the user is interested in case-insensitive searches of international
  137. * text, he shouldn't be using hex search in the first place.)
  138. *
  139. * Switching out of UTF-8 has another advantage:
  140. *
  141. * When doing case-insensitive searches, GLib treats \xXX symbols as normal
  142. * letters and therefore matches both "a" and "A" for the hex pattern
  143. * "0x61". When we switch out of UTF-8, we're switching to using [2], which
  144. * doesn't have this issue.
  145. *
  146. * [1] mc_search__cond_struct_new_init_regex
  147. * [2] mc_search__cond_struct_new_regex_ci_str
  148. */
  149. if (str_isutf8 (charset))
  150. charset = "ASCII";
  151. tmp = mc_search__hex_translate_to_regex (mc_search_cond->str, &error, &error_pos);
  152. if (tmp != NULL)
  153. {
  154. g_string_free (mc_search_cond->str, TRUE);
  155. mc_search_cond->str = tmp;
  156. mc_search__cond_struct_new_init_regex (charset, lc_mc_search, mc_search_cond);
  157. }
  158. else
  159. {
  160. const char *desc;
  161. switch (error)
  162. {
  163. case MC_SEARCH_HEX_E_NUM_OUT_OF_RANGE:
  164. desc =
  165. _
  166. ("Number out of range (should be in byte range, 0 <= n <= 0xFF, expressed in hex)");
  167. break;
  168. case MC_SEARCH_HEX_E_INVALID_CHARACTER:
  169. desc = _("Invalid character");
  170. break;
  171. case MC_SEARCH_HEX_E_UNMATCHED_QUOTES:
  172. desc = _("Unmatched quotes character");
  173. break;
  174. default:
  175. desc = "";
  176. }
  177. lc_mc_search->error = MC_SEARCH_E_INPUT;
  178. lc_mc_search->error_str =
  179. g_strdup_printf (_("Hex pattern error at position %d:\n%s."), error_pos + 1, desc);
  180. }
  181. }
  182. /* --------------------------------------------------------------------------------------------- */
  183. gboolean
  184. mc_search__run_hex (mc_search_t *lc_mc_search, const void *user_data,
  185. off_t start_search, off_t end_search, gsize *found_len)
  186. {
  187. return mc_search__run_regex (lc_mc_search, user_data, start_search, end_search, found_len);
  188. }
  189. /* --------------------------------------------------------------------------------------------- */
  190. GString *
  191. mc_search_hex_prepare_replace_str (mc_search_t *lc_mc_search, GString *replace_str)
  192. {
  193. (void) lc_mc_search;
  194. return mc_g_string_dup (replace_str);
  195. }
  196. /* --------------------------------------------------------------------------------------------- */