search.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521
  1. /*
  2. Search text engine.
  3. Interface functions
  4. Copyright (C) 2009-2025
  5. Free Software Foundation, Inc.
  6. Written by:
  7. Slava Zanko <slavazanko@gmail.com>, 2009
  8. Andrew Borodin <aborodin@vmail.ru>, 2013
  9. This file is part of the Midnight Commander.
  10. The Midnight Commander is free software: you can redistribute it
  11. and/or modify it under the terms of the GNU General Public License as
  12. published by the Free Software Foundation, either version 3 of the License,
  13. or (at your option) any later version.
  14. The Midnight Commander is distributed in the hope that it will be useful,
  15. but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. GNU General Public License for more details.
  18. You should have received a copy of the GNU General Public License
  19. along with this program. If not, see <http://www.gnu.org/licenses/>.
  20. */
  21. #include <config.h>
  22. #include <stdarg.h>
  23. #include <stdlib.h>
  24. #include <sys/types.h>
  25. #include "lib/global.h"
  26. #include "lib/strutil.h"
  27. #include "lib/search.h"
  28. #include "lib/util.h"
  29. #ifdef HAVE_CHARSET
  30. #include "lib/charsets.h"
  31. #endif
  32. #include "internal.h"
  33. /*** global variables ****************************************************************************/
  34. /*** file scope macro definitions ****************************************************************/
  35. /*** file scope type declarations ****************************************************************/
  36. /*** forward declarations (file scope functions) *************************************************/
  37. /*** file scope variables ************************************************************************/
  38. static const mc_search_type_str_t mc_search__list_types[] = {
  39. {N_("No&rmal"), MC_SEARCH_T_NORMAL},
  40. {N_("Re&gular expression"), MC_SEARCH_T_REGEX},
  41. {N_("He&xadecimal"), MC_SEARCH_T_HEX},
  42. {N_("Wil&dcard search"), MC_SEARCH_T_GLOB},
  43. {NULL, MC_SEARCH_T_INVALID}
  44. };
  45. /* --------------------------------------------------------------------------------------------- */
  46. /*** file scope functions ************************************************************************/
  47. /* --------------------------------------------------------------------------------------------- */
  48. static mc_search_cond_t *
  49. mc_search__cond_struct_new (mc_search_t *lc_mc_search, const GString *str, const char *charset)
  50. {
  51. mc_search_cond_t *mc_search_cond;
  52. mc_search_cond = g_malloc0 (sizeof (mc_search_cond_t));
  53. mc_search_cond->str = mc_g_string_dup (str);
  54. mc_search_cond->charset = g_strdup (charset);
  55. #ifdef HAVE_PCRE2
  56. lc_mc_search->regex_match_info = pcre2_match_data_create (MC_SEARCH__NUM_REPLACE_ARGS, NULL);
  57. lc_mc_search->iovector = pcre2_get_ovector_pointer (lc_mc_search->regex_match_info);
  58. #endif
  59. switch (lc_mc_search->search_type)
  60. {
  61. case MC_SEARCH_T_GLOB:
  62. mc_search__cond_struct_new_init_glob (charset, lc_mc_search, mc_search_cond);
  63. break;
  64. case MC_SEARCH_T_NORMAL:
  65. mc_search__cond_struct_new_init_normal (charset, lc_mc_search, mc_search_cond);
  66. break;
  67. case MC_SEARCH_T_REGEX:
  68. mc_search__cond_struct_new_init_regex (charset, lc_mc_search, mc_search_cond);
  69. break;
  70. case MC_SEARCH_T_HEX:
  71. mc_search__cond_struct_new_init_hex (charset, lc_mc_search, mc_search_cond);
  72. break;
  73. default:
  74. break;
  75. }
  76. return mc_search_cond;
  77. }
  78. /* --------------------------------------------------------------------------------------------- */
  79. static void
  80. mc_search__cond_struct_free (gpointer data)
  81. {
  82. mc_search_cond_t *mc_search_cond = (mc_search_cond_t *) data;
  83. if (mc_search_cond->upper != NULL)
  84. g_string_free (mc_search_cond->upper, TRUE);
  85. if (mc_search_cond->lower != NULL)
  86. g_string_free (mc_search_cond->lower, TRUE);
  87. g_string_free (mc_search_cond->str, TRUE);
  88. g_free (mc_search_cond->charset);
  89. #ifdef SEARCH_TYPE_GLIB
  90. if (mc_search_cond->regex_handle != NULL)
  91. g_regex_unref (mc_search_cond->regex_handle);
  92. #else /* SEARCH_TYPE_GLIB */
  93. g_free (mc_search_cond->regex_handle);
  94. #endif /* SEARCH_TYPE_GLIB */
  95. g_free (mc_search_cond);
  96. }
  97. /* --------------------------------------------------------------------------------------------- */
  98. /*** public functions ****************************************************************************/
  99. /* --------------------------------------------------------------------------------------------- */
  100. /* Init search descriptor.
  101. *
  102. * @param original pattern to search
  103. * @param original_charset charset of #original. If NULL then cp_display will be used
  104. *
  105. * @return new mc_search_t object. Use #mc_search_free() to free it.
  106. */
  107. mc_search_t *
  108. mc_search_new (const gchar *original, const gchar *original_charset)
  109. {
  110. if (original == NULL)
  111. return NULL;
  112. return mc_search_new_len (original, strlen (original), original_charset);
  113. }
  114. /* --------------------------------------------------------------------------------------------- */
  115. /* Init search descriptor.
  116. *
  117. * @param original pattern to search
  118. * @param original_len length of #original or -1 if #original is NULL-terminated
  119. * @param original_charset charset of #original. If NULL then cp_display will be used
  120. *
  121. * @return new mc_search_t object. Use #mc_search_free() to free it.
  122. */
  123. mc_search_t *
  124. mc_search_new_len (const gchar *original, gsize original_len, const gchar *original_charset)
  125. {
  126. mc_search_t *lc_mc_search;
  127. if (original == NULL || original_len == 0)
  128. return NULL;
  129. lc_mc_search = g_new0 (mc_search_t, 1);
  130. lc_mc_search->original.str = g_string_new_len (original, original_len);
  131. #ifdef HAVE_CHARSET
  132. lc_mc_search->original.charset =
  133. g_strdup (original_charset != NULL
  134. && *original_charset != '\0' ? original_charset : cp_display);
  135. #else
  136. (void) original_charset;
  137. #endif
  138. return lc_mc_search;
  139. }
  140. /* --------------------------------------------------------------------------------------------- */
  141. void
  142. mc_search_free (mc_search_t *lc_mc_search)
  143. {
  144. if (lc_mc_search == NULL)
  145. return;
  146. g_string_free (lc_mc_search->original.str, TRUE);
  147. #ifdef HAVE_CHARSET
  148. g_free (lc_mc_search->original.charset);
  149. #endif
  150. g_free (lc_mc_search->error_str);
  151. if (lc_mc_search->prepared.conditions != NULL)
  152. g_ptr_array_free (lc_mc_search->prepared.conditions, TRUE);
  153. #ifdef SEARCH_TYPE_GLIB
  154. if (lc_mc_search->regex_match_info != NULL)
  155. g_match_info_free (lc_mc_search->regex_match_info);
  156. #else /* SEARCH_TYPE_GLIB */
  157. g_free (lc_mc_search->regex_match_info);
  158. #endif /* SEARCH_TYPE_GLIB */
  159. if (lc_mc_search->regex_buffer != NULL)
  160. g_string_free (lc_mc_search->regex_buffer, TRUE);
  161. g_free (lc_mc_search);
  162. }
  163. /* --------------------------------------------------------------------------------------------- */
  164. gboolean
  165. mc_search_prepare (mc_search_t *lc_mc_search)
  166. {
  167. GPtrArray *ret;
  168. if (lc_mc_search->prepared.conditions != NULL)
  169. return lc_mc_search->prepared.result;
  170. ret = g_ptr_array_new_with_free_func (mc_search__cond_struct_free);
  171. #ifdef HAVE_CHARSET
  172. if (!lc_mc_search->is_all_charsets)
  173. g_ptr_array_add (ret,
  174. mc_search__cond_struct_new (lc_mc_search, lc_mc_search->original.str,
  175. lc_mc_search->original.charset));
  176. else
  177. {
  178. gsize loop1;
  179. for (loop1 = 0; loop1 < codepages->len; loop1++)
  180. {
  181. const char *id;
  182. id = ((codepage_desc *) g_ptr_array_index (codepages, loop1))->id;
  183. if (g_ascii_strcasecmp (id, lc_mc_search->original.charset) == 0)
  184. g_ptr_array_add (ret,
  185. mc_search__cond_struct_new (lc_mc_search,
  186. lc_mc_search->original.str,
  187. lc_mc_search->original.charset));
  188. else
  189. {
  190. GString *buffer;
  191. buffer =
  192. mc_search__recode_str (lc_mc_search->original.str->str,
  193. lc_mc_search->original.str->len,
  194. lc_mc_search->original.charset, id);
  195. g_ptr_array_add (ret, mc_search__cond_struct_new (lc_mc_search, buffer, id));
  196. g_string_free (buffer, TRUE);
  197. }
  198. }
  199. }
  200. #else
  201. g_ptr_array_add (ret,
  202. mc_search__cond_struct_new (lc_mc_search, lc_mc_search->original.str,
  203. str_detect_termencoding ()));
  204. #endif
  205. lc_mc_search->prepared.conditions = ret;
  206. lc_mc_search->prepared.result = (lc_mc_search->error == MC_SEARCH_E_OK);
  207. return lc_mc_search->prepared.result;
  208. }
  209. /* --------------------------------------------------------------------------------------------- */
  210. /**
  211. * Carries out the search.
  212. *
  213. * Returns TRUE if found.
  214. *
  215. * Returns FALSE if not found. In this case, lc_mc_search->error reveals
  216. * the reason:
  217. *
  218. * - MC_SEARCH_E_NOTFOUND: the pattern isn't in the subject string.
  219. * - MC_SEARCH_E_ABORT: the user aborted the search.
  220. * - For any other reason (but not for the above two!): the description
  221. * is in lc_mc_search->error_str.
  222. */
  223. gboolean
  224. mc_search_run (mc_search_t *lc_mc_search, const void *user_data,
  225. off_t start_search, off_t end_search, gsize *found_len)
  226. {
  227. gboolean ret = FALSE;
  228. if (lc_mc_search == NULL || user_data == NULL)
  229. return FALSE;
  230. if (!mc_search_is_type_avail (lc_mc_search->search_type))
  231. {
  232. mc_search_set_error (lc_mc_search, MC_SEARCH_E_INPUT, "%s", _(STR_E_UNKNOWN_TYPE));
  233. return FALSE;
  234. }
  235. #ifdef SEARCH_TYPE_GLIB
  236. if (lc_mc_search->regex_match_info != NULL)
  237. {
  238. g_match_info_free (lc_mc_search->regex_match_info);
  239. lc_mc_search->regex_match_info = NULL;
  240. }
  241. #endif /* SEARCH_TYPE_GLIB */
  242. mc_search_set_error (lc_mc_search, MC_SEARCH_E_OK, NULL);
  243. if (!mc_search_prepare (lc_mc_search))
  244. return FALSE;
  245. switch (lc_mc_search->search_type)
  246. {
  247. case MC_SEARCH_T_NORMAL:
  248. ret = mc_search__run_normal (lc_mc_search, user_data, start_search, end_search, found_len);
  249. break;
  250. case MC_SEARCH_T_REGEX:
  251. ret = mc_search__run_regex (lc_mc_search, user_data, start_search, end_search, found_len);
  252. break;
  253. case MC_SEARCH_T_GLOB:
  254. ret = mc_search__run_glob (lc_mc_search, user_data, start_search, end_search, found_len);
  255. break;
  256. case MC_SEARCH_T_HEX:
  257. ret = mc_search__run_hex (lc_mc_search, user_data, start_search, end_search, found_len);
  258. break;
  259. default:
  260. break;
  261. }
  262. return ret;
  263. }
  264. /* --------------------------------------------------------------------------------------------- */
  265. gboolean
  266. mc_search_is_type_avail (mc_search_type_t search_type)
  267. {
  268. switch (search_type)
  269. {
  270. case MC_SEARCH_T_GLOB:
  271. case MC_SEARCH_T_NORMAL:
  272. case MC_SEARCH_T_REGEX:
  273. case MC_SEARCH_T_HEX:
  274. return TRUE;
  275. default:
  276. break;
  277. }
  278. return FALSE;
  279. }
  280. /* --------------------------------------------------------------------------------------------- */
  281. const mc_search_type_str_t *
  282. mc_search_types_list_get (size_t *num)
  283. {
  284. /* don't count last NULL item */
  285. if (num != NULL)
  286. *num = G_N_ELEMENTS (mc_search__list_types) - 1;
  287. return mc_search__list_types;
  288. }
  289. /* --------------------------------------------------------------------------------------------- */
  290. GString *
  291. mc_search_prepare_replace_str (mc_search_t *lc_mc_search, GString *replace_str)
  292. {
  293. GString *ret;
  294. if (replace_str == NULL || replace_str->len == 0)
  295. return g_string_new ("");
  296. if (lc_mc_search == NULL)
  297. return mc_g_string_dup (replace_str);
  298. switch (lc_mc_search->search_type)
  299. {
  300. case MC_SEARCH_T_REGEX:
  301. ret = mc_search_regex_prepare_replace_str (lc_mc_search, replace_str);
  302. break;
  303. case MC_SEARCH_T_GLOB:
  304. ret = mc_search_glob_prepare_replace_str (lc_mc_search, replace_str);
  305. break;
  306. case MC_SEARCH_T_NORMAL:
  307. ret = mc_search_normal_prepare_replace_str (lc_mc_search, replace_str);
  308. break;
  309. case MC_SEARCH_T_HEX:
  310. ret = mc_search_hex_prepare_replace_str (lc_mc_search, replace_str);
  311. break;
  312. default:
  313. ret = mc_g_string_dup (replace_str);
  314. break;
  315. }
  316. return ret;
  317. }
  318. /* --------------------------------------------------------------------------------------------- */
  319. char *
  320. mc_search_prepare_replace_str2 (mc_search_t *lc_mc_search, const char *replace_str)
  321. {
  322. GString *ret;
  323. GString *replace_str2;
  324. replace_str2 = g_string_new (replace_str);
  325. ret = mc_search_prepare_replace_str (lc_mc_search, replace_str2);
  326. g_string_free (replace_str2, TRUE);
  327. return (ret != NULL) ? g_string_free (ret, FALSE) : NULL;
  328. }
  329. /* --------------------------------------------------------------------------------------------- */
  330. gboolean
  331. mc_search_is_fixed_search_str (const mc_search_t *lc_mc_search)
  332. {
  333. if (lc_mc_search == NULL)
  334. return FALSE;
  335. switch (lc_mc_search->search_type)
  336. {
  337. case MC_SEARCH_T_REGEX:
  338. case MC_SEARCH_T_GLOB:
  339. return FALSE;
  340. default:
  341. return TRUE;
  342. }
  343. }
  344. /* --------------------------------------------------------------------------------------------- */
  345. /* Search specified pattern in specified string.
  346. *
  347. * @param pattern string to search
  348. * @param pattern_charset charset of #pattern. If NULL then cp_display will be used
  349. * @param str string where search #pattern
  350. * @param search type (normal, regex, hex or glob)
  351. *
  352. * @return TRUE if found is successful, FALSE otherwise.
  353. */
  354. gboolean
  355. mc_search (const gchar *pattern, const gchar *pattern_charset, const gchar *str,
  356. mc_search_type_t type)
  357. {
  358. gboolean ret;
  359. mc_search_t *search;
  360. if (str == NULL)
  361. return FALSE;
  362. search = mc_search_new (pattern, pattern_charset);
  363. if (search == NULL)
  364. return FALSE;
  365. search->search_type = type;
  366. search->is_case_sensitive = TRUE;
  367. if (type == MC_SEARCH_T_GLOB)
  368. search->is_entire_line = TRUE;
  369. ret = mc_search_run (search, str, 0, strlen (str), NULL);
  370. mc_search_free (search);
  371. return ret;
  372. }
  373. /* --------------------------------------------------------------------------------------------- */
  374. int
  375. mc_search_getstart_result_by_num (mc_search_t *lc_mc_search, int lc_index)
  376. {
  377. if (lc_mc_search == NULL)
  378. return 0;
  379. if (lc_mc_search->search_type == MC_SEARCH_T_NORMAL)
  380. return 0;
  381. #ifdef SEARCH_TYPE_GLIB
  382. {
  383. gint start_pos;
  384. gint end_pos;
  385. g_match_info_fetch_pos (lc_mc_search->regex_match_info, lc_index, &start_pos, &end_pos);
  386. return (int) start_pos;
  387. }
  388. #else /* SEARCH_TYPE_GLIB */
  389. return lc_mc_search->iovector[lc_index * 2];
  390. #endif /* SEARCH_TYPE_GLIB */
  391. }
  392. /* --------------------------------------------------------------------------------------------- */
  393. int
  394. mc_search_getend_result_by_num (mc_search_t *lc_mc_search, int lc_index)
  395. {
  396. if (lc_mc_search == NULL)
  397. return 0;
  398. if (lc_mc_search->search_type == MC_SEARCH_T_NORMAL)
  399. return 0;
  400. #ifdef SEARCH_TYPE_GLIB
  401. {
  402. gint start_pos;
  403. gint end_pos;
  404. g_match_info_fetch_pos (lc_mc_search->regex_match_info, lc_index, &start_pos, &end_pos);
  405. return (int) end_pos;
  406. }
  407. #else /* SEARCH_TYPE_GLIB */
  408. return lc_mc_search->iovector[lc_index * 2 + 1];
  409. #endif /* SEARCH_TYPE_GLIB */
  410. }
  411. /* --------------------------------------------------------------------------------------------- */
  412. /**
  413. * Replace an old error code and message of an mc_search_t object.
  414. *
  415. * @param mc_search mc_search_t object
  416. * @param code error code, one of mc_search_error_t values
  417. * @param format format of error message. If NULL, the old error string is free'd and become NULL
  418. */
  419. void
  420. mc_search_set_error (mc_search_t *lc_mc_search, mc_search_error_t code, const gchar *format, ...)
  421. {
  422. lc_mc_search->error = code;
  423. MC_PTR_FREE (lc_mc_search->error_str);
  424. if (format != NULL)
  425. {
  426. va_list args;
  427. va_start (args, format);
  428. lc_mc_search->error_str = g_strdup_vprintf (format, args);
  429. va_end (args);
  430. }
  431. }
  432. /* --------------------------------------------------------------------------------------------- */