search.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521
  1. /*
  2. Search text engine.
  3. Interface functions
  4. Copyright (C) 2009-2023
  5. Free Software Foundation, Inc.
  6. Written by:
  7. Slava Zanko <slavazanko@gmail.com>, 2009
  8. Andrew Borodin <aborodin@vmail.ru>, 2013
  9. This file is part of the Midnight Commander.
  10. The Midnight Commander is free software: you can redistribute it
  11. and/or modify it under the terms of the GNU General Public License as
  12. published by the Free Software Foundation, either version 3 of the License,
  13. or (at your option) any later version.
  14. The Midnight Commander is distributed in the hope that it will be useful,
  15. but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. GNU General Public License for more details.
  18. You should have received a copy of the GNU General Public License
  19. along with this program. If not, see <http://www.gnu.org/licenses/>.
  20. */
  21. #include <config.h>
  22. #include <stdarg.h>
  23. #include <stdlib.h>
  24. #include <sys/types.h>
  25. #include "lib/global.h"
  26. #include "lib/strutil.h"
  27. #include "lib/search.h"
  28. #include "lib/util.h"
  29. #ifdef HAVE_CHARSET
  30. #include "lib/charsets.h"
  31. #endif
  32. #include "internal.h"
  33. /*** global variables ****************************************************************************/
  34. /*** file scope macro definitions ****************************************************************/
  35. /*** file scope type declarations ****************************************************************/
  36. /*** file scope variables ************************************************************************/
  37. static const mc_search_type_str_t mc_search__list_types[] = {
  38. {N_("No&rmal"), MC_SEARCH_T_NORMAL},
  39. {N_("Re&gular expression"), MC_SEARCH_T_REGEX},
  40. {N_("He&xadecimal"), MC_SEARCH_T_HEX},
  41. {N_("Wil&dcard search"), MC_SEARCH_T_GLOB},
  42. {NULL, MC_SEARCH_T_INVALID}
  43. };
  44. /*** file scope functions ************************************************************************/
  45. static mc_search_cond_t *
  46. mc_search__cond_struct_new (mc_search_t * lc_mc_search, const GString * str, const char *charset)
  47. {
  48. mc_search_cond_t *mc_search_cond;
  49. mc_search_cond = g_malloc0 (sizeof (mc_search_cond_t));
  50. mc_search_cond->str = mc_g_string_dup (str);
  51. mc_search_cond->charset = g_strdup (charset);
  52. switch (lc_mc_search->search_type)
  53. {
  54. case MC_SEARCH_T_GLOB:
  55. mc_search__cond_struct_new_init_glob (charset, lc_mc_search, mc_search_cond);
  56. break;
  57. case MC_SEARCH_T_NORMAL:
  58. mc_search__cond_struct_new_init_normal (charset, lc_mc_search, mc_search_cond);
  59. break;
  60. case MC_SEARCH_T_REGEX:
  61. mc_search__cond_struct_new_init_regex (charset, lc_mc_search, mc_search_cond);
  62. break;
  63. case MC_SEARCH_T_HEX:
  64. mc_search__cond_struct_new_init_hex (charset, lc_mc_search, mc_search_cond);
  65. break;
  66. default:
  67. break;
  68. }
  69. return mc_search_cond;
  70. }
  71. /* --------------------------------------------------------------------------------------------- */
  72. static void
  73. mc_search__cond_struct_free (mc_search_cond_t * mc_search_cond)
  74. {
  75. if (mc_search_cond->upper != NULL)
  76. g_string_free (mc_search_cond->upper, TRUE);
  77. if (mc_search_cond->lower != NULL)
  78. g_string_free (mc_search_cond->lower, TRUE);
  79. g_string_free (mc_search_cond->str, TRUE);
  80. g_free (mc_search_cond->charset);
  81. #ifdef SEARCH_TYPE_GLIB
  82. if (mc_search_cond->regex_handle != NULL)
  83. g_regex_unref (mc_search_cond->regex_handle);
  84. #else /* SEARCH_TYPE_GLIB */
  85. g_free (mc_search_cond->regex_handle);
  86. #endif /* SEARCH_TYPE_GLIB */
  87. g_free (mc_search_cond);
  88. }
  89. /* --------------------------------------------------------------------------------------------- */
  90. static void
  91. mc_search__conditions_free (GPtrArray * array)
  92. {
  93. g_ptr_array_foreach (array, (GFunc) mc_search__cond_struct_free, NULL);
  94. g_ptr_array_free (array, TRUE);
  95. }
  96. /* --------------------------------------------------------------------------------------------- */
  97. /*** public functions ****************************************************************************/
  98. /* --------------------------------------------------------------------------------------------- */
  99. /* Init search descriptor.
  100. *
  101. * @param original pattern to search
  102. * @param original_charset charset of #original. If NULL then cp_display will be used
  103. *
  104. * @return new mc_search_t object. Use #mc_search_free() to free it.
  105. */
  106. mc_search_t *
  107. mc_search_new (const gchar * original, const gchar * original_charset)
  108. {
  109. if (original == NULL)
  110. return NULL;
  111. return mc_search_new_len (original, strlen (original), original_charset);
  112. }
  113. /* --------------------------------------------------------------------------------------------- */
  114. /* Init search descriptor.
  115. *
  116. * @param original pattern to search
  117. * @param original_len length of #original or -1 if #original is NULL-terminated
  118. * @param original_charset charset of #original. If NULL then cp_display will be used
  119. *
  120. * @return new mc_search_t object. Use #mc_search_free() to free it.
  121. */
  122. mc_search_t *
  123. mc_search_new_len (const gchar * original, gsize original_len, const gchar * original_charset)
  124. {
  125. mc_search_t *lc_mc_search;
  126. if (original == NULL || original_len == 0)
  127. return NULL;
  128. lc_mc_search = g_new0 (mc_search_t, 1);
  129. lc_mc_search->original.str = g_string_new_len (original, original_len);
  130. #ifdef HAVE_CHARSET
  131. lc_mc_search->original.charset =
  132. g_strdup (original_charset != NULL
  133. && *original_charset != '\0' ? original_charset : cp_display);
  134. #else
  135. (void) original_charset;
  136. #endif
  137. return lc_mc_search;
  138. }
  139. /* --------------------------------------------------------------------------------------------- */
  140. void
  141. mc_search_free (mc_search_t * lc_mc_search)
  142. {
  143. if (lc_mc_search == NULL)
  144. return;
  145. g_string_free (lc_mc_search->original.str, TRUE);
  146. #ifdef HAVE_CHARSET
  147. g_free (lc_mc_search->original.charset);
  148. #endif
  149. g_free (lc_mc_search->error_str);
  150. if (lc_mc_search->prepared.conditions != NULL)
  151. mc_search__conditions_free (lc_mc_search->prepared.conditions);
  152. #ifdef SEARCH_TYPE_GLIB
  153. if (lc_mc_search->regex_match_info != NULL)
  154. g_match_info_free (lc_mc_search->regex_match_info);
  155. #else /* SEARCH_TYPE_GLIB */
  156. g_free (lc_mc_search->regex_match_info);
  157. #endif /* SEARCH_TYPE_GLIB */
  158. if (lc_mc_search->regex_buffer != NULL)
  159. g_string_free (lc_mc_search->regex_buffer, TRUE);
  160. g_free (lc_mc_search);
  161. }
  162. /* --------------------------------------------------------------------------------------------- */
  163. gboolean
  164. mc_search_prepare (mc_search_t * lc_mc_search)
  165. {
  166. GPtrArray *ret;
  167. if (lc_mc_search->prepared.conditions != NULL)
  168. return lc_mc_search->prepared.result;
  169. ret = g_ptr_array_new ();
  170. #ifdef HAVE_CHARSET
  171. if (!lc_mc_search->is_all_charsets)
  172. g_ptr_array_add (ret,
  173. mc_search__cond_struct_new (lc_mc_search, lc_mc_search->original.str,
  174. lc_mc_search->original.charset));
  175. else
  176. {
  177. gsize loop1;
  178. for (loop1 = 0; loop1 < codepages->len; loop1++)
  179. {
  180. const char *id;
  181. id = ((codepage_desc *) g_ptr_array_index (codepages, loop1))->id;
  182. if (g_ascii_strcasecmp (id, lc_mc_search->original.charset) == 0)
  183. g_ptr_array_add (ret,
  184. mc_search__cond_struct_new (lc_mc_search,
  185. lc_mc_search->original.str,
  186. lc_mc_search->original.charset));
  187. else
  188. {
  189. GString *buffer;
  190. buffer =
  191. mc_search__recode_str (lc_mc_search->original.str->str,
  192. lc_mc_search->original.str->len,
  193. lc_mc_search->original.charset, id);
  194. g_ptr_array_add (ret, mc_search__cond_struct_new (lc_mc_search, buffer, id));
  195. g_string_free (buffer, TRUE);
  196. }
  197. }
  198. }
  199. #else
  200. g_ptr_array_add (ret,
  201. mc_search__cond_struct_new (lc_mc_search, lc_mc_search->original.str,
  202. str_detect_termencoding ()));
  203. #endif
  204. lc_mc_search->prepared.conditions = ret;
  205. lc_mc_search->prepared.result = (lc_mc_search->error == MC_SEARCH_E_OK);
  206. return lc_mc_search->prepared.result;
  207. }
  208. /* --------------------------------------------------------------------------------------------- */
  209. /**
  210. * Carries out the search.
  211. *
  212. * Returns TRUE if found.
  213. *
  214. * Returns FALSE if not found. In this case, lc_mc_search->error reveals
  215. * the reason:
  216. *
  217. * - MC_SEARCH_E_NOTFOUND: the pattern isn't in the subject string.
  218. * - MC_SEARCH_E_ABORT: the user aborted the search.
  219. * - For any other reason (but not for the above two!): the description
  220. * is in lc_mc_search->error_str.
  221. */
  222. gboolean
  223. mc_search_run (mc_search_t * lc_mc_search, const void *user_data,
  224. gsize start_search, gsize end_search, gsize * found_len)
  225. {
  226. gboolean ret = FALSE;
  227. if (lc_mc_search == NULL || user_data == NULL)
  228. return FALSE;
  229. if (!mc_search_is_type_avail (lc_mc_search->search_type))
  230. {
  231. mc_search_set_error (lc_mc_search, MC_SEARCH_E_INPUT, "%s", _(STR_E_UNKNOWN_TYPE));
  232. return FALSE;
  233. }
  234. #ifdef SEARCH_TYPE_GLIB
  235. if (lc_mc_search->regex_match_info != NULL)
  236. {
  237. g_match_info_free (lc_mc_search->regex_match_info);
  238. lc_mc_search->regex_match_info = NULL;
  239. }
  240. #endif /* SEARCH_TYPE_GLIB */
  241. mc_search_set_error (lc_mc_search, MC_SEARCH_E_OK, NULL);
  242. if (!mc_search_prepare (lc_mc_search))
  243. return FALSE;
  244. switch (lc_mc_search->search_type)
  245. {
  246. case MC_SEARCH_T_NORMAL:
  247. ret = mc_search__run_normal (lc_mc_search, user_data, start_search, end_search, found_len);
  248. break;
  249. case MC_SEARCH_T_REGEX:
  250. ret = mc_search__run_regex (lc_mc_search, user_data, start_search, end_search, found_len);
  251. break;
  252. case MC_SEARCH_T_GLOB:
  253. ret = mc_search__run_glob (lc_mc_search, user_data, start_search, end_search, found_len);
  254. break;
  255. case MC_SEARCH_T_HEX:
  256. ret = mc_search__run_hex (lc_mc_search, user_data, start_search, end_search, found_len);
  257. break;
  258. default:
  259. break;
  260. }
  261. return ret;
  262. }
  263. /* --------------------------------------------------------------------------------------------- */
  264. gboolean
  265. mc_search_is_type_avail (mc_search_type_t search_type)
  266. {
  267. switch (search_type)
  268. {
  269. case MC_SEARCH_T_GLOB:
  270. case MC_SEARCH_T_NORMAL:
  271. case MC_SEARCH_T_REGEX:
  272. case MC_SEARCH_T_HEX:
  273. return TRUE;
  274. default:
  275. break;
  276. }
  277. return FALSE;
  278. }
  279. /* --------------------------------------------------------------------------------------------- */
  280. const mc_search_type_str_t *
  281. mc_search_types_list_get (size_t * num)
  282. {
  283. /* don't count last NULL item */
  284. if (num != NULL)
  285. *num = G_N_ELEMENTS (mc_search__list_types) - 1;
  286. return mc_search__list_types;
  287. }
  288. /* --------------------------------------------------------------------------------------------- */
  289. GString *
  290. mc_search_prepare_replace_str (mc_search_t * lc_mc_search, GString * replace_str)
  291. {
  292. GString *ret;
  293. if (replace_str == NULL || replace_str->len == 0)
  294. return g_string_new ("");
  295. if (lc_mc_search == NULL)
  296. return mc_g_string_dup (replace_str);
  297. switch (lc_mc_search->search_type)
  298. {
  299. case MC_SEARCH_T_REGEX:
  300. ret = mc_search_regex_prepare_replace_str (lc_mc_search, replace_str);
  301. break;
  302. case MC_SEARCH_T_GLOB:
  303. ret = mc_search_glob_prepare_replace_str (lc_mc_search, replace_str);
  304. break;
  305. case MC_SEARCH_T_NORMAL:
  306. ret = mc_search_normal_prepare_replace_str (lc_mc_search, replace_str);
  307. break;
  308. case MC_SEARCH_T_HEX:
  309. ret = mc_search_hex_prepare_replace_str (lc_mc_search, replace_str);
  310. break;
  311. default:
  312. ret = mc_g_string_dup (replace_str);
  313. break;
  314. }
  315. return ret;
  316. }
  317. /* --------------------------------------------------------------------------------------------- */
  318. char *
  319. mc_search_prepare_replace_str2 (mc_search_t * lc_mc_search, const char *replace_str)
  320. {
  321. GString *ret;
  322. GString *replace_str2;
  323. replace_str2 = g_string_new (replace_str);
  324. ret = mc_search_prepare_replace_str (lc_mc_search, replace_str2);
  325. g_string_free (replace_str2, TRUE);
  326. return (ret != NULL) ? g_string_free (ret, FALSE) : NULL;
  327. }
  328. /* --------------------------------------------------------------------------------------------- */
  329. gboolean
  330. mc_search_is_fixed_search_str (const mc_search_t * lc_mc_search)
  331. {
  332. if (lc_mc_search == NULL)
  333. return FALSE;
  334. switch (lc_mc_search->search_type)
  335. {
  336. case MC_SEARCH_T_REGEX:
  337. case MC_SEARCH_T_GLOB:
  338. return FALSE;
  339. default:
  340. return TRUE;
  341. }
  342. }
  343. /* --------------------------------------------------------------------------------------------- */
  344. /* Search specified pattern in specified string.
  345. *
  346. * @param pattern string to search
  347. * @param pattern_charset charset of #pattern. If NULL then cp_display will be used
  348. * @param str string where search #pattern
  349. * @param search type (normal, regex, hex or glob)
  350. *
  351. * @return TRUE if found is successful, FALSE otherwise.
  352. */
  353. gboolean
  354. mc_search (const gchar * pattern, const gchar * pattern_charset, const gchar * str,
  355. mc_search_type_t type)
  356. {
  357. gboolean ret;
  358. mc_search_t *search;
  359. if (str == NULL)
  360. return FALSE;
  361. search = mc_search_new (pattern, pattern_charset);
  362. if (search == NULL)
  363. return FALSE;
  364. search->search_type = type;
  365. search->is_case_sensitive = TRUE;
  366. if (type == MC_SEARCH_T_GLOB)
  367. search->is_entire_line = TRUE;
  368. ret = mc_search_run (search, str, 0, strlen (str), NULL);
  369. mc_search_free (search);
  370. return ret;
  371. }
  372. /* --------------------------------------------------------------------------------------------- */
  373. int
  374. mc_search_getstart_result_by_num (mc_search_t * lc_mc_search, int lc_index)
  375. {
  376. if (lc_mc_search == NULL)
  377. return 0;
  378. if (lc_mc_search->search_type == MC_SEARCH_T_NORMAL)
  379. return 0;
  380. #ifdef SEARCH_TYPE_GLIB
  381. {
  382. gint start_pos;
  383. gint end_pos;
  384. g_match_info_fetch_pos (lc_mc_search->regex_match_info, lc_index, &start_pos, &end_pos);
  385. return (int) start_pos;
  386. }
  387. #else /* SEARCH_TYPE_GLIB */
  388. return lc_mc_search->iovector[lc_index * 2];
  389. #endif /* SEARCH_TYPE_GLIB */
  390. }
  391. /* --------------------------------------------------------------------------------------------- */
  392. int
  393. mc_search_getend_result_by_num (mc_search_t * lc_mc_search, int lc_index)
  394. {
  395. if (lc_mc_search == NULL)
  396. return 0;
  397. if (lc_mc_search->search_type == MC_SEARCH_T_NORMAL)
  398. return 0;
  399. #ifdef SEARCH_TYPE_GLIB
  400. {
  401. gint start_pos;
  402. gint end_pos;
  403. g_match_info_fetch_pos (lc_mc_search->regex_match_info, lc_index, &start_pos, &end_pos);
  404. return (int) end_pos;
  405. }
  406. #else /* SEARCH_TYPE_GLIB */
  407. return lc_mc_search->iovector[lc_index * 2 + 1];
  408. #endif /* SEARCH_TYPE_GLIB */
  409. }
  410. /* --------------------------------------------------------------------------------------------- */
  411. /**
  412. * Replace an old error code and message of an mc_search_t object.
  413. *
  414. * @param mc_search mc_search_t object
  415. * @param code error code, one of mc_search_error_t values
  416. * @param format format of error message. If NULL, the old error string is free'd and become NULL
  417. */
  418. void
  419. mc_search_set_error (mc_search_t * lc_mc_search, mc_search_error_t code, const gchar * format, ...)
  420. {
  421. lc_mc_search->error = code;
  422. MC_PTR_FREE (lc_mc_search->error_str);
  423. if (format != NULL)
  424. {
  425. va_list args;
  426. va_start (args, format);
  427. lc_mc_search->error_str = g_strdup_vprintf (format, args);
  428. va_end (args);
  429. }
  430. }
  431. /* --------------------------------------------------------------------------------------------- */