search.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526
  1. /*
  2. Search text engine.
  3. Interface functions
  4. Copyright (C) 2009-2018
  5. Free Software Foundation, Inc.
  6. Written by:
  7. Slava Zanko <slavazanko@gmail.com>, 2009
  8. Andrew Borodin <aborodin@vmail.ru>, 2013
  9. This file is part of the Midnight Commander.
  10. The Midnight Commander is free software: you can redistribute it
  11. and/or modify it under the terms of the GNU General Public License as
  12. published by the Free Software Foundation, either version 3 of the License,
  13. or (at your option) any later version.
  14. The Midnight Commander is distributed in the hope that it will be useful,
  15. but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. GNU General Public License for more details.
  18. You should have received a copy of the GNU General Public License
  19. along with this program. If not, see <http://www.gnu.org/licenses/>.
  20. */
  21. #include <config.h>
  22. #include <stdarg.h>
  23. #include <stdlib.h>
  24. #include <sys/types.h>
  25. #include "lib/global.h"
  26. #include "lib/strutil.h"
  27. #include "lib/search.h"
  28. #include "lib/util.h"
  29. #ifdef HAVE_CHARSET
  30. #include "lib/charsets.h"
  31. #endif
  32. #include "internal.h"
  33. /*** global variables ****************************************************************************/
  34. /*** file scope macro definitions ****************************************************************/
  35. /*** file scope type declarations ****************************************************************/
  36. /*** file scope variables ************************************************************************/
  37. static const mc_search_type_str_t mc_search__list_types[] = {
  38. {N_("No&rmal"), MC_SEARCH_T_NORMAL},
  39. {N_("Re&gular expression"), MC_SEARCH_T_REGEX},
  40. {N_("He&xadecimal"), MC_SEARCH_T_HEX},
  41. {N_("Wil&dcard search"), MC_SEARCH_T_GLOB},
  42. {NULL, MC_SEARCH_T_INVALID}
  43. };
  44. /*** file scope functions ************************************************************************/
  45. static mc_search_cond_t *
  46. mc_search__cond_struct_new (mc_search_t * lc_mc_search, const char *str,
  47. gsize str_len, const char *charset)
  48. {
  49. mc_search_cond_t *mc_search_cond;
  50. mc_search_cond = g_malloc0 (sizeof (mc_search_cond_t));
  51. mc_search_cond->str = g_string_new_len (str, str_len);
  52. mc_search_cond->charset = g_strdup (charset);
  53. switch (lc_mc_search->search_type)
  54. {
  55. case MC_SEARCH_T_GLOB:
  56. mc_search__cond_struct_new_init_glob (charset, lc_mc_search, mc_search_cond);
  57. break;
  58. case MC_SEARCH_T_NORMAL:
  59. mc_search__cond_struct_new_init_normal (charset, lc_mc_search, mc_search_cond);
  60. break;
  61. case MC_SEARCH_T_REGEX:
  62. mc_search__cond_struct_new_init_regex (charset, lc_mc_search, mc_search_cond);
  63. break;
  64. case MC_SEARCH_T_HEX:
  65. mc_search__cond_struct_new_init_hex (charset, lc_mc_search, mc_search_cond);
  66. break;
  67. default:
  68. break;
  69. }
  70. return mc_search_cond;
  71. }
  72. /* --------------------------------------------------------------------------------------------- */
  73. static void
  74. mc_search__cond_struct_free (mc_search_cond_t * mc_search_cond)
  75. {
  76. if (mc_search_cond->upper)
  77. g_string_free (mc_search_cond->upper, TRUE);
  78. if (mc_search_cond->lower)
  79. g_string_free (mc_search_cond->lower, TRUE);
  80. g_string_free (mc_search_cond->str, TRUE);
  81. g_free (mc_search_cond->charset);
  82. #ifdef SEARCH_TYPE_GLIB
  83. if (mc_search_cond->regex_handle)
  84. g_regex_unref (mc_search_cond->regex_handle);
  85. #else /* SEARCH_TYPE_GLIB */
  86. g_free (mc_search_cond->regex_handle);
  87. #endif /* SEARCH_TYPE_GLIB */
  88. g_free (mc_search_cond);
  89. }
  90. /* --------------------------------------------------------------------------------------------- */
  91. static void
  92. mc_search__conditions_free (GPtrArray * array)
  93. {
  94. g_ptr_array_foreach (array, (GFunc) mc_search__cond_struct_free, NULL);
  95. g_ptr_array_free (array, TRUE);
  96. }
  97. /* --------------------------------------------------------------------------------------------- */
  98. /*** public functions ****************************************************************************/
  99. /* --------------------------------------------------------------------------------------------- */
  100. /* Init search descriptor.
  101. *
  102. * @param original pattern to search
  103. * @param original_charset charset of #original. If NULL then cp_display will be used
  104. *
  105. * @return new mc_search_t object. Use #mc_search_free() to free it.
  106. */
  107. mc_search_t *
  108. mc_search_new (const gchar * original, const gchar * original_charset)
  109. {
  110. if (original == NULL)
  111. return NULL;
  112. return mc_search_new_len (original, strlen (original), original_charset);
  113. }
  114. /* --------------------------------------------------------------------------------------------- */
  115. /* Init search descriptor.
  116. *
  117. * @param original pattern to search
  118. * @param original_len length of #original or -1 if #original is NULL-terminated
  119. * @param original_charset charset of #original. If NULL then cp_display will be used
  120. *
  121. * @return new mc_search_t object. Use #mc_search_free() to free it.
  122. */
  123. mc_search_t *
  124. mc_search_new_len (const gchar * original, gsize original_len, const gchar * original_charset)
  125. {
  126. mc_search_t *lc_mc_search;
  127. if (original == NULL || original_len == 0)
  128. return NULL;
  129. lc_mc_search = g_new0 (mc_search_t, 1);
  130. lc_mc_search->original = g_strndup (original, original_len);
  131. lc_mc_search->original_len = original_len;
  132. #ifdef HAVE_CHARSET
  133. lc_mc_search->original_charset =
  134. g_strdup (original_charset != NULL
  135. && *original_charset != '\0' ? original_charset : cp_display);
  136. #else
  137. (void) original_charset;
  138. #endif
  139. return lc_mc_search;
  140. }
  141. /* --------------------------------------------------------------------------------------------- */
  142. void
  143. mc_search_free (mc_search_t * lc_mc_search)
  144. {
  145. if (lc_mc_search == NULL)
  146. return;
  147. g_free (lc_mc_search->original);
  148. #ifdef HAVE_CHARSET
  149. g_free (lc_mc_search->original_charset);
  150. #endif
  151. g_free (lc_mc_search->error_str);
  152. if (lc_mc_search->conditions != NULL)
  153. mc_search__conditions_free (lc_mc_search->conditions);
  154. #ifdef SEARCH_TYPE_GLIB
  155. if (lc_mc_search->regex_match_info != NULL)
  156. g_match_info_free (lc_mc_search->regex_match_info);
  157. #else /* SEARCH_TYPE_GLIB */
  158. g_free (lc_mc_search->regex_match_info);
  159. #endif /* SEARCH_TYPE_GLIB */
  160. if (lc_mc_search->regex_buffer != NULL)
  161. g_string_free (lc_mc_search->regex_buffer, TRUE);
  162. g_free (lc_mc_search);
  163. }
  164. /* --------------------------------------------------------------------------------------------- */
  165. gboolean
  166. mc_search_prepare (mc_search_t * lc_mc_search)
  167. {
  168. GPtrArray *ret;
  169. ret = g_ptr_array_new ();
  170. #ifdef HAVE_CHARSET
  171. if (lc_mc_search->is_all_charsets)
  172. {
  173. gsize loop1;
  174. for (loop1 = 0; loop1 < codepages->len; loop1++)
  175. {
  176. const char *id;
  177. gsize recoded_str_len;
  178. gchar *buffer;
  179. id = ((codepage_desc *) g_ptr_array_index (codepages, loop1))->id;
  180. if (g_ascii_strcasecmp (id, lc_mc_search->original_charset) == 0)
  181. {
  182. g_ptr_array_add (ret,
  183. mc_search__cond_struct_new (lc_mc_search, lc_mc_search->original,
  184. lc_mc_search->original_len,
  185. lc_mc_search->original_charset));
  186. continue;
  187. }
  188. buffer =
  189. mc_search__recode_str (lc_mc_search->original, lc_mc_search->original_len,
  190. lc_mc_search->original_charset, id, &recoded_str_len);
  191. g_ptr_array_add (ret,
  192. mc_search__cond_struct_new (lc_mc_search, buffer,
  193. recoded_str_len, id));
  194. g_free (buffer);
  195. }
  196. }
  197. else
  198. {
  199. g_ptr_array_add (ret,
  200. mc_search__cond_struct_new (lc_mc_search, lc_mc_search->original,
  201. lc_mc_search->original_len,
  202. lc_mc_search->original_charset));
  203. }
  204. #else
  205. g_ptr_array_add (ret,
  206. mc_search__cond_struct_new (lc_mc_search, lc_mc_search->original,
  207. lc_mc_search->original_len,
  208. str_detect_termencoding ()));
  209. #endif
  210. lc_mc_search->conditions = ret;
  211. return (lc_mc_search->error == MC_SEARCH_E_OK);
  212. }
  213. /* --------------------------------------------------------------------------------------------- */
  214. /**
  215. * Carries out the search.
  216. *
  217. * Returns TRUE if found.
  218. *
  219. * Returns FALSE if not found. In this case, lc_mc_search->error reveals
  220. * the reason:
  221. *
  222. * - MC_SEARCH_E_NOTFOUND: the pattern isn't in the subject string.
  223. * - MC_SEARCH_E_ABORT: the user aborted the search.
  224. * - For any other reason (but not for the above two!): the description
  225. * is in lc_mc_search->error_str.
  226. */
  227. gboolean
  228. mc_search_run (mc_search_t * lc_mc_search, const void *user_data,
  229. gsize start_search, gsize end_search, gsize * found_len)
  230. {
  231. gboolean ret = FALSE;
  232. if (lc_mc_search == NULL || user_data == NULL)
  233. return FALSE;
  234. if (!mc_search_is_type_avail (lc_mc_search->search_type))
  235. {
  236. mc_search_set_error (lc_mc_search, MC_SEARCH_E_INPUT, "%s", _(STR_E_UNKNOWN_TYPE));
  237. return FALSE;
  238. }
  239. #ifdef SEARCH_TYPE_GLIB
  240. if (lc_mc_search->regex_match_info != NULL)
  241. {
  242. g_match_info_free (lc_mc_search->regex_match_info);
  243. lc_mc_search->regex_match_info = NULL;
  244. }
  245. #endif /* SEARCH_TYPE_GLIB */
  246. mc_search_set_error (lc_mc_search, MC_SEARCH_E_OK, NULL);
  247. if ((lc_mc_search->conditions == NULL) && !mc_search_prepare (lc_mc_search))
  248. return FALSE;
  249. switch (lc_mc_search->search_type)
  250. {
  251. case MC_SEARCH_T_NORMAL:
  252. ret = mc_search__run_normal (lc_mc_search, user_data, start_search, end_search, found_len);
  253. break;
  254. case MC_SEARCH_T_REGEX:
  255. ret = mc_search__run_regex (lc_mc_search, user_data, start_search, end_search, found_len);
  256. break;
  257. case MC_SEARCH_T_GLOB:
  258. ret = mc_search__run_glob (lc_mc_search, user_data, start_search, end_search, found_len);
  259. break;
  260. case MC_SEARCH_T_HEX:
  261. ret = mc_search__run_hex (lc_mc_search, user_data, start_search, end_search, found_len);
  262. break;
  263. default:
  264. break;
  265. }
  266. return ret;
  267. }
  268. /* --------------------------------------------------------------------------------------------- */
  269. gboolean
  270. mc_search_is_type_avail (mc_search_type_t search_type)
  271. {
  272. switch (search_type)
  273. {
  274. case MC_SEARCH_T_GLOB:
  275. case MC_SEARCH_T_NORMAL:
  276. case MC_SEARCH_T_REGEX:
  277. case MC_SEARCH_T_HEX:
  278. return TRUE;
  279. default:
  280. break;
  281. }
  282. return FALSE;
  283. }
  284. /* --------------------------------------------------------------------------------------------- */
  285. const mc_search_type_str_t *
  286. mc_search_types_list_get (size_t * num)
  287. {
  288. /* don't count last NULL item */
  289. if (num != NULL)
  290. *num = G_N_ELEMENTS (mc_search__list_types) - 1;
  291. return mc_search__list_types;
  292. }
  293. /* --------------------------------------------------------------------------------------------- */
  294. GString *
  295. mc_search_prepare_replace_str (mc_search_t * lc_mc_search, GString * replace_str)
  296. {
  297. GString *ret;
  298. if (replace_str == NULL || replace_str->len == 0)
  299. return g_string_new ("");
  300. if (lc_mc_search == NULL)
  301. return g_string_new_len (replace_str->str, replace_str->len);
  302. switch (lc_mc_search->search_type)
  303. {
  304. case MC_SEARCH_T_REGEX:
  305. ret = mc_search_regex_prepare_replace_str (lc_mc_search, replace_str);
  306. break;
  307. case MC_SEARCH_T_GLOB:
  308. ret = mc_search_glob_prepare_replace_str (lc_mc_search, replace_str);
  309. break;
  310. case MC_SEARCH_T_NORMAL:
  311. ret = mc_search_normal_prepare_replace_str (lc_mc_search, replace_str);
  312. break;
  313. case MC_SEARCH_T_HEX:
  314. ret = mc_search_hex_prepare_replace_str (lc_mc_search, replace_str);
  315. break;
  316. default:
  317. ret = g_string_new_len (replace_str->str, replace_str->len);
  318. break;
  319. }
  320. return ret;
  321. }
  322. /* --------------------------------------------------------------------------------------------- */
  323. char *
  324. mc_search_prepare_replace_str2 (mc_search_t * lc_mc_search, const char *replace_str)
  325. {
  326. GString *ret;
  327. GString *replace_str2;
  328. replace_str2 = g_string_new (replace_str);
  329. ret = mc_search_prepare_replace_str (lc_mc_search, replace_str2);
  330. g_string_free (replace_str2, TRUE);
  331. return (ret != NULL) ? g_string_free (ret, FALSE) : NULL;
  332. }
  333. /* --------------------------------------------------------------------------------------------- */
  334. gboolean
  335. mc_search_is_fixed_search_str (mc_search_t * lc_mc_search)
  336. {
  337. if (lc_mc_search == NULL)
  338. return FALSE;
  339. switch (lc_mc_search->search_type)
  340. {
  341. case MC_SEARCH_T_REGEX:
  342. case MC_SEARCH_T_GLOB:
  343. return FALSE;
  344. default:
  345. return TRUE;
  346. }
  347. }
  348. /* --------------------------------------------------------------------------------------------- */
  349. /* Search specified pattern in specified string.
  350. *
  351. * @param pattern string to search
  352. * @param pattern_charset charset of #pattern. If NULL then cp_display will be used
  353. * @param str string where search #pattern
  354. * @param search type (normal, regex, hex or glob)
  355. *
  356. * @return TRUE if found is successful, FALSE otherwise.
  357. */
  358. gboolean
  359. mc_search (const gchar * pattern, const gchar * pattern_charset, const gchar * str,
  360. mc_search_type_t type)
  361. {
  362. gboolean ret;
  363. mc_search_t *search;
  364. if (str == NULL)
  365. return FALSE;
  366. search = mc_search_new (pattern, pattern_charset);
  367. if (search == NULL)
  368. return FALSE;
  369. search->search_type = type;
  370. search->is_case_sensitive = TRUE;
  371. if (type == MC_SEARCH_T_GLOB)
  372. search->is_entire_line = TRUE;
  373. ret = mc_search_run (search, str, 0, strlen (str), NULL);
  374. mc_search_free (search);
  375. return ret;
  376. }
  377. /* --------------------------------------------------------------------------------------------- */
  378. int
  379. mc_search_getstart_result_by_num (mc_search_t * lc_mc_search, int lc_index)
  380. {
  381. if (lc_mc_search == NULL)
  382. return 0;
  383. if (lc_mc_search->search_type == MC_SEARCH_T_NORMAL)
  384. return 0;
  385. #ifdef SEARCH_TYPE_GLIB
  386. {
  387. gint start_pos;
  388. gint end_pos;
  389. g_match_info_fetch_pos (lc_mc_search->regex_match_info, lc_index, &start_pos, &end_pos);
  390. return (int) start_pos;
  391. }
  392. #else /* SEARCH_TYPE_GLIB */
  393. return lc_mc_search->iovector[lc_index * 2];
  394. #endif /* SEARCH_TYPE_GLIB */
  395. }
  396. /* --------------------------------------------------------------------------------------------- */
  397. int
  398. mc_search_getend_result_by_num (mc_search_t * lc_mc_search, int lc_index)
  399. {
  400. if (lc_mc_search == NULL)
  401. return 0;
  402. if (lc_mc_search->search_type == MC_SEARCH_T_NORMAL)
  403. return 0;
  404. #ifdef SEARCH_TYPE_GLIB
  405. {
  406. gint start_pos;
  407. gint end_pos;
  408. g_match_info_fetch_pos (lc_mc_search->regex_match_info, lc_index, &start_pos, &end_pos);
  409. return (int) end_pos;
  410. }
  411. #else /* SEARCH_TYPE_GLIB */
  412. return lc_mc_search->iovector[lc_index * 2 + 1];
  413. #endif /* SEARCH_TYPE_GLIB */
  414. }
  415. /* --------------------------------------------------------------------------------------------- */
  416. /**
  417. * Replace an old error code and message of an mc_search_t object.
  418. *
  419. * @param mc_search mc_search_t object
  420. * @param code error code, one of mc_search_error_t values
  421. * @param format format of error message. If NULL, the old error string is free'd and become NULL
  422. */
  423. void
  424. mc_search_set_error (mc_search_t * lc_mc_search, mc_search_error_t code, const gchar * format, ...)
  425. {
  426. lc_mc_search->error = code;
  427. MC_PTR_FREE (lc_mc_search->error_str);
  428. if (format != NULL)
  429. {
  430. va_list args;
  431. va_start (args, format);
  432. lc_mc_search->error_str = g_strdup_vprintf (format, args);
  433. va_end (args);
  434. }
  435. }
  436. /* --------------------------------------------------------------------------------------------- */