search.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477
  1. /*
  2. Search text engine.
  3. Interface functions
  4. Copyright (C) 2009, 2011, 2013
  5. The Free Software Foundation, Inc.
  6. Written by:
  7. Slava Zanko <slavazanko@gmail.com>, 2009
  8. Andrew Borodin <aborodin@vmail.ru>, 2013
  9. This file is part of the Midnight Commander.
  10. The Midnight Commander is free software: you can redistribute it
  11. and/or modify it under the terms of the GNU General Public License as
  12. published by the Free Software Foundation, either version 3 of the License,
  13. or (at your option) any later version.
  14. The Midnight Commander is distributed in the hope that it will be useful,
  15. but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. GNU General Public License for more details.
  18. You should have received a copy of the GNU General Public License
  19. along with this program. If not, see <http://www.gnu.org/licenses/>.
  20. */
  21. #include <config.h>
  22. #include <stdlib.h>
  23. #include <sys/types.h>
  24. #include "lib/global.h"
  25. #include "lib/strutil.h"
  26. #include "lib/search.h"
  27. #ifdef HAVE_CHARSET
  28. #include "lib/charsets.h"
  29. #endif
  30. #include "internal.h"
  31. /*** global variables ****************************************************************************/
  32. /*** file scope macro definitions ****************************************************************/
  33. /*** file scope type declarations ****************************************************************/
  34. /*** file scope variables ************************************************************************/
  35. static const mc_search_type_str_t mc_search__list_types[] = {
  36. {N_("No&rmal"), MC_SEARCH_T_NORMAL},
  37. {N_("Re&gular expression"), MC_SEARCH_T_REGEX},
  38. {N_("He&xadecimal"), MC_SEARCH_T_HEX},
  39. {N_("Wil&dcard search"), MC_SEARCH_T_GLOB},
  40. {NULL, -1}
  41. };
  42. /*** file scope functions ************************************************************************/
  43. static mc_search_cond_t *
  44. mc_search__cond_struct_new (mc_search_t * lc_mc_search, const char *str,
  45. gsize str_len, const char *charset)
  46. {
  47. mc_search_cond_t *mc_search_cond;
  48. mc_search_cond = g_malloc0 (sizeof (mc_search_cond_t));
  49. mc_search_cond->str = g_string_new_len (str, str_len);
  50. mc_search_cond->charset = g_strdup (charset);
  51. switch (lc_mc_search->search_type)
  52. {
  53. case MC_SEARCH_T_GLOB:
  54. mc_search__cond_struct_new_init_glob (charset, lc_mc_search, mc_search_cond);
  55. break;
  56. case MC_SEARCH_T_NORMAL:
  57. mc_search__cond_struct_new_init_normal (charset, lc_mc_search, mc_search_cond);
  58. break;
  59. case MC_SEARCH_T_REGEX:
  60. mc_search__cond_struct_new_init_regex (charset, lc_mc_search, mc_search_cond);
  61. break;
  62. case MC_SEARCH_T_HEX:
  63. mc_search__cond_struct_new_init_hex (charset, lc_mc_search, mc_search_cond);
  64. break;
  65. default:
  66. break;
  67. }
  68. return mc_search_cond;
  69. }
  70. /* --------------------------------------------------------------------------------------------- */
  71. static void
  72. mc_search__cond_struct_free (mc_search_cond_t * mc_search_cond)
  73. {
  74. if (mc_search_cond->upper)
  75. g_string_free (mc_search_cond->upper, TRUE);
  76. if (mc_search_cond->lower)
  77. g_string_free (mc_search_cond->lower, TRUE);
  78. g_string_free (mc_search_cond->str, TRUE);
  79. g_free (mc_search_cond->charset);
  80. #ifdef SEARCH_TYPE_GLIB
  81. if (mc_search_cond->regex_handle)
  82. g_regex_unref (mc_search_cond->regex_handle);
  83. #else /* SEARCH_TYPE_GLIB */
  84. g_free (mc_search_cond->regex_handle);
  85. #endif /* SEARCH_TYPE_GLIB */
  86. g_free (mc_search_cond);
  87. }
  88. /* --------------------------------------------------------------------------------------------- */
  89. static void
  90. mc_search__conditions_free (GPtrArray * array)
  91. {
  92. g_ptr_array_foreach (array, (GFunc) mc_search__cond_struct_free, NULL);
  93. g_ptr_array_free (array, TRUE);
  94. }
  95. /* --------------------------------------------------------------------------------------------- */
  96. /*** public functions ****************************************************************************/
  97. /* --------------------------------------------------------------------------------------------- */
  98. /* Init search descriptor.
  99. *
  100. * @param original pattern to search
  101. * @param original_len length of #original or -1 if #original is NULL-terminated
  102. * @param original_charset charset of #original. If NULL then cp_display will be used
  103. *
  104. * @return new mc_search_t object. Use #mc_search_free() to free it.
  105. */
  106. mc_search_t *
  107. mc_search_new (const gchar * original, gsize original_len, const gchar * original_charset)
  108. {
  109. mc_search_t *lc_mc_search;
  110. if (original == NULL)
  111. return NULL;
  112. if ((gssize) original_len == -1)
  113. {
  114. original_len = strlen (original);
  115. if (original_len == 0)
  116. return NULL;
  117. }
  118. lc_mc_search = g_new0 (mc_search_t, 1);
  119. lc_mc_search->original = g_strndup (original, original_len);
  120. lc_mc_search->original_len = original_len;
  121. #ifdef HAVE_CHARSET
  122. lc_mc_search->original_charset =
  123. g_strdup (original_charset != NULL
  124. && *original_charset != '\0' ? original_charset : cp_display);
  125. #else
  126. (void) original_charset;
  127. #endif
  128. return lc_mc_search;
  129. }
  130. /* --------------------------------------------------------------------------------------------- */
  131. void
  132. mc_search_free (mc_search_t * lc_mc_search)
  133. {
  134. if (lc_mc_search == NULL)
  135. return;
  136. g_free (lc_mc_search->original);
  137. #ifdef HAVE_CHARSET
  138. g_free (lc_mc_search->original_charset);
  139. #endif
  140. g_free (lc_mc_search->error_str);
  141. if (lc_mc_search->conditions != NULL)
  142. mc_search__conditions_free (lc_mc_search->conditions);
  143. #ifdef SEARCH_TYPE_GLIB
  144. if (lc_mc_search->regex_match_info != NULL)
  145. g_match_info_free (lc_mc_search->regex_match_info);
  146. #else /* SEARCH_TYPE_GLIB */
  147. g_free (lc_mc_search->regex_match_info);
  148. #endif /* SEARCH_TYPE_GLIB */
  149. if (lc_mc_search->regex_buffer != NULL)
  150. g_string_free (lc_mc_search->regex_buffer, TRUE);
  151. g_free (lc_mc_search);
  152. }
  153. /* --------------------------------------------------------------------------------------------- */
  154. gboolean
  155. mc_search_prepare (mc_search_t * lc_mc_search)
  156. {
  157. GPtrArray *ret;
  158. ret = g_ptr_array_new ();
  159. #ifdef HAVE_CHARSET
  160. if (lc_mc_search->is_all_charsets)
  161. {
  162. gsize loop1;
  163. for (loop1 = 0; loop1 < codepages->len; loop1++)
  164. {
  165. const char *id;
  166. gsize recoded_str_len;
  167. gchar *buffer;
  168. id = ((codepage_desc *) g_ptr_array_index (codepages, loop1))->id;
  169. if (g_ascii_strcasecmp (id, lc_mc_search->original_charset) == 0)
  170. {
  171. g_ptr_array_add (ret,
  172. mc_search__cond_struct_new (lc_mc_search, lc_mc_search->original,
  173. lc_mc_search->original_len,
  174. lc_mc_search->original_charset));
  175. continue;
  176. }
  177. buffer =
  178. mc_search__recode_str (lc_mc_search->original, lc_mc_search->original_len,
  179. lc_mc_search->original_charset, id, &recoded_str_len);
  180. g_ptr_array_add (ret,
  181. mc_search__cond_struct_new (lc_mc_search, buffer,
  182. recoded_str_len, id));
  183. g_free (buffer);
  184. }
  185. }
  186. else
  187. {
  188. g_ptr_array_add (ret,
  189. mc_search__cond_struct_new (lc_mc_search, lc_mc_search->original,
  190. lc_mc_search->original_len,
  191. lc_mc_search->original_charset));
  192. }
  193. #else
  194. g_ptr_array_add (ret,
  195. mc_search__cond_struct_new (lc_mc_search, lc_mc_search->original,
  196. lc_mc_search->original_len,
  197. str_detect_termencoding ()));
  198. #endif
  199. lc_mc_search->conditions = ret;
  200. return (lc_mc_search->error == MC_SEARCH_E_OK);
  201. }
  202. /* --------------------------------------------------------------------------------------------- */
  203. gboolean
  204. mc_search_run (mc_search_t * lc_mc_search, const void *user_data,
  205. gsize start_search, gsize end_search, gsize * found_len)
  206. {
  207. gboolean ret = FALSE;
  208. if (lc_mc_search == NULL || user_data == NULL)
  209. return FALSE;
  210. if (!mc_search_is_type_avail (lc_mc_search->search_type))
  211. {
  212. lc_mc_search->error = MC_SEARCH_E_INPUT;
  213. lc_mc_search->error_str = g_strdup (_(STR_E_UNKNOWN_TYPE));
  214. return FALSE;
  215. }
  216. #ifdef SEARCH_TYPE_GLIB
  217. if (lc_mc_search->regex_match_info != NULL)
  218. {
  219. g_match_info_free (lc_mc_search->regex_match_info);
  220. lc_mc_search->regex_match_info = NULL;
  221. }
  222. #endif /* SEARCH_TYPE_GLIB */
  223. lc_mc_search->error = MC_SEARCH_E_OK;
  224. g_free (lc_mc_search->error_str);
  225. lc_mc_search->error_str = NULL;
  226. if ((lc_mc_search->conditions == NULL) && !mc_search_prepare (lc_mc_search))
  227. return FALSE;
  228. switch (lc_mc_search->search_type)
  229. {
  230. case MC_SEARCH_T_NORMAL:
  231. ret = mc_search__run_normal (lc_mc_search, user_data, start_search, end_search, found_len);
  232. break;
  233. case MC_SEARCH_T_REGEX:
  234. ret = mc_search__run_regex (lc_mc_search, user_data, start_search, end_search, found_len);
  235. break;
  236. case MC_SEARCH_T_GLOB:
  237. ret = mc_search__run_glob (lc_mc_search, user_data, start_search, end_search, found_len);
  238. break;
  239. case MC_SEARCH_T_HEX:
  240. ret = mc_search__run_hex (lc_mc_search, user_data, start_search, end_search, found_len);
  241. break;
  242. default:
  243. break;
  244. }
  245. return ret;
  246. }
  247. /* --------------------------------------------------------------------------------------------- */
  248. gboolean
  249. mc_search_is_type_avail (mc_search_type_t search_type)
  250. {
  251. switch (search_type)
  252. {
  253. case MC_SEARCH_T_GLOB:
  254. case MC_SEARCH_T_NORMAL:
  255. case MC_SEARCH_T_REGEX:
  256. case MC_SEARCH_T_HEX:
  257. return TRUE;
  258. default:
  259. break;
  260. }
  261. return FALSE;
  262. }
  263. /* --------------------------------------------------------------------------------------------- */
  264. const mc_search_type_str_t *
  265. mc_search_types_list_get (size_t * num)
  266. {
  267. /* don't count last NULL item */
  268. if (num != NULL)
  269. *num = G_N_ELEMENTS (mc_search__list_types) - 1;
  270. return mc_search__list_types;
  271. }
  272. /* --------------------------------------------------------------------------------------------- */
  273. GString *
  274. mc_search_prepare_replace_str (mc_search_t * lc_mc_search, GString * replace_str)
  275. {
  276. GString *ret;
  277. if (lc_mc_search == NULL)
  278. return g_string_new_len (replace_str->str, replace_str->len);
  279. if (replace_str == NULL || replace_str->str == NULL || replace_str->len == 0)
  280. return g_string_new ("");
  281. switch (lc_mc_search->search_type)
  282. {
  283. case MC_SEARCH_T_REGEX:
  284. ret = mc_search_regex_prepare_replace_str (lc_mc_search, replace_str);
  285. break;
  286. case MC_SEARCH_T_GLOB:
  287. ret = mc_search_glob_prepare_replace_str (lc_mc_search, replace_str);
  288. break;
  289. case MC_SEARCH_T_NORMAL:
  290. ret = mc_search_normal_prepare_replace_str (lc_mc_search, replace_str);
  291. break;
  292. case MC_SEARCH_T_HEX:
  293. ret = mc_search_hex_prepare_replace_str (lc_mc_search, replace_str);
  294. break;
  295. default:
  296. ret = g_string_new_len (replace_str->str, replace_str->len);
  297. break;
  298. }
  299. return ret;
  300. }
  301. /* --------------------------------------------------------------------------------------------- */
  302. char *
  303. mc_search_prepare_replace_str2 (mc_search_t * lc_mc_search, char *replace_str)
  304. {
  305. GString *ret;
  306. GString *replace_str2;
  307. replace_str2 = g_string_new (replace_str);
  308. ret = mc_search_prepare_replace_str (lc_mc_search, replace_str2);
  309. g_string_free (replace_str2, TRUE);
  310. return (ret != NULL) ? g_string_free (ret, FALSE) : NULL;
  311. }
  312. /* --------------------------------------------------------------------------------------------- */
  313. gboolean
  314. mc_search_is_fixed_search_str (mc_search_t * lc_mc_search)
  315. {
  316. if (lc_mc_search == NULL)
  317. return FALSE;
  318. switch (lc_mc_search->search_type)
  319. {
  320. case MC_SEARCH_T_REGEX:
  321. case MC_SEARCH_T_GLOB:
  322. return FALSE;
  323. default:
  324. return TRUE;
  325. }
  326. }
  327. /* --------------------------------------------------------------------------------------------- */
  328. /* Search specified pattern in specified string.
  329. *
  330. * @param pattern string to search
  331. * @param pattern_charset charset of #pattern. If NULL then cp_display will be used
  332. * @param str string where search #pattern
  333. * @param search type (normal, regex, hex or glob)
  334. *
  335. * @return TRUE if found is successful, FALSE otherwise.
  336. */
  337. gboolean
  338. mc_search (const gchar * pattern, const gchar * pattern_charset, const gchar * str,
  339. mc_search_type_t type)
  340. {
  341. gboolean ret;
  342. mc_search_t *search;
  343. if (str == NULL)
  344. return FALSE;
  345. search = mc_search_new (pattern, -1, pattern_charset);
  346. if (search == NULL)
  347. return FALSE;
  348. search->search_type = type;
  349. search->is_case_sensitive = TRUE;
  350. if (type == MC_SEARCH_T_GLOB)
  351. search->is_entire_line = TRUE;
  352. ret = mc_search_run (search, str, 0, strlen (str), NULL);
  353. mc_search_free (search);
  354. return ret;
  355. }
  356. /* --------------------------------------------------------------------------------------------- */
  357. int
  358. mc_search_getstart_result_by_num (mc_search_t * lc_mc_search, int lc_index)
  359. {
  360. if (lc_mc_search == NULL)
  361. return 0;
  362. if (lc_mc_search->search_type == MC_SEARCH_T_NORMAL)
  363. return 0;
  364. #ifdef SEARCH_TYPE_GLIB
  365. {
  366. gint start_pos;
  367. gint end_pos;
  368. g_match_info_fetch_pos (lc_mc_search->regex_match_info, lc_index, &start_pos, &end_pos);
  369. return (int) start_pos;
  370. }
  371. #else /* SEARCH_TYPE_GLIB */
  372. return lc_mc_search->iovector[lc_index * 2];
  373. #endif /* SEARCH_TYPE_GLIB */
  374. }
  375. /* --------------------------------------------------------------------------------------------- */
  376. int
  377. mc_search_getend_result_by_num (mc_search_t * lc_mc_search, int lc_index)
  378. {
  379. if (lc_mc_search == NULL)
  380. return 0;
  381. if (lc_mc_search->search_type == MC_SEARCH_T_NORMAL)
  382. return 0;
  383. #ifdef SEARCH_TYPE_GLIB
  384. {
  385. gint start_pos;
  386. gint end_pos;
  387. g_match_info_fetch_pos (lc_mc_search->regex_match_info, lc_index, &start_pos, &end_pos);
  388. return (int) end_pos;
  389. }
  390. #else /* SEARCH_TYPE_GLIB */
  391. return lc_mc_search->iovector[lc_index * 2 + 1];
  392. #endif /* SEARCH_TYPE_GLIB */
  393. }
  394. /* --------------------------------------------------------------------------------------------- */