// File: simple_pattern.c
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "../libnetdata.h"
  3. struct simple_pattern {
  4. const char *match;
  5. uint32_t len;
  6. SIMPLE_PREFIX_MODE mode;
  7. bool negative;
  8. bool case_sensitive;
  9. struct simple_pattern *child;
  10. struct simple_pattern *next;
  11. };
  12. static struct simple_pattern *parse_pattern(char *str, SIMPLE_PREFIX_MODE default_mode, size_t count) {
  13. if(unlikely(count >= 1000))
  14. return NULL;
  15. // fprintf(stderr, "PARSING PATTERN: '%s'\n", str);
  16. SIMPLE_PREFIX_MODE mode;
  17. struct simple_pattern *child = NULL;
  18. char *s = str, *c = str;
  19. // skip asterisks in front
  20. while(*c == '*') c++;
  21. // find the next asterisk
  22. while(*c && *c != '*') c++;
  23. // do we have an asterisk in the middle?
  24. if(*c == '*' && c[1] != '\0') {
  25. // yes, we have
  26. child = parse_pattern(c, default_mode, count + 1);
  27. c[1] = '\0';
  28. }
  29. // check what this one matches
  30. size_t len = strlen(s);
  31. if(len >= 2 && *s == '*' && s[len - 1] == '*') {
  32. s[len - 1] = '\0';
  33. s++;
  34. mode = SIMPLE_PATTERN_SUBSTRING;
  35. }
  36. else if(len >= 1 && *s == '*') {
  37. s++;
  38. mode = SIMPLE_PATTERN_SUFFIX;
  39. }
  40. else if(len >= 1 && s[len - 1] == '*') {
  41. s[len - 1] = '\0';
  42. mode = SIMPLE_PATTERN_PREFIX;
  43. }
  44. else
  45. mode = default_mode;
  46. // allocate the structure
  47. struct simple_pattern *m = callocz(1, sizeof(struct simple_pattern));
  48. if(*s) {
  49. m->match = strdupz(s);
  50. m->len = strlen(m->match);
  51. m->mode = mode;
  52. }
  53. else {
  54. m->mode = SIMPLE_PATTERN_SUBSTRING;
  55. }
  56. m->child = child;
  57. return m;
  58. }
  59. SIMPLE_PATTERN *simple_pattern_create(const char *list, const char *separators, SIMPLE_PREFIX_MODE default_mode, bool case_sensitive) {
  60. struct simple_pattern *root = NULL, *last = NULL;
  61. if(unlikely(!list || !*list)) return root;
  62. char isseparator[256] = {
  63. [' '] = 1 // space
  64. , ['\t'] = 1 // tab
  65. , ['\r'] = 1 // carriage return
  66. , ['\n'] = 1 // new line
  67. , ['\f'] = 1 // form feed
  68. , ['\v'] = 1 // vertical tab
  69. };
  70. if (unlikely(separators && *separators)) {
  71. memset(&isseparator[0], 0, sizeof(isseparator));
  72. while(*separators) isseparator[(unsigned char)*separators++] = 1;
  73. }
  74. char *buf = mallocz(strlen(list) + 1);
  75. const char *s = list;
  76. while(s && *s) {
  77. buf[0] = '\0';
  78. char *c = buf;
  79. bool negative = false;
  80. // skip all spaces
  81. while(isseparator[(unsigned char)*s])
  82. s++;
  83. if(*s == '!') {
  84. negative = true;
  85. s++;
  86. }
  87. // empty string
  88. if(unlikely(!*s))
  89. break;
  90. // find the next space
  91. char escape = 0;
  92. while(*s) {
  93. if(*s == '\\' && !escape) {
  94. escape = 1;
  95. s++;
  96. }
  97. else {
  98. if (isseparator[(unsigned char)*s] && !escape) {
  99. s++;
  100. break;
  101. }
  102. *c++ = *s++;
  103. escape = 0;
  104. }
  105. }
  106. // terminate our string
  107. *c = '\0';
  108. // if we matched the empty string, skip it
  109. if(unlikely(!*buf))
  110. continue;
  111. // fprintf(stderr, "FOUND PATTERN: '%s'\n", buf);
  112. struct simple_pattern *m = parse_pattern(buf, default_mode, 0);
  113. m->negative = negative;
  114. m->case_sensitive = case_sensitive;
  115. if(default_mode == SIMPLE_PATTERN_SUBSTRING) {
  116. m->mode = SIMPLE_PATTERN_SUBSTRING;
  117. struct simple_pattern *tm = m;
  118. for(tm = m; tm->child ; tm = tm->child) ;
  119. tm->mode = SIMPLE_PATTERN_SUBSTRING;
  120. }
  121. // link it at the end
  122. if(unlikely(!root))
  123. root = last = m;
  124. else {
  125. last->next = m;
  126. last = m;
  127. }
  128. }
  129. freez(buf);
  130. return (SIMPLE_PATTERN *)root;
  131. }
  132. static inline char *add_wildcarded(const char *matched, size_t matched_size, char *wildcarded, size_t *wildcarded_size) {
  133. //if(matched_size) {
  134. // char buf[matched_size + 1];
  135. // strncpyz(buf, matched, matched_size);
  136. // fprintf(stderr, "ADD WILDCARDED '%s' of length %zu\n", buf, matched_size);
  137. //}
  138. if(unlikely(wildcarded && *wildcarded_size && matched && *matched && matched_size)) {
  139. size_t wss = *wildcarded_size - 1;
  140. size_t len = (matched_size < wss)?matched_size:wss;
  141. if(likely(len)) {
  142. strncpyz(wildcarded, matched, len);
  143. *wildcarded_size -= len;
  144. return &wildcarded[len];
  145. }
  146. }
  147. return wildcarded;
  148. }
  149. static inline int sp_strcmp(const char *s1, const char *s2, bool case_sensitive) {
  150. if(case_sensitive)
  151. return strcmp(s1, s2);
  152. return strcasecmp(s1, s2);
  153. }
  154. static inline int sp_strncmp(const char *s1, const char *s2, size_t n, bool case_sensitive) {
  155. if(case_sensitive)
  156. return strncmp(s1, s2, n);
  157. return strncasecmp(s1, s2, n);
  158. }
  159. static inline char *sp_strstr(const char *haystack, const char *needle, bool case_sensitive) {
  160. if(case_sensitive)
  161. return strstr(haystack, needle);
  162. return strcasestr(haystack, needle);
  163. }
  164. static inline bool match_pattern(struct simple_pattern *m, const char *str, size_t len, char *wildcarded, size_t *wildcarded_size) {
  165. char *s;
  166. bool loop = true;
  167. while(loop && m->len <= len) {
  168. loop = false;
  169. switch(m->mode) {
  170. default:
  171. case SIMPLE_PATTERN_EXACT:
  172. if(unlikely(sp_strcmp(str, m->match, m->case_sensitive) == 0)) {
  173. if(!m->child) return true;
  174. return false;
  175. }
  176. break;
  177. case SIMPLE_PATTERN_SUBSTRING:
  178. if(!m->len) return true;
  179. if((s = sp_strstr(str, m->match, m->case_sensitive))) {
  180. wildcarded = add_wildcarded(str, s - str, wildcarded, wildcarded_size);
  181. if(!m->child) {
  182. add_wildcarded(&s[m->len], len - (&s[m->len] - str), wildcarded, wildcarded_size);
  183. return true;
  184. }
  185. // instead of recursion
  186. {
  187. len = len - (s - str) - m->len;
  188. str = &s[m->len];
  189. m = m->child;
  190. loop = true;
  191. // return match_pattern(m->child, &s[m->len], len - (s - str) - m->len, wildcarded, wildcarded_size);
  192. }
  193. }
  194. break;
  195. case SIMPLE_PATTERN_PREFIX:
  196. if(unlikely(sp_strncmp(str, m->match, m->len, m->case_sensitive) == 0)) {
  197. if(!m->child) {
  198. add_wildcarded(&str[m->len], len - m->len, wildcarded, wildcarded_size);
  199. return true;
  200. }
  201. // instead of recursion
  202. {
  203. len = len - m->len;
  204. str = &str[m->len];
  205. m = m->child;
  206. loop = true;
  207. // return match_pattern(m->child, &str[m->len], len - m->len, wildcarded, wildcarded_size);
  208. }
  209. }
  210. break;
  211. case SIMPLE_PATTERN_SUFFIX:
  212. if(unlikely(sp_strcmp(&str[len - m->len], m->match, m->case_sensitive) == 0)) {
  213. add_wildcarded(str, len - m->len, wildcarded, wildcarded_size);
  214. if(!m->child) return true;
  215. return false;
  216. }
  217. break;
  218. }
  219. }
  220. return false;
  221. }
  222. static inline SIMPLE_PATTERN_RESULT simple_pattern_matches_extract_with_length(SIMPLE_PATTERN *list, const char *str, size_t len, char *wildcarded, size_t wildcarded_size) {
  223. struct simple_pattern *m, *root = (struct simple_pattern *)list;
  224. for(m = root; m ; m = m->next) {
  225. char *ws = wildcarded;
  226. size_t wss = wildcarded_size;
  227. if(unlikely(ws)) *ws = '\0';
  228. if (match_pattern(m, str, len, ws, &wss)) {
  229. if (m->negative) return SP_MATCHED_NEGATIVE;
  230. return SP_MATCHED_POSITIVE;
  231. }
  232. }
  233. return SP_NOT_MATCHED;
  234. }
  235. SIMPLE_PATTERN_RESULT simple_pattern_matches_buffer_extract(SIMPLE_PATTERN *list, BUFFER *str, char *wildcarded, size_t wildcarded_size) {
  236. if(!list || !str || buffer_strlen(str)) return SP_NOT_MATCHED;
  237. return simple_pattern_matches_extract_with_length(list, buffer_tostring(str), buffer_strlen(str), wildcarded, wildcarded_size);
  238. }
  239. SIMPLE_PATTERN_RESULT simple_pattern_matches_string_extract(SIMPLE_PATTERN *list, STRING *str, char *wildcarded, size_t wildcarded_size) {
  240. if(!list || !str) return SP_NOT_MATCHED;
  241. return simple_pattern_matches_extract_with_length(list, string2str(str), string_strlen(str), wildcarded, wildcarded_size);
  242. }
  243. SIMPLE_PATTERN_RESULT simple_pattern_matches_extract(SIMPLE_PATTERN *list, const char *str, char *wildcarded, size_t wildcarded_size) {
  244. if(!list || !str || !*str) return SP_NOT_MATCHED;
  245. return simple_pattern_matches_extract_with_length(list, str, strlen(str), wildcarded, wildcarded_size);
  246. }
  247. SIMPLE_PATTERN_RESULT simple_pattern_matches_length_extract(SIMPLE_PATTERN *list, const char *str, size_t len, char *wildcarded, size_t wildcarded_size) {
  248. if(!list || !str || !*str || !len) return SP_NOT_MATCHED;
  249. return simple_pattern_matches_extract_with_length(list, str, len, wildcarded, wildcarded_size);
  250. }
  251. static inline void free_pattern(struct simple_pattern *m) {
  252. if(!m) return;
  253. free_pattern(m->child);
  254. free_pattern(m->next);
  255. freez((void *)m->match);
  256. freez(m);
  257. }
  258. void simple_pattern_free(SIMPLE_PATTERN *list) {
  259. if(!list) return;
  260. free_pattern(((struct simple_pattern *)list));
  261. }
  262. /* Debugging patterns
  263. This code should be dead - it is useful for debugging but should not be called by production code.
  264. Feel free to comment it out, but please leave it in the file.
  265. */
  266. extern void simple_pattern_dump(uint64_t debug_type, SIMPLE_PATTERN *p)
  267. {
  268. struct simple_pattern *root = (struct simple_pattern *)p;
  269. if(root==NULL) {
  270. netdata_log_debug(debug_type,"dump_pattern(NULL)");
  271. return;
  272. }
  273. netdata_log_debug(debug_type,"dump_pattern(%p) child=%p next=%p mode=%u match=%s", root, root->child, root->next, root->mode,
  274. root->match);
  275. if(root->child!=NULL)
  276. simple_pattern_dump(debug_type, (SIMPLE_PATTERN*)root->child);
  277. if(root->next!=NULL)
  278. simple_pattern_dump(debug_type, (SIMPLE_PATTERN*)root->next);
  279. }
  280. /* Heuristic: decide if the pattern could match a DNS name.
  281. Although this functionality is used directly by socket.c:connection_allowed() it must be in this file
  282. because of the SIMPLE_PATTERN/simple_pattern structure hiding.
  283. Based on RFC952 / RFC1123. We need to decide if the pattern may match a DNS name, or not. For the negative
  284. cases we need to be sure that it can only match an ipv4 or ipv6 address:
* IPv6 addresses contain ':', which is an illegal character in DNS names.
* IPv4 addresses cannot contain alphabetic characters.
  287. * DNS TLDs must be alphanumeric to distinguish from IPv4.
  288. Some patterns (e.g. "*a*" ) could match multiple cases (i.e. DNS or IPv6).
  289. Some patterns will be awkward (e.g. "192.168.*") as they look like they are intended to match IPv4-only
  290. but could match DNS (i.e. "192.168.com" is a valid name).
  291. */
  292. static void scan_is_potential_name(struct simple_pattern *p, int *alpha, int *colon, int *wildcards)
  293. {
  294. while (p) {
  295. if (p->match) {
  296. if(p->mode == SIMPLE_PATTERN_EXACT && !strcmp("localhost", p->match)) {
  297. p = p->child;
  298. continue;
  299. }
  300. char const *scan = p->match;
  301. while (*scan != 0) {
  302. if ((*scan >= 'a' && *scan <= 'z') || (*scan >= 'A' && *scan <= 'Z'))
  303. *alpha = 1;
  304. if (*scan == ':')
  305. *colon = 1;
  306. scan++;
  307. }
  308. if (p->mode != SIMPLE_PATTERN_EXACT)
  309. *wildcards = 1;
  310. p = p->child;
  311. }
  312. }
  313. }
  314. extern int simple_pattern_is_potential_name(SIMPLE_PATTERN *p)
  315. {
  316. int alpha=0, colon=0, wildcards=0;
  317. struct simple_pattern *root = (struct simple_pattern*)p;
  318. while (root != NULL) {
  319. if (root->match != NULL) {
  320. scan_is_potential_name(root, &alpha, &colon, &wildcards);
  321. }
  322. if (root->mode != SIMPLE_PATTERN_EXACT)
  323. wildcards = 1;
  324. root = root->next;
  325. }
  326. return (alpha || wildcards) && !colon;
  327. }
  328. char *simple_pattern_trim_around_equal(char *src) {
  329. char *store = mallocz(strlen(src) + 1);
  330. char *dst = store;
  331. while (*src) {
  332. if (*src == '=') {
  333. if (*(dst -1) == ' ')
  334. dst--;
  335. *dst++ = *src++;
  336. if (*src == ' ')
  337. src++;
  338. }
  339. *dst++ = *src++;
  340. }
  341. *dst = 0x00;
  342. return store;
  343. }
  344. char *simple_pattern_iterate(SIMPLE_PATTERN **p)
  345. {
  346. struct simple_pattern *root = (struct simple_pattern *) *p;
  347. struct simple_pattern **Proot = (struct simple_pattern **)p;
  348. (*Proot) = (*Proot)->next;
  349. return (char *) root->match;
  350. }