simple_pattern.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "../libnetdata.h"
  3. struct simple_pattern {
  4. const char *match;
  5. size_t len;
  6. SIMPLE_PREFIX_MODE mode;
  7. char negative;
  8. struct simple_pattern *child;
  9. struct simple_pattern *next;
  10. };
  11. static inline struct simple_pattern *parse_pattern(char *str, SIMPLE_PREFIX_MODE default_mode) {
  12. // fprintf(stderr, "PARSING PATTERN: '%s'\n", str);
  13. SIMPLE_PREFIX_MODE mode;
  14. struct simple_pattern *child = NULL;
  15. char *s = str, *c = str;
  16. // skip asterisks in front
  17. while(*c == '*') c++;
  18. // find the next asterisk
  19. while(*c && *c != '*') c++;
  20. // do we have an asterisk in the middle?
  21. if(*c == '*' && c[1] != '\0') {
  22. // yes, we have
  23. child = parse_pattern(c, default_mode);
  24. c[1] = '\0';
  25. }
  26. // check what this one matches
  27. size_t len = strlen(s);
  28. if(len >= 2 && *s == '*' && s[len - 1] == '*') {
  29. s[len - 1] = '\0';
  30. s++;
  31. mode = SIMPLE_PATTERN_SUBSTRING;
  32. }
  33. else if(len >= 1 && *s == '*') {
  34. s++;
  35. mode = SIMPLE_PATTERN_SUFFIX;
  36. }
  37. else if(len >= 1 && s[len - 1] == '*') {
  38. s[len - 1] = '\0';
  39. mode = SIMPLE_PATTERN_PREFIX;
  40. }
  41. else
  42. mode = default_mode;
  43. // allocate the structure
  44. struct simple_pattern *m = callocz(1, sizeof(struct simple_pattern));
  45. if(*s) {
  46. m->match = strdupz(s);
  47. m->len = strlen(m->match);
  48. m->mode = mode;
  49. }
  50. else {
  51. m->mode = SIMPLE_PATTERN_SUBSTRING;
  52. }
  53. m->child = child;
  54. return m;
  55. }
  56. SIMPLE_PATTERN *simple_pattern_create(const char *list, const char *separators, SIMPLE_PREFIX_MODE default_mode) {
  57. struct simple_pattern *root = NULL, *last = NULL;
  58. if(unlikely(!list || !*list)) return root;
  59. int isseparator[256] = {
  60. [' '] = 1 // space
  61. , ['\t'] = 1 // tab
  62. , ['\r'] = 1 // carriage return
  63. , ['\n'] = 1 // new line
  64. , ['\f'] = 1 // form feed
  65. , ['\v'] = 1 // vertical tab
  66. };
  67. if (unlikely(separators && *separators)) {
  68. memset(&isseparator[0], 0, sizeof(isseparator));
  69. while(*separators) isseparator[(unsigned char)*separators++] = 1;
  70. }
  71. char *buf = mallocz(strlen(list) + 1);
  72. const char *s = list;
  73. while(s && *s) {
  74. buf[0] = '\0';
  75. char *c = buf;
  76. char negative = 0;
  77. // skip all spaces
  78. while(isseparator[(unsigned char)*s])
  79. s++;
  80. if(*s == '!') {
  81. negative = 1;
  82. s++;
  83. }
  84. // empty string
  85. if(unlikely(!*s))
  86. break;
  87. // find the next space
  88. char escape = 0;
  89. while(*s) {
  90. if(*s == '\\' && !escape) {
  91. escape = 1;
  92. s++;
  93. }
  94. else {
  95. if (isseparator[(unsigned char)*s] && !escape) {
  96. s++;
  97. break;
  98. }
  99. *c++ = *s++;
  100. escape = 0;
  101. }
  102. }
  103. // terminate our string
  104. *c = '\0';
  105. // if we matched the empty string, skip it
  106. if(unlikely(!*buf))
  107. continue;
  108. // fprintf(stderr, "FOUND PATTERN: '%s'\n", buf);
  109. struct simple_pattern *m = parse_pattern(buf, default_mode);
  110. m->negative = negative;
  111. // link it at the end
  112. if(unlikely(!root))
  113. root = last = m;
  114. else {
  115. last->next = m;
  116. last = m;
  117. }
  118. }
  119. freez(buf);
  120. return (SIMPLE_PATTERN *)root;
  121. }
  122. static inline char *add_wildcarded(const char *matched, size_t matched_size, char *wildcarded, size_t *wildcarded_size) {
  123. //if(matched_size) {
  124. // char buf[matched_size + 1];
  125. // strncpyz(buf, matched, matched_size);
  126. // fprintf(stderr, "ADD WILDCARDED '%s' of length %zu\n", buf, matched_size);
  127. //}
  128. if(unlikely(wildcarded && *wildcarded_size && matched && *matched && matched_size)) {
  129. size_t wss = *wildcarded_size - 1;
  130. size_t len = (matched_size < wss)?matched_size:wss;
  131. if(likely(len)) {
  132. strncpyz(wildcarded, matched, len);
  133. *wildcarded_size -= len;
  134. return &wildcarded[len];
  135. }
  136. }
  137. return wildcarded;
  138. }
  139. static inline int match_pattern(struct simple_pattern *m, const char *str, size_t len, char *wildcarded, size_t *wildcarded_size) {
  140. char *s;
  141. if(m->len <= len) {
  142. switch(m->mode) {
  143. case SIMPLE_PATTERN_SUBSTRING:
  144. if(!m->len) return 1;
  145. if((s = strstr(str, m->match))) {
  146. wildcarded = add_wildcarded(str, s - str, wildcarded, wildcarded_size);
  147. if(!m->child) {
  148. wildcarded = add_wildcarded(&s[m->len], len - (&s[m->len] - str), wildcarded, wildcarded_size);
  149. return 1;
  150. }
  151. return match_pattern(m->child, &s[m->len], len - (s - str) - m->len, wildcarded, wildcarded_size);
  152. }
  153. break;
  154. case SIMPLE_PATTERN_PREFIX:
  155. if(unlikely(strncmp(str, m->match, m->len) == 0)) {
  156. if(!m->child) {
  157. wildcarded = add_wildcarded(&str[m->len], len - m->len, wildcarded, wildcarded_size);
  158. return 1;
  159. }
  160. return match_pattern(m->child, &str[m->len], len - m->len, wildcarded, wildcarded_size);
  161. }
  162. break;
  163. case SIMPLE_PATTERN_SUFFIX:
  164. if(unlikely(strcmp(&str[len - m->len], m->match) == 0)) {
  165. wildcarded = add_wildcarded(str, len - m->len, wildcarded, wildcarded_size);
  166. if(!m->child) return 1;
  167. return 0;
  168. }
  169. break;
  170. case SIMPLE_PATTERN_EXACT:
  171. default:
  172. if(unlikely(strcmp(str, m->match) == 0)) {
  173. if(!m->child) return 1;
  174. return 0;
  175. }
  176. break;
  177. }
  178. }
  179. return 0;
  180. }
  181. int simple_pattern_matches_extract(SIMPLE_PATTERN *list, const char *str, char *wildcarded, size_t wildcarded_size) {
  182. struct simple_pattern *m, *root = (struct simple_pattern *)list;
  183. if(unlikely(!root || !str || !*str)) return 0;
  184. size_t len = strlen(str);
  185. for(m = root; m ; m = m->next) {
  186. char *ws = wildcarded;
  187. size_t wss = wildcarded_size;
  188. if(unlikely(ws)) *ws = '\0';
  189. if (match_pattern(m, str, len, ws, &wss)) {
  190. //if(ws && wss)
  191. // fprintf(stderr, "FINAL WILDCARDED '%s' of length %zu\n", ws, strlen(ws));
  192. if (m->negative) return 0;
  193. return 1;
  194. }
  195. }
  196. return 0;
  197. }
  198. static inline void free_pattern(struct simple_pattern *m) {
  199. if(!m) return;
  200. free_pattern(m->child);
  201. free_pattern(m->next);
  202. freez((void *)m->match);
  203. freez(m);
  204. }
  205. void simple_pattern_free(SIMPLE_PATTERN *list) {
  206. if(!list) return;
  207. free_pattern(((struct simple_pattern *)list));
  208. }
  209. /* Debugging patterns
  210. This code should be dead - it is useful for debugging but should not be called by production code.
  211. Feel free to comment it out, but please leave it in the file.
  212. */
  213. extern void simple_pattern_dump(uint64_t debug_type, SIMPLE_PATTERN *p)
  214. {
  215. struct simple_pattern *root = (struct simple_pattern *)p;
  216. if(root==NULL) {
  217. debug(debug_type,"dump_pattern(NULL)");
  218. return;
  219. }
  220. debug(debug_type,"dump_pattern(%p) child=%p next=%p mode=%u match=%s", root, root->child, root->next, root->mode,
  221. root->match);
  222. if(root->child!=NULL)
  223. simple_pattern_dump(debug_type, (SIMPLE_PATTERN*)root->child);
  224. if(root->next!=NULL)
  225. simple_pattern_dump(debug_type, (SIMPLE_PATTERN*)root->next);
  226. }
  227. /* Heuristic: decide if the pattern could match a DNS name.
  228. Although this functionality is used directly by socket.c:connection_allowed() it must be in this file
  229. because of the SIMPLE_PATTERN/simple_pattern structure hiding.
  230. Based on RFC952 / RFC1123. We need to decide if the pattern may match a DNS name, or not. For the negative
  231. cases we need to be sure that it can only match an ipv4 or ipv6 address:
  232. * IPv6 addresses contain ':', which are illegal characters in DNS.
  233. * IPv4 addresses cannot contain alpha- characters.
  234. * DNS TLDs must be alphanumeric to distinguish from IPv4.
  235. Some patterns (e.g. "*a*" ) could match multiple cases (i.e. DNS or IPv6).
  236. Some patterns will be awkward (e.g. "192.168.*") as they look like they are intended to match IPv4-only
  237. but could match DNS (i.e. "192.168.com" is a valid name).
  238. */
  239. static void scan_is_potential_name(struct simple_pattern *p, int *alpha, int *colon, int *wildcards)
  240. {
  241. while (p) {
  242. if (p->match) {
  243. if(p->mode == SIMPLE_PATTERN_EXACT && !strcmp("localhost", p->match)) {
  244. p = p->child;
  245. continue;
  246. }
  247. char const *scan = p->match;
  248. while (*scan != 0) {
  249. if ((*scan >= 'a' && *scan <= 'z') || (*scan >= 'A' && *scan <= 'Z'))
  250. *alpha = 1;
  251. if (*scan == ':')
  252. *colon = 1;
  253. scan++;
  254. }
  255. if (p->mode != SIMPLE_PATTERN_EXACT)
  256. *wildcards = 1;
  257. p = p->child;
  258. }
  259. }
  260. }
  261. extern int simple_pattern_is_potential_name(SIMPLE_PATTERN *p)
  262. {
  263. int alpha=0, colon=0, wildcards=0;
  264. struct simple_pattern *root = (struct simple_pattern*)p;
  265. while (root != NULL) {
  266. if (root->match != NULL) {
  267. scan_is_potential_name(root, &alpha, &colon, &wildcards);
  268. }
  269. if (root->mode != SIMPLE_PATTERN_EXACT)
  270. wildcards = 1;
  271. root = root->next;
  272. }
  273. return (alpha || wildcards) && !colon;
  274. }
  275. char *simple_pattern_trim_around_equal(char *src) {
  276. char *store = mallocz(strlen(src) +1);
  277. if(!store)
  278. return NULL;
  279. char *dst = store;
  280. while (*src) {
  281. if (*src == '=') {
  282. if (*(dst -1) == ' ')
  283. dst--;
  284. *dst++ = *src++;
  285. if (*src == ' ')
  286. src++;
  287. }
  288. *dst++ = *src++;
  289. }
  290. *dst = 0x00;
  291. return store;
  292. }
  293. char *simple_pattern_iterate(SIMPLE_PATTERN **p)
  294. {
  295. struct simple_pattern *root = (struct simple_pattern *) *p;
  296. struct simple_pattern **Proot = (struct simple_pattern **)p;
  297. (*Proot) = (*Proot)->next;
  298. return (char *) root->match;
  299. }