// SPDX-License-Identifier: GPL-3.0-or-later
/** @file parser.h
 *  @brief Header of parser.c
 */
  5. #ifndef PARSER_H_
  6. #define PARSER_H_
  7. #include <regex.h>
  8. #include "daemon/common.h"
  9. #include "libnetdata/libnetdata.h"
  10. // Forward decleration
  11. typedef struct log_parser_metrics Log_parser_metrics_t;
  12. /* -------------------------------------------------------------------------- */
  13. /* Configuration-related */
  14. /* -------------------------------------------------------------------------- */
  15. typedef enum{
  16. CHART_COLLECTED_LOGS_TOTAL = 1 << 0,
  17. CHART_COLLECTED_LOGS_RATE = 1 << 1,
  18. /* FLB_WEB_LOG charts */
  19. CHART_VHOST = 1 << 2,
  20. CHART_PORT = 1 << 3,
  21. CHART_IP_VERSION = 1 << 4,
  22. CHART_REQ_CLIENT_CURRENT = 1 << 5,
  23. CHART_REQ_CLIENT_ALL_TIME = 1 << 6,
  24. CHART_REQ_METHODS = 1 << 7,
  25. CHART_REQ_PROTO = 1 << 8,
  26. CHART_BANDWIDTH = 1 << 9,
  27. CHART_REQ_PROC_TIME = 1 << 10,
  28. CHART_RESP_CODE_FAMILY = 1 << 11,
  29. CHART_RESP_CODE = 1 << 12,
  30. CHART_RESP_CODE_TYPE = 1 << 13,
  31. CHART_SSL_PROTO = 1 << 14,
  32. CHART_SSL_CIPHER = 1 << 15,
  33. /* FLB_SYSTEMD or FLB_SYSLOG charts */
  34. CHART_SYSLOG_PRIOR = 1 << 16,
  35. CHART_SYSLOG_SEVER = 1 << 17,
  36. CHART_SYSLOG_FACIL = 1 << 18,
  37. /* FLB_KMSG charts */
  38. CHART_KMSG_SUBSYSTEM = 1 << 19,
  39. CHART_KMSG_DEVICE = 1 << 20,
  40. /* FLB_DOCKER_EV charts */
  41. CHART_DOCKER_EV_TYPE = 1 << 21,
  42. CHART_DOCKER_EV_ACTION = 1 << 22,
  43. /* FLB_MQTT charts*/
  44. CHART_MQTT_TOPIC = 1 << 23
  45. } chart_type_t;
  46. typedef struct log_parser_config{
  47. void *gen_config; /**< Pointer to (optional) generic configuration, as per use case. */
  48. unsigned long int chart_config; /**< Configuration of which charts to enable according to chart_type_t **/
  49. } Log_parser_config_t;
  50. /* -------------------------------------------------------------------------- */
  51. /* -------------------------------------------------------------------------- */
  52. /* Web Log parsing and metrics */
  53. /* -------------------------------------------------------------------------- */
  54. #define VHOST_MAX_LEN 255 /**< Max vhost string length, inclding terminating \0 **/
  55. #define PORT_MAX_LEN 6 /**< Max port string length, inclding terminating \0 **/
  56. #define REQ_SCHEME_MAX_LEN 6 /**< Max request scheme length, including terminating \0 **/
  57. #define REQ_CLIENT_MAX_LEN 46 /**< https://superuser.com/questions/381022/how-many-characters-can-an-ip-address-be#comment2219013_381029 **/
  58. #define REQ_METHOD_MAX_LEN 18 /**< Max request method length, including terminating \0 **/
  59. #define REQ_URL_MAX_LEN 128 /**< Max request URL length, including terminating \0 **/
  60. #define REQ_PROTO_PREF_SIZE (sizeof("HTTP/") - 1)
  61. #define REQ_PROTO_MAX_LEN 4 /**< Max request protocol numerical part length, including terminating \0 **/
  62. #define REQ_SIZE_MAX_LEN 11 /**< Max size of bytes received, including terminating \0 **/
  63. #define REQ_PROC_TIME_MAX_LEN 11 /**< Max size of request processing time, including terminating \0 **/
  64. #define REQ_RESP_CODE_MAX_LEN 4 /**< Max size of response code, including terminating \0 **/
  65. #define REQ_RESP_SIZE_MAX_LEN 11 /**< Max size of request response size, including terminating \0 **/
  66. #define UPS_RESP_TIME_MAX_LEN 10 /**< Max size of upstream response time, including terminating \0 **/
  67. #define SSL_PROTO_MAX_LEN 8 /**< Max SSL protocol length, inclding terminating \0 **/
  68. #define SSL_CIPHER_SUITE_MAX_LEN 256 /**< TODO: Check max len for ssl cipher suite string is indeed 256 **/
  69. #define RESP_CODE_ARR_SIZE 501 /**< Size of resp_code array, assuming 500 valid resp codes + 1 for "other" **/
  70. #define WEB_LOG_INVALID_HOST_STR "invalid"
  71. #define WEB_LOG_INVALID_PORT -1
  72. #define WEB_LOG_INVALID_PORT_STR "inv"
  73. #define WEB_LOG_INVALID_CLIENT_IP_STR WEB_LOG_INVALID_PORT_STR
  74. /* Web log configuration */
  75. #define ENABLE_PARSE_WEB_LOG_LINE_DEBUG 0
  76. #define VHOST_BUFFS_SCALE_FACTOR 1.5
  77. #define PORT_BUFFS_SCALE_FACTOR 8 // Unlike Vhosts, ports are stored as integers, so scale factor can be bigger
  78. typedef enum{
  79. VHOST_WITH_PORT, // nginx: $host:$server_port apache: %v:%p
  80. VHOST, // nginx: $host ($http_host) apache: %v
  81. PORT, // nginx: $server_port apache: %p
  82. REQ_SCHEME, // nginx: $scheme apache: -
  83. REQ_CLIENT, // nginx: $remote_addr apache: %a (%h)
  84. REQ, // nginx: $request apache: %r
  85. REQ_METHOD, // nginx: $request_method apache: %m
  86. REQ_URL, // nginx: $request_uri apache: %U
  87. REQ_PROTO, // nginx: $server_protocol apache: %H
  88. REQ_SIZE, // nginx: $request_length apache: %I
  89. REQ_PROC_TIME, // nginx: $request_time apache: %D
  90. RESP_CODE, // nginx: $status apache: %s, %>s
  91. RESP_SIZE, // nginx: $bytes_sent, $body_bytes_sent apache: %b, %O, %B // TODO: Should separate %b from %O ?
  92. UPS_RESP_TIME, // nginx: $upstream_response_time apache: -
  93. SSL_PROTO, // nginx: $ssl_protocol apache: -
  94. SSL_CIPHER_SUITE, // nginx: $ssl_cipher apache: -
  95. TIME, // nginx: $time_local apache: %t
  96. CUSTOM
  97. } web_log_line_field_t;
  98. typedef struct web_log_parser_config{
  99. web_log_line_field_t *fields;
  100. int num_fields; /**< Number of strings in the fields array. **/
  101. char delimiter; /**< Delimiter that separates the fields in the log format. **/
  102. int verify_parsed_logs; /**< Boolean whether to try and verify parsed log fields or not **/
  103. int skip_timestamp_parsing; /**< Boolean whether to skip parsing of timestamp fields **/
  104. } Web_log_parser_config_t;
  105. static const char *const req_method_str[] = {
  106. "ACL",
  107. "BASELINE-CONTROL",
  108. "BIND",
  109. "CHECKIN",
  110. "CHECKOUT",
  111. "CONNECT",
  112. "COPY",
  113. "DELETE",
  114. "GET",
  115. "HEAD",
  116. "LABEL",
  117. "LINK",
  118. "LOCK",
  119. "MERGE",
  120. "MKACTIVITY",
  121. "MKCALENDAR",
  122. "MKCOL",
  123. "MKREDIRECTREF",
  124. "MKWORKSPACE",
  125. "MOVE",
  126. "OPTIONS",
  127. "ORDERPATCH",
  128. "PATCH",
  129. "POST",
  130. "PRI",
  131. "PROPFIND",
  132. "PROPPATCH",
  133. "PUT",
  134. "REBIND",
  135. "REPORT",
  136. "SEARCH",
  137. "TRACE",
  138. "UNBIND",
  139. "UNCHECKOUT",
  140. "UNLINK",
  141. "UNLOCK",
  142. "UPDATE",
  143. "UPDATEREDIRECTREF",
  144. "-"
  145. };
  146. #define REQ_METHOD_ARR_SIZE (int)(sizeof(req_method_str) / sizeof(req_method_str[0]))
  147. typedef struct web_log_metrics{
  148. /* Web log metrics */
  149. struct log_parser_metrics_vhosts_array{
  150. struct log_parser_metrics_vhost{
  151. char name[VHOST_MAX_LEN]; /**< Name of the vhost **/
  152. int count; /**< Occurences of the vhost **/
  153. } *vhosts;
  154. int size; /**< Size of vhosts array **/
  155. int size_max;
  156. } vhost_arr;
  157. struct log_parser_metrics_ports_array{
  158. struct log_parser_metrics_port{
  159. char name[PORT_MAX_LEN]; /**< Number of port in str */
  160. int port; /**< Number of port **/
  161. int count; /**< Occurences of the port **/
  162. } *ports;
  163. int size; /**< Size of ports array **/
  164. int size_max;
  165. } port_arr;
  166. struct log_parser_metrics_ip_ver{
  167. int v4, v6, invalid;
  168. } ip_ver;
  169. /**< req_clients_current_arr is used by parser.c to save unique client IPs
  170. * extracted per circular buffer item and also in p_file_info to save unique
  171. * client IPs per collection (poll) iteration of plugin_logsmanagement.c.
  172. * req_clients_alltime_arr is used in p_file_info to save unique client IPs
  173. * of all time (and so ipv4_size and ipv6_size can only grow and are never reset to 0). **/
  174. struct log_parser_metrics_req_clients_array{
  175. char (*ipv4_req_clients)[REQ_CLIENT_MAX_LEN];
  176. int ipv4_size;
  177. int ipv4_size_max;
  178. char (*ipv6_req_clients)[REQ_CLIENT_MAX_LEN];
  179. int ipv6_size;
  180. int ipv6_size_max;
  181. } req_clients_current_arr, req_clients_alltime_arr;
  182. int req_method[REQ_METHOD_ARR_SIZE];
  183. struct log_parser_metrics_req_proto{
  184. int http_1, http_1_1, http_2, other;
  185. } req_proto;
  186. struct log_parser_metrics_bandwidth{
  187. long long req_size, resp_size;
  188. } bandwidth;
  189. struct log_parser_metrics_req_proc_time{
  190. int min, max, sum, count;
  191. } req_proc_time;
  192. struct log_parser_metrics_resp_code_family{
  193. int resp_1xx, resp_2xx, resp_3xx, resp_4xx, resp_5xx, other; // TODO: Can there be "other"?
  194. } resp_code_family;
  195. /**< Array counting occurences of response codes. Each item represents the
  196. * respective response code by adding 100 to its index, e.g. resp_code[102]
  197. * counts how many 202 codes were detected. 501st item represents "other" */
  198. unsigned int resp_code[RESP_CODE_ARR_SIZE];
  199. struct log_parser_metrics_resp_code_type{ /* Note: 304 and 401 should be treated as resp_success */
  200. int resp_success, resp_redirect, resp_bad, resp_error, other; // TODO: Can there be "other"?
  201. } resp_code_type;
  202. struct log_parser_metrics_ssl_proto{
  203. int tlsv1, tlsv1_1, tlsv1_2, tlsv1_3, sslv2, sslv3, other;
  204. } ssl_proto;
  205. struct log_parser_metrics_ssl_cipher_array{
  206. struct log_parser_metrics_ssl_cipher{
  207. char name[SSL_CIPHER_SUITE_MAX_LEN]; /**< SSL cipher suite string **/
  208. int count; /**< Occurences of the SSL cipher **/
  209. } *ssl_ciphers;
  210. int size; /**< Size of SSL ciphers array **/
  211. } ssl_cipher_arr;
  212. int64_t timestamp;
  213. } Web_log_metrics_t;
  214. typedef struct log_line_parsed{
  215. char vhost[VHOST_MAX_LEN];
  216. int port;
  217. char req_scheme[REQ_SCHEME_MAX_LEN];
  218. char req_client[REQ_CLIENT_MAX_LEN];
  219. char req_method[REQ_METHOD_MAX_LEN];
  220. char req_URL[REQ_URL_MAX_LEN];
  221. char req_proto[REQ_PROTO_MAX_LEN];
  222. int req_size;
  223. int req_proc_time;
  224. int resp_code;
  225. int resp_size;
  226. int ups_resp_time;
  227. char ssl_proto[SSL_PROTO_MAX_LEN];
  228. char ssl_cipher[SSL_CIPHER_SUITE_MAX_LEN];
  229. int64_t timestamp;
  230. int parsing_errors;
  231. } Log_line_parsed_t;
  232. Web_log_parser_config_t *read_web_log_parser_config(const char *log_format, const char delimiter);
  233. #ifdef ENABLE_LOGSMANAGEMENT_TESTS
  234. /* Used as public only for unit testing, normally defined as static */
  235. int count_fields(const char *line, const char delimiter);
  236. #endif // ENABLE_LOGSMANAGEMENT_TESTS
  237. void parse_web_log_line(const Web_log_parser_config_t *wblp_config,
  238. char *line, const size_t line_len,
  239. Log_line_parsed_t *log_line_parsed);
  240. void extract_web_log_metrics(Log_parser_config_t *parser_config,
  241. Log_line_parsed_t *line_parsed,
  242. Web_log_metrics_t *metrics);
  243. Web_log_parser_config_t *auto_detect_web_log_parser_config(char *line, const char delimiter);
  244. /* -------------------------------------------------------------------------- */
  245. /* -------------------------------------------------------------------------- */
  246. /* Kernel logs (kmsg) metrics */
  247. /* -------------------------------------------------------------------------- */
  248. #define SYSLOG_SEVER_ARR_SIZE 9 /**< Number of severity levels plus 1 for 'unknown' **/
  249. typedef struct metrics_dict_item{
  250. bool dim_initialized;
  251. int num;
  252. int num_new;
  253. } metrics_dict_item_t;
  254. typedef struct kernel_metrics{
  255. unsigned int sever[SYSLOG_SEVER_ARR_SIZE]; /**< Syslog severity, 0-7 plus 1 space for 'unknown' **/
  256. DICTIONARY *subsystem;
  257. DICTIONARY *device;
  258. } Kernel_metrics_t;
  259. /* -------------------------------------------------------------------------- */
  260. /* -------------------------------------------------------------------------- */
  261. /* Systemd and Syslog metrics */
  262. /* -------------------------------------------------------------------------- */
  263. #define SYSLOG_FACIL_ARR_SIZE 25 /**< Number of facility levels plus 1 for 'unknown' **/
  264. #define SYSLOG_PRIOR_ARR_SIZE 193 /**< Number of priority values plus 1 for 'unknown' **/
  265. typedef struct systemd_metrics{
  266. unsigned int sever[SYSLOG_SEVER_ARR_SIZE]; /**< Syslog severity, 0-7 plus 1 space for 'unknown' **/
  267. unsigned int facil[SYSLOG_FACIL_ARR_SIZE]; /**< Syslog facility, 0-23 plus 1 space for 'unknown' **/
  268. unsigned int prior[SYSLOG_PRIOR_ARR_SIZE]; /**< Syslog priority value, 0-191 plus 1 space for 'unknown' **/
  269. } Systemd_metrics_t;
  270. /* -------------------------------------------------------------------------- */
  271. /* -------------------------------------------------------------------------- */
  272. /* Docker Events metrics */
  273. /* -------------------------------------------------------------------------- */
  274. static const char *const docker_ev_type_string[] = {
  275. "container", "image", "plugin", "volume", "network", "daemon", "service", "node", "secret", "config", "unknown"
  276. };
  277. #define NUM_OF_DOCKER_EV_TYPES ((int) (sizeof docker_ev_type_string / sizeof docker_ev_type_string[0]))
  278. #define NUM_OF_CONTAINER_ACTIONS 25 /**< == size of 'Containers actions' array, largest array in docker_ev_action_string **/
  279. static const char *const docker_ev_action_string[NUM_OF_DOCKER_EV_TYPES][NUM_OF_CONTAINER_ACTIONS] = {
  280. /* Order of arrays is important, it must match the order of docker_ev_type_string[] strings. */
  281. /* Containers actions */
  282. {"attach", "commit", "copy", "create", "destroy", "detach", "die", "exec_create", "exec_detach", "exec_die",
  283. "exec_start", "export", "health_status", "kill", "oom", "pause", "rename", "resize", "restart", "start", "stop",
  284. "top", "unpause", "update", NULL},
  285. /* Images actions */
  286. {"delete", "import", "load", "pull", "push", "save", "tag", "untag", NULL},
  287. /* Plugins actions */
  288. {"enable", "disable", "install", "remove", NULL},
  289. /* Volumes actions */
  290. {"create", "destroy", "mount", "unmount", NULL},
  291. /* Networks actions */
  292. {"create", "connect", "destroy", "disconnect", "remove", NULL},
  293. /* Daemons actions */
  294. {"reload", NULL},
  295. /* Services actions */
  296. {"create", "remove", "update", NULL},
  297. /* Nodes actions */
  298. {"create", "remove", "update", NULL},
  299. /* Secrets actions */
  300. {"create", "remove", "update", NULL},
  301. /* Configs actions */
  302. {"create", "remove", "update", NULL},
  303. {"unknown", NULL}
  304. };
  305. typedef struct docker_ev_metrics{
  306. unsigned int ev_type[NUM_OF_DOCKER_EV_TYPES];
  307. unsigned int ev_action[NUM_OF_DOCKER_EV_TYPES][NUM_OF_CONTAINER_ACTIONS];
  308. } Docker_ev_metrics_t;
  309. /* -------------------------------------------------------------------------- */
  310. /* -------------------------------------------------------------------------- */
  311. /* MQTT metrics */
  312. /* -------------------------------------------------------------------------- */
  313. typedef struct mqtt_metrics{
  314. DICTIONARY *topic;
  315. } Mqtt_metrics_t;
  316. /* -------------------------------------------------------------------------- */
  317. /* -------------------------------------------------------------------------- */
  318. /* Regex / Keyword search */
  319. /* -------------------------------------------------------------------------- */
  320. #define MAX_KEYWORD_LEN 100 /**< Max size of keyword used in keyword search, in bytes */
  321. #define MAX_REGEX_SIZE MAX_KEYWORD_LEN + 7 /**< Max size of regular expression (used in keyword search) in bytes **/
  322. int search_keyword( char *src, size_t src_sz,
  323. char *dest, size_t *dest_sz,
  324. const char *keyword, regex_t *regex,
  325. const int ignore_case);
  326. /* -------------------------------------------------------------------------- */
  327. /* -------------------------------------------------------------------------- */
  328. /* Custom Charts configuration and metrics */
  329. /* -------------------------------------------------------------------------- */
  330. typedef struct log_parser_cus_config{
  331. char *chartname; /**< Chart name where the regex metrics will appear in **/
  332. char *regex_str; /**< String representation of the regex **/
  333. char *regex_name; /**< If regex is named, this is where its name is stored **/
  334. regex_t regex; /**< The compiled regex **/
  335. } Log_parser_cus_config_t;
  336. typedef struct log_parser_cus_metrics{
  337. unsigned long long count;
  338. } Log_parser_cus_metrics_t;
  339. /* -------------------------------------------------------------------------- */
  340. /* -------------------------------------------------------------------------- */
  341. /* General / Other */
  342. /* -------------------------------------------------------------------------- */
  343. struct log_parser_metrics{
  344. unsigned long long num_lines;
  345. // struct timeval tv;
  346. time_t last_update;
  347. union {
  348. Web_log_metrics_t *web_log;
  349. Kernel_metrics_t *kernel;
  350. Systemd_metrics_t *systemd;
  351. Docker_ev_metrics_t *docker_ev;
  352. Mqtt_metrics_t *mqtt;
  353. };
  354. Log_parser_cus_metrics_t **parser_cus; /**< Array storing custom chart metrics structs **/
  355. } ;
  356. #endif // PARSER_H_