log2journal.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #ifndef NETDATA_LOG2JOURNAL_H
  3. #define NETDATA_LOG2JOURNAL_H
  4. // only for PACKAGE_VERSION
  5. #include "config.h"
  6. #include <stdio.h>
  7. #include <stdlib.h>
  8. #include <dirent.h>
  9. #include <string.h>
  10. #include <stdbool.h>
  11. #include <string.h>
  12. #include <ctype.h>
  13. #include <math.h>
  14. #include <stdarg.h>
  15. #include <assert.h>
  16. // ----------------------------------------------------------------------------
  17. // logging
  18. // enable the compiler to check for printf like errors on our log2stderr() function
  19. static inline void log2stderr(const char *format, ...) __attribute__ ((format(__printf__, 1, 2)));
  20. static inline void log2stderr(const char *format, ...) {
  21. va_list args;
  22. va_start(args, format);
  23. vfprintf(stderr, format, args);
  24. va_end(args);
  25. fprintf(stderr, "\n");
  26. }
  27. // ----------------------------------------------------------------------------
  28. // allocation functions abstraction
  29. static inline void *mallocz(size_t size) {
  30. void *ptr = malloc(size);
  31. if (!ptr) {
  32. log2stderr("Fatal Error: Memory allocation failed. Requested size: %zu bytes.", size);
  33. exit(EXIT_FAILURE);
  34. }
  35. return ptr;
  36. }
  37. static inline void *callocz(size_t elements, size_t size) {
  38. void *ptr = calloc(elements, size);
  39. if (!ptr) {
  40. log2stderr("Fatal Error: Memory allocation failed. Requested size: %zu bytes.", elements * size);
  41. exit(EXIT_FAILURE);
  42. }
  43. return ptr;
  44. }
  45. static inline void *reallocz(void *ptr, size_t size) {
  46. void *new_ptr = realloc(ptr, size);
  47. if (!new_ptr) {
  48. log2stderr("Fatal Error: Memory reallocation failed. Requested size: %zu bytes.", size);
  49. exit(EXIT_FAILURE);
  50. }
  51. return new_ptr;
  52. }
  53. static inline char *strdupz(const char *s) {
  54. char *ptr = strdup(s);
  55. if (!ptr) {
  56. log2stderr("Fatal Error: Memory allocation failed in strdup.");
  57. exit(EXIT_FAILURE);
  58. }
  59. return ptr;
  60. }
  61. static inline char *strndupz(const char *s, size_t n) {
  62. char *ptr = strndup(s, n);
  63. if (!ptr) {
  64. log2stderr("Fatal Error: Memory allocation failed in strndup. Requested size: %zu bytes.", n);
  65. exit(EXIT_FAILURE);
  66. }
  67. return ptr;
  68. }
  69. static inline void freez(void *ptr) {
  70. if (ptr)
  71. free(ptr);
  72. }
  73. // ----------------------------------------------------------------------------
  74. #define XXH_INLINE_ALL
  75. #include "../../libnetdata/xxhash.h"
  76. #define PCRE2_CODE_UNIT_WIDTH 8
  77. #include <pcre2.h>
  78. #ifdef HAVE_LIBYAML
  79. #include <yaml.h>
  80. #endif
  81. // ----------------------------------------------------------------------------
  82. // hashtable for HASHED_KEY
  83. // cleanup hashtable defines
  84. #undef SIMPLE_HASHTABLE_SORT_FUNCTION
  85. #undef SIMPLE_HASHTABLE_VALUE_TYPE
  86. #undef SIMPLE_HASHTABLE_NAME
  87. #undef NETDATA_SIMPLE_HASHTABLE_H
  88. struct hashed_key;
  89. static inline int compare_keys(struct hashed_key *k1, struct hashed_key *k2);
  90. #define SIMPLE_HASHTABLE_SORT_FUNCTION compare_keys
  91. #define SIMPLE_HASHTABLE_VALUE_TYPE struct hashed_key
  92. #define SIMPLE_HASHTABLE_NAME _KEY
  93. #include "../../libnetdata/simple_hashtable.h"
  94. // ----------------------------------------------------------------------------
// compile-time limits of a job
#define MAX_OUTPUT_KEYS 1024
#define MAX_LINE_LENGTH (1024 * 1024)
#define MAX_INJECTIONS (MAX_OUTPUT_KEYS / 2) // capacity of the injection arrays in LOG_JOB
#define MAX_REWRITES (MAX_OUTPUT_KEYS / 2) // capacity of the rewrites array in LOG_JOB
#define MAX_RENAMES (MAX_OUTPUT_KEYS / 2) // capacity of the renames array in LOG_JOB
#define JOURNAL_MAX_KEY_LEN 64 // according to systemd-journald
#define JOURNAL_MAX_VALUE_LEN (48 * 1024) // according to systemd-journald
// built by string-literal concatenation; LIBCONFIG_DIR is not defined in this header
#define LOG2JOURNAL_CONFIG_PATH LIBCONFIG_DIR "/log2journal.d"
  103. // ----------------------------------------------------------------------------
  104. // character conversion for journal keys
  105. extern const char journal_key_characters_map[256];
  106. // ----------------------------------------------------------------------------
  107. // copy to buffer, while ensuring there is no buffer overflow
  108. static inline size_t copy_to_buffer(char *dst, size_t dst_size, const char *src, size_t src_len) {
  109. if(dst_size < 2) {
  110. if(dst_size == 1)
  111. *dst = '\0';
  112. return 0;
  113. }
  114. if(src_len <= dst_size - 1) {
  115. memcpy(dst, src, src_len);
  116. dst[src_len] = '\0';
  117. return src_len;
  118. }
  119. else {
  120. memcpy(dst, src, dst_size - 1);
  121. dst[dst_size - 1] = '\0';
  122. return dst_size - 1;
  123. }
  124. }
  125. // ----------------------------------------------------------------------------
  126. // A dynamically sized, reusable text buffer,
  127. // allowing us to be fast (no allocations during iterations) while having the
  128. // smallest possible allocations.
typedef struct txt {
    char *txt;      // heap buffer with the NUL terminated text; NULL when nothing is allocated
    uint32_t size;  // capacity of the buffer in bytes, terminator included
    uint32_t len;   // length of the text currently stored, terminator excluded
} TEXT;
  134. static inline void txt_cleanup(TEXT *t) {
  135. if(!t)
  136. return;
  137. if(t->txt)
  138. freez(t->txt);
  139. t->txt = NULL;
  140. t->size = 0;
  141. t->len = 0;
  142. }
  143. static inline void txt_replace(TEXT *t, const char *s, size_t len) {
  144. if(!s || !*s || len == 0) {
  145. s = "";
  146. len = 0;
  147. }
  148. if(len + 1 <= t->size) {
  149. // the existing value allocation, fits our value
  150. memcpy(t->txt, s, len);
  151. t->txt[len] = '\0';
  152. t->len = len;
  153. }
  154. else {
  155. // no existing value allocation, or too small for our value
  156. // cleanup and increase the buffer
  157. txt_cleanup(t);
  158. t->txt = strndupz(s, len);
  159. t->size = len + 1;
  160. t->len = len;
  161. }
  162. }
  163. static inline void txt_expand_and_append(TEXT *t, const char *s, size_t len) {
  164. if(len + 1 > (t->size - t->len)) {
  165. size_t new_size = t->len + len + 1;
  166. if(new_size < t->size * 2)
  167. new_size = t->size * 2;
  168. t->txt = reallocz(t->txt, new_size);
  169. t->size = new_size;
  170. }
  171. char *copy_to = &t->txt[t->len];
  172. memcpy(copy_to, s, len);
  173. copy_to[len] = '\0';
  174. t->len += len;
  175. }
  176. // ----------------------------------------------------------------------------
// Bit flags kept in HASHED_KEY.flags.
typedef enum __attribute__((__packed__)) {
    HK_NONE = 0,

    // permanent flags - they are set once to optimize various decisions and lookups
    HK_HASHTABLE_ALLOCATED = (1 << 0), // this key object is allocated in the hashtable
                                       // objects that do not have this, have a pointer to a key in the hashtable
                                       // objects that have this, have a value allocated
    HK_FILTERED = (1 << 1), // we checked once if this key is filtered
    HK_FILTERED_INCLUDED = (1 << 2), // the result of the filtering was to include it in the output
    HK_COLLISION_CHECKED = (1 << 3), // we checked once for collisions on this key
    HK_RENAMES_CHECKED = (1 << 4), // we checked once if there are renames on this key
    HK_HAS_RENAMES = (1 << 5), // and we found there is a rename rule related to it

    // ephemeral flags - they are unset at the end of each log line
    HK_VALUE_FROM_LOG = (1 << 14), // the value of this key has been read from the log (or from injection, duplication)
    HK_VALUE_REWRITTEN = (1 << 15), // the value of this key has been rewritten due to one of our rewrite rules
} HASHED_KEY_FLAGS;
// A key name together with its precomputed hash.
// Which union member is in use is selected by HK_HASHTABLE_ALLOCATED in flags.
typedef struct hashed_key {
    const char *key;         // heap allocated copy of the key name (owned by this object, see hashed_key_cleanup())
    uint32_t len;            // length of key, in bytes
    HASHED_KEY_FLAGS flags;  // HK_* flags
    XXH64_hash_t hash;       // XXH3_64bits() of key over len bytes
    union {
        struct hashed_key *hashtable_ptr; // HK_HASHTABLE_ALLOCATED is not set
        TEXT value; // HK_HASHTABLE_ALLOCATED is set
    };
} HASHED_KEY;
  202. static inline void hashed_key_cleanup(HASHED_KEY *k) {
  203. if(k->key) {
  204. freez((void *)k->key);
  205. k->key = NULL;
  206. }
  207. if(k->flags & HK_HASHTABLE_ALLOCATED)
  208. txt_cleanup(&k->value);
  209. else
  210. k->hashtable_ptr = NULL;
  211. }
  212. static inline void hashed_key_set(HASHED_KEY *k, const char *name) {
  213. hashed_key_cleanup(k);
  214. k->key = strdupz(name);
  215. k->len = strlen(k->key);
  216. k->hash = XXH3_64bits(k->key, k->len);
  217. k->flags = HK_NONE;
  218. }
  219. static inline void hashed_key_len_set(HASHED_KEY *k, const char *name, size_t len) {
  220. hashed_key_cleanup(k);
  221. k->key = strndupz(name, len);
  222. k->len = len;
  223. k->hash = XXH3_64bits(k->key, k->len);
  224. k->flags = HK_NONE;
  225. }
  226. static inline bool hashed_keys_match(HASHED_KEY *k1, HASHED_KEY *k2) {
  227. return ((k1 == k2) || (k1->hash == k2->hash && strcmp(k1->key, k2->key) == 0));
  228. }
  229. static inline int compare_keys(struct hashed_key *k1, struct hashed_key *k2) {
  230. return strcmp(k1->key, k2->key);
  231. }
  232. // ----------------------------------------------------------------------------
// A PCRE2 search pattern and what is needed to match against it.
typedef struct search_pattern {
    const char *pattern;            // the pattern text - presumably the source of `re`; confirm in search_pattern_set()
    pcre2_code *re;                 // compiled pattern, given to pcre2_match() by search_pattern_matches()
    pcre2_match_data *match_data;   // match data block, given to pcre2_match() by search_pattern_matches()
    TEXT error;                     // presumably the last error message - TODO confirm in the implementation
} SEARCH_PATTERN;

// implemented outside this header
void search_pattern_cleanup(SEARCH_PATTERN *sp);
bool search_pattern_set(SEARCH_PATTERN *sp, const char *search_pattern, size_t search_pattern_len);
  241. static inline bool search_pattern_matches(SEARCH_PATTERN *sp, const char *value, size_t value_len) {
  242. return pcre2_match(sp->re, (PCRE2_SPTR)value, value_len, 0, 0, sp->match_data, NULL) >= 0;
  243. }
  244. // ----------------------------------------------------------------------------
// One element of a replacement pattern; elements form a singly linked list.
typedef struct replacement_node {
    HASHED_KEY name;                // presumably a variable name or a piece of literal text, depending on is_variable - TODO confirm
    bool is_variable;
    bool logged_error;              // NOTE(review): looks like a "log only once" marker - confirm in the implementation
    struct replacement_node *next;  // next element of the list
} REPLACE_NODE;

// implemented outside this header
void replace_node_free(REPLACE_NODE *rpn);
// A replacement pattern: its text and a list of REPLACE_NODE elements.
typedef struct replace_pattern {
    const char *pattern;    // the pattern text
    REPLACE_NODE *nodes;    // head of the linked list of elements
    bool has_variables;     // presumably true when at least one node has is_variable set - TODO confirm
} REPLACE_PATTERN;

// implemented outside this header
void replace_pattern_cleanup(REPLACE_PATTERN *rp);
bool replace_pattern_set(REPLACE_PATTERN *rp, const char *pattern);
  259. // ----------------------------------------------------------------------------
// A key to inject, with a replacement pattern for its value.
typedef struct injection {
    bool on_unmatched;      // presumably marks injections meant for unmatched lines - TODO confirm
    HASHED_KEY key;         // the key of the injection
    REPLACE_PATTERN value;  // the pattern of its value
} INJECTION;

// implemented outside this header
void injection_cleanup(INJECTION *inj);
  266. // ----------------------------------------------------------------------------
// A rename rule: a pair of key names.
typedef struct key_rename {
    HASHED_KEY new_key;
    HASHED_KEY old_key;
} RENAME;

// implemented outside this header
void rename_cleanup(RENAME *rn);
  272. // ----------------------------------------------------------------------------
// Bit flags kept in REWRITE.flags (bit 0 is not used).
typedef enum __attribute__((__packed__)) {
    RW_NONE = 0,
    RW_MATCH_PCRE2 = (1 << 1), // a rewrite rule
    RW_MATCH_NON_EMPTY = (1 << 2), // a rewrite rule
    RW_DONT_STOP = (1 << 3), // NOTE(review): meaning not visible in this header - confirm in the implementation
    RW_INJECT = (1 << 4), // NOTE(review): meaning not visible in this header - confirm in the implementation
} RW_FLAGS;
// A rewrite rule for the value of a key.
typedef struct key_rewrite {
    RW_FLAGS flags;     // RW_* flags
    HASHED_KEY key;     // the key of the rule
    union {
        // presumably selected by RW_MATCH_PCRE2 / RW_MATCH_NON_EMPTY in flags - TODO confirm
        SEARCH_PATTERN match_pcre2;
        REPLACE_PATTERN match_non_empty;
    };
    REPLACE_PATTERN value;  // the replacement pattern of the rule
} REWRITE;

// implemented outside this header
void rewrite_cleanup(REWRITE *rw);
  290. // ----------------------------------------------------------------------------
  291. // A job configuration and runtime structures
// All settings and working state of a job live in this single structure.
// The rule arrays are embedded with fixed capacities (MAX_INJECTIONS,
// MAX_REWRITES, MAX_RENAMES); each has a `used` member next to it.
typedef struct log_job {
    bool show_config;
    const char *pattern;
    const char *prefix;

    // presumably the hashtable type generated by simple_hashtable.h for
    // struct hashed_key - TODO confirm
    SIMPLE_HASHTABLE_KEY hashtable;

    // the log line
    struct {
        const char *buffer;
        const char *trimmed;
        size_t trimmed_len;
        size_t size;
        HASHED_KEY key;
    } line;

    // include / exclude search patterns
    struct {
        SEARCH_PATTERN include;
        SEARCH_PATTERN exclude;
    } filter;

    // filename related state
    struct {
        bool last_line_was_empty;
        HASHED_KEY key;
        TEXT current;
    } filename;

    // injections; `used` presumably counts the occupied entries of keys[] - TODO confirm
    struct {
        uint32_t used;
        INJECTION keys[MAX_INJECTIONS];
    } injections;

    // a key and a second, separate set of injections
    // (presumably for lines that did not match - TODO confirm)
    struct {
        HASHED_KEY key;
        struct {
            uint32_t used;
            INJECTION keys[MAX_INJECTIONS];
        } injections;
    } unmatched;

    // rewrite rules, plus a TEXT named tmp
    struct {
        uint32_t used;
        REWRITE array[MAX_REWRITES];
        TEXT tmp;
    } rewrites;

    // rename rules
    struct {
        uint32_t used;
        RENAME array[MAX_RENAMES];
    } renames;
} LOG_JOB;

// initialize a log job
void log_job_init(LOG_JOB *jb);

// free all resources consumed by the log job
void log_job_cleanup(LOG_JOB *jb);
  338. // ----------------------------------------------------------------------------
  339. // the entry point to send key value pairs to the output
  340. // this implements the pipeline of processing renames, rewrites and duplications
//   jb    - the job
//   key   - the key name (no length is passed for it)
//   value - the value of the key
//   len   - presumably the length of value, in bytes - TODO confirm
void log_job_send_extracted_key_value(LOG_JOB *jb, const char *key, const char *value, size_t len);
  342. // ----------------------------------------------------------------------------
  343. // configuration related
  344. // management of configuration to set settings
// All of these are implemented outside this header and return bool
// (presumably true on success - TODO confirm in the implementation).
// Most take their strings as pointer + length pairs; log_job_rewrite_add()
// takes plain strings without lengths.
bool log_job_filename_key_set(LOG_JOB *jb, const char *key, size_t key_len);
bool log_job_key_prefix_set(LOG_JOB *jb, const char *prefix, size_t prefix_len);
bool log_job_pattern_set(LOG_JOB *jb, const char *pattern, size_t pattern_len);
bool log_job_injection_add(LOG_JOB *jb, const char *key, size_t key_len, const char *value, size_t value_len, bool unmatched);
bool log_job_rewrite_add(LOG_JOB *jb, const char *key, RW_FLAGS flags, const char *search_pattern, const char *replace_pattern);
bool log_job_rename_add(LOG_JOB *jb, const char *new_key, size_t new_key_len, const char *old_key, size_t old_key_len);
bool log_job_include_pattern_set(LOG_JOB *jb, const char *pattern, size_t pattern_len);
bool log_job_exclude_pattern_set(LOG_JOB *jb, const char *pattern, size_t pattern_len);

// entry point to parse command line parameters
bool log_job_command_line_parse_parameters(LOG_JOB *jb, int argc, char **argv);

// NOTE(review): name is presumably the program name to show in the help text - confirm
void log_job_command_line_help(const char *name);
  356. // ----------------------------------------------------------------------------
  357. // YAML configuration related
// the YAML parsing functions are declared only when built with libyaml
#ifdef HAVE_LIBYAML
bool yaml_parse_file(const char *config_file_path, LOG_JOB *jb);
bool yaml_parse_config(const char *config_name, LOG_JOB *jb);
#endif

// declared regardless of HAVE_LIBYAML
void log_job_configuration_to_yaml(LOG_JOB *jb);
  363. // ----------------------------------------------------------------------------
  364. // JSON parser
// the parser state is opaque: struct log_json_state is not defined in this header
typedef struct log_json_state LOG_JSON_STATE;

// create / destroy a parser state for a job
LOG_JSON_STATE *json_parser_create(LOG_JOB *jb);
void json_parser_destroy(LOG_JSON_STATE *js);

const char *json_parser_error(LOG_JSON_STATE *js);
bool json_parse_document(LOG_JSON_STATE *js, const char *txt);
void json_test(void);

// NOTE(review): semantics of the arguments and of the returned size are not
// visible in this header - confirm in the implementation
size_t parse_surrogate(const char *s, char *d, size_t *remaining);
  372. // ----------------------------------------------------------------------------
  373. // logfmt parser
// the parser state is opaque: struct logfmt_state is not defined in this header
typedef struct logfmt_state LOGFMT_STATE;

// create / destroy a parser state for a job
LOGFMT_STATE *logfmt_parser_create(LOG_JOB *jb);
void logfmt_parser_destroy(LOGFMT_STATE *lfs);

const char *logfmt_parser_error(LOGFMT_STATE *lfs);
bool logfmt_parse_document(LOGFMT_STATE *js, const char *txt);
void logfmt_test(void);
  380. // ----------------------------------------------------------------------------
  381. // pcre2 parser
// the parser state is opaque: struct pcre2_state is not defined in this header
typedef struct pcre2_state PCRE2_STATE;

// create / destroy a parser state for a job
PCRE2_STATE *pcre2_parser_create(LOG_JOB *jb);
void pcre2_parser_destroy(PCRE2_STATE *pcre2);

const char *pcre2_parser_error(PCRE2_STATE *pcre2);

// unlike the JSON and logfmt parsers, this one takes the length of txt explicitly
bool pcre2_parse_document(PCRE2_STATE *pcre2, const char *txt, size_t len);
bool pcre2_has_error(PCRE2_STATE *pcre2);
void pcre2_test(void);

// NOTE(review): presumably writes the message of PCRE2 error code rc (at
// pattern offset pos) into msg, up to msg_len bytes - confirm in the implementation
void pcre2_get_error_in_buffer(char *msg, size_t msg_len, int rc, int pos);
  390. #endif //NETDATA_LOG2JOURNAL_H