log2journal.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #ifndef NETDATA_LOG2JOURNAL_H
  3. #define NETDATA_LOG2JOURNAL_H
  4. // only for PACKAGE_VERSION
  5. #include <config.h>
  6. #include <stdio.h>
  7. #include <stdlib.h>
  8. #include <dirent.h>
  9. #include <string.h>
  10. #include <stdbool.h>
  11. #include <string.h>
  12. #include <ctype.h>
  13. #include <math.h>
  14. #include <stdarg.h>
  15. #include <assert.h>
  16. // ----------------------------------------------------------------------------
  17. // logging
  18. // enable the compiler to check for printf like errors on our log2stderr() function
  // Declared on its own so the format attribute can be attached to it: the
  // compiler then checks the variadic arguments against the format string.
  static inline void log2stderr(const char *format, ...) __attribute__ ((format(__printf__, 1, 2)));

  // Write a printf-style formatted message to stderr, followed by a newline.
  static inline void log2stderr(const char *format, ...) {
      va_list ap;

      va_start(ap, format);
      vfprintf(stderr, format, ap);
      va_end(ap);

      // every message is terminated with a newline
      fputs("\n", stderr);
  }
  27. // ----------------------------------------------------------------------------
  28. // allocation functions abstraction
  // malloc() wrapper that never returns NULL.
  // If malloc() returns NULL, a fatal error is logged and the process exits.
  static inline void *mallocz(size_t size) {
      void *mem = malloc(size);

      if(mem)
          return mem;

      log2stderr("Fatal Error: Memory allocation failed. Requested size: %zu bytes.", size);
      exit(EXIT_FAILURE);
  }
  // calloc() wrapper that never returns NULL.
  // If calloc() returns NULL, a fatal error is logged and the process exits.
  static inline void *callocz(size_t elements, size_t size) {
      void *mem = calloc(elements, size);

      if(mem)
          return mem;

      log2stderr("Fatal Error: Memory allocation failed. Requested size: %zu bytes.", elements * size);
      exit(EXIT_FAILURE);
  }
  // realloc() wrapper that never returns NULL.
  // If realloc() returns NULL, a fatal error is logged and the process exits.
  static inline void *reallocz(void *ptr, size_t size) {
      void *resized = realloc(ptr, size);

      if(resized)
          return resized;

      log2stderr("Fatal Error: Memory reallocation failed. Requested size: %zu bytes.", size);
      exit(EXIT_FAILURE);
  }
  // strdup() wrapper that never returns NULL.
  // If strdup() returns NULL, a fatal error is logged and the process exits.
  static inline char *strdupz(const char *s) {
      char *copy = strdup(s);

      if(copy)
          return copy;

      log2stderr("Fatal Error: Memory allocation failed in strdup.");
      exit(EXIT_FAILURE);
  }
  // strndup() wrapper that never returns NULL.
  // If strndup() returns NULL, a fatal error is logged and the process exits.
  static inline char *strndupz(const char *s, size_t n) {
      char *copy = strndup(s, n);

      if(copy)
          return copy;

      log2stderr("Fatal Error: Memory allocation failed in strndup. Requested size: %zu bytes.", n);
      exit(EXIT_FAILURE);
  }
  // free() wrapper; a NULL pointer is accepted and ignored.
  static inline void freez(void *ptr) {
      // free(NULL) is defined to do nothing, so no explicit check is needed
      free(ptr);
  }
  73. // ----------------------------------------------------------------------------
  74. #define XXH_INLINE_ALL
  75. #include "../../libnetdata/xxhash.h"
  76. #define PCRE2_CODE_UNIT_WIDTH 8
  77. #include <pcre2.h>
  78. #ifdef HAVE_LIBYAML
  79. #include <yaml.h>
  80. #endif
  81. // ----------------------------------------------------------------------------
  82. // hashtable for HASHED_KEY
  83. // cleanup hashtable defines
  84. #include "../../libnetdata/simple_hashtable_undef.h"
  85. struct hashed_key;
  86. static inline int compare_keys(struct hashed_key *k1, struct hashed_key *k2);
  87. #define SIMPLE_HASHTABLE_SORT_FUNCTION compare_keys
  88. #define SIMPLE_HASHTABLE_VALUE_TYPE struct hashed_key
  89. #define SIMPLE_HASHTABLE_NAME _KEY
  90. #include "../../libnetdata/simple_hashtable.h"
  91. // ----------------------------------------------------------------------------
  // sizing limits; the injection, rewrite and rename tables are each
  // dimensioned to half of MAX_OUTPUT_KEYS
  #define MAX_OUTPUT_KEYS         1024
  #define MAX_LINE_LENGTH         (1024 * 1024)
  #define MAX_INJECTIONS          (MAX_OUTPUT_KEYS / 2)
  #define MAX_REWRITES            (MAX_OUTPUT_KEYS / 2)
  #define MAX_RENAMES             (MAX_OUTPUT_KEYS / 2)

  // limits according to systemd-journald
  #define JOURNAL_MAX_KEY_LEN     64
  #define JOURNAL_MAX_VALUE_LEN   (48 * 1024)

  // LIBCONFIG_DIR is not defined in this header; it must be provided before use
  #define LOG2JOURNAL_CONFIG_PATH LIBCONFIG_DIR "/log2journal.d"
  100. // ----------------------------------------------------------------------------
  101. // character conversion for journal keys
  102. extern const char journal_key_characters_map[256];
  103. // ----------------------------------------------------------------------------
  104. // copy to buffer, while ensuring there is no buffer overflow
  // Copy up to src_len bytes of src into dst, truncating so that the result,
  // including its terminating NUL, always fits in dst_size bytes.
  //
  // Returns the number of bytes copied, not counting the terminating NUL.
  // When dst_size is 0 nothing is written at all and 0 is returned.
  static inline size_t copy_to_buffer(char *dst, size_t dst_size, const char *src, size_t src_len) {
      if(dst_size == 0)
          return 0;

      // one byte of the destination is reserved for the terminator
      size_t capacity = dst_size - 1;
      size_t n = (src_len < capacity) ? src_len : capacity;

      if(n)
          memcpy(dst, src, n);

      dst[n] = '\0';
      return n;
  }
  122. // ----------------------------------------------------------------------------
  123. // A dynamically sized, reusable text buffer,
  124. // allowing us to be fast (no allocations during iterations) while having the
  125. // smallest possible allocations.
  typedef struct txt {
      char *txt;          // heap buffer holding a NUL-terminated string; NULL when nothing is allocated
      uint32_t size;      // bytes allocated for txt
      uint32_t len;       // bytes of text stored in txt, not counting the terminating NUL
  } TEXT;
  131. static inline void txt_cleanup(TEXT *t) {
  132. if(!t)
  133. return;
  134. if(t->txt)
  135. freez(t->txt);
  136. t->txt = NULL;
  137. t->size = 0;
  138. t->len = 0;
  139. }
  140. static inline void txt_replace(TEXT *t, const char *s, size_t len) {
  141. if(!s || !*s || len == 0) {
  142. s = "";
  143. len = 0;
  144. }
  145. if(len + 1 <= t->size) {
  146. // the existing value allocation, fits our value
  147. memcpy(t->txt, s, len);
  148. t->txt[len] = '\0';
  149. t->len = len;
  150. }
  151. else {
  152. // no existing value allocation, or too small for our value
  153. // cleanup and increase the buffer
  154. txt_cleanup(t);
  155. t->txt = strndupz(s, len);
  156. t->size = len + 1;
  157. t->len = len;
  158. }
  159. }
  160. static inline void txt_expand_and_append(TEXT *t, const char *s, size_t len) {
  161. if(len + 1 > (t->size - t->len)) {
  162. size_t new_size = t->len + len + 1;
  163. if(new_size < t->size * 2)
  164. new_size = t->size * 2;
  165. t->txt = reallocz(t->txt, new_size);
  166. t->size = new_size;
  167. }
  168. char *copy_to = &t->txt[t->len];
  169. memcpy(copy_to, s, len);
  170. copy_to[len] = '\0';
  171. t->len += len;
  172. }
  173. // ----------------------------------------------------------------------------
  typedef enum __attribute__((__packed__)) {
      HK_NONE                 = 0,

      // permanent flags - they are set once to optimize various decisions and lookups

      HK_HASHTABLE_ALLOCATED  = 0x0001, // this key object is allocated in the hashtable:
                                        // objects that do not have this, have a pointer to a key in the hashtable
                                        // objects that have this, have a value allocated

      HK_FILTERED             = 0x0002, // we checked once if this key is filtered
      HK_FILTERED_INCLUDED    = 0x0004, // the result of the filtering was to include it in the output

      HK_COLLISION_CHECKED    = 0x0008, // we checked once this key for collisions

      HK_RENAMES_CHECKED      = 0x0010, // we checked once if there are renames on this key
      HK_HAS_RENAMES          = 0x0020, // and we found there is a rename rule related to it

      // ephemeral flags - they are unset at the end of each log line

      HK_VALUE_FROM_LOG       = 0x4000, // the value of this key has been read from the log (or from injection, duplication)
      HK_VALUE_REWRITTEN      = 0x8000, // the value of this key has been rewritten due to one of our rewrite rules
  } HASHED_KEY_FLAGS;
  189. typedef struct hashed_key {
  190. const char *key;
  191. uint32_t len;
  192. HASHED_KEY_FLAGS flags;
  193. XXH64_hash_t hash;
  194. union {
  195. struct hashed_key *hashtable_ptr; // HK_HASHTABLE_ALLOCATED is not set
  196. TEXT value; // HK_HASHTABLE_ALLOCATED is set
  197. };
  198. } HASHED_KEY;
  199. static inline void hashed_key_cleanup(HASHED_KEY *k) {
  200. if(k->key) {
  201. freez((void *)k->key);
  202. k->key = NULL;
  203. }
  204. if(k->flags & HK_HASHTABLE_ALLOCATED)
  205. txt_cleanup(&k->value);
  206. else
  207. k->hashtable_ptr = NULL;
  208. }
  209. static inline void hashed_key_set(HASHED_KEY *k, const char *name) {
  210. hashed_key_cleanup(k);
  211. k->key = strdupz(name);
  212. k->len = strlen(k->key);
  213. k->hash = XXH3_64bits(k->key, k->len);
  214. k->flags = HK_NONE;
  215. }
  216. static inline void hashed_key_len_set(HASHED_KEY *k, const char *name, size_t len) {
  217. hashed_key_cleanup(k);
  218. k->key = strndupz(name, len);
  219. k->len = len;
  220. k->hash = XXH3_64bits(k->key, k->len);
  221. k->flags = HK_NONE;
  222. }
  223. static inline bool hashed_keys_match(HASHED_KEY *k1, HASHED_KEY *k2) {
  224. return ((k1 == k2) || (k1->hash == k2->hash && strcmp(k1->key, k2->key) == 0));
  225. }
  226. static inline int compare_keys(struct hashed_key *k1, struct hashed_key *k2) {
  227. return strcmp(k1->key, k2->key);
  228. }
  229. // ----------------------------------------------------------------------------
  230. typedef struct search_pattern {
  231. const char *pattern;
  232. pcre2_code *re;
  233. pcre2_match_data *match_data;
  234. TEXT error;
  235. } SEARCH_PATTERN;
  236. void search_pattern_cleanup(SEARCH_PATTERN *sp);
  237. bool search_pattern_set(SEARCH_PATTERN *sp, const char *search_pattern, size_t search_pattern_len);
  238. static inline bool search_pattern_matches(SEARCH_PATTERN *sp, const char *value, size_t value_len) {
  239. return pcre2_match(sp->re, (PCRE2_SPTR)value, value_len, 0, 0, sp->match_data, NULL) >= 0;
  240. }
  241. // ----------------------------------------------------------------------------
  242. typedef struct replacement_node {
  243. HASHED_KEY name;
  244. bool is_variable;
  245. bool logged_error;
  246. struct replacement_node *next;
  247. } REPLACE_NODE;
  248. void replace_node_free(REPLACE_NODE *rpn);
  249. typedef struct replace_pattern {
  250. const char *pattern;
  251. REPLACE_NODE *nodes;
  252. bool has_variables;
  253. } REPLACE_PATTERN;
  254. void replace_pattern_cleanup(REPLACE_PATTERN *rp);
  255. bool replace_pattern_set(REPLACE_PATTERN *rp, const char *pattern);
  256. // ----------------------------------------------------------------------------
  257. typedef struct injection {
  258. bool on_unmatched;
  259. HASHED_KEY key;
  260. REPLACE_PATTERN value;
  261. } INJECTION;
  262. void injection_cleanup(INJECTION *inj);
  263. // ----------------------------------------------------------------------------
  264. typedef struct key_rename {
  265. HASHED_KEY new_key;
  266. HASHED_KEY old_key;
  267. } RENAME;
  268. void rename_cleanup(RENAME *rn);
  269. // ----------------------------------------------------------------------------
  // Flags of rewrite rules. Bit 0 is not used.
  typedef enum __attribute__((__packed__)) {
      RW_NONE             = 0,
      RW_MATCH_PCRE2      = 0x02, // a rewrite rule
      RW_MATCH_NON_EMPTY  = 0x04, // a rewrite rule
      RW_DONT_STOP        = 0x08,
      RW_INJECT           = 0x10,
  } RW_FLAGS;
  277. typedef struct key_rewrite {
  278. RW_FLAGS flags;
  279. HASHED_KEY key;
  280. union {
  281. SEARCH_PATTERN match_pcre2;
  282. REPLACE_PATTERN match_non_empty;
  283. };
  284. REPLACE_PATTERN value;
  285. } REWRITE;
  286. void rewrite_cleanup(REWRITE *rw);
  287. // ----------------------------------------------------------------------------
  288. // A job configuration and runtime structures
  289. typedef struct log_job {
  290. bool show_config;
  291. const char *pattern;
  292. const char *prefix;
  293. SIMPLE_HASHTABLE_KEY hashtable;
  294. struct {
  295. const char *buffer;
  296. const char *trimmed;
  297. size_t trimmed_len;
  298. size_t size;
  299. HASHED_KEY key;
  300. } line;
  301. struct {
  302. SEARCH_PATTERN include;
  303. SEARCH_PATTERN exclude;
  304. } filter;
  305. struct {
  306. bool last_line_was_empty;
  307. HASHED_KEY key;
  308. TEXT current;
  309. } filename;
  310. struct {
  311. uint32_t used;
  312. INJECTION keys[MAX_INJECTIONS];
  313. } injections;
  314. struct {
  315. HASHED_KEY key;
  316. struct {
  317. uint32_t used;
  318. INJECTION keys[MAX_INJECTIONS];
  319. } injections;
  320. } unmatched;
  321. struct {
  322. uint32_t used;
  323. REWRITE array[MAX_REWRITES];
  324. TEXT tmp;
  325. } rewrites;
  326. struct {
  327. uint32_t used;
  328. RENAME array[MAX_RENAMES];
  329. } renames;
  330. } LOG_JOB;
  331. // initialize a log job
  332. void log_job_init(LOG_JOB *jb);
  333. // free all resources consumed by the log job
  334. void log_job_cleanup(LOG_JOB *jb);
  335. // ----------------------------------------------------------------------------
  336. // the entry point to send key value pairs to the output
  337. // this implements the pipeline of processing renames, rewrites and duplications
  338. void log_job_send_extracted_key_value(LOG_JOB *jb, const char *key, const char *value, size_t len);
  339. // ----------------------------------------------------------------------------
  340. // configuration related
  341. // management of configuration to set settings
  342. bool log_job_filename_key_set(LOG_JOB *jb, const char *key, size_t key_len);
  343. bool log_job_key_prefix_set(LOG_JOB *jb, const char *prefix, size_t prefix_len);
  344. bool log_job_pattern_set(LOG_JOB *jb, const char *pattern, size_t pattern_len);
  345. bool log_job_injection_add(LOG_JOB *jb, const char *key, size_t key_len, const char *value, size_t value_len, bool unmatched);
  346. bool log_job_rewrite_add(LOG_JOB *jb, const char *key, RW_FLAGS flags, const char *search_pattern, const char *replace_pattern);
  347. bool log_job_rename_add(LOG_JOB *jb, const char *new_key, size_t new_key_len, const char *old_key, size_t old_key_len);
  348. bool log_job_include_pattern_set(LOG_JOB *jb, const char *pattern, size_t pattern_len);
  349. bool log_job_exclude_pattern_set(LOG_JOB *jb, const char *pattern, size_t pattern_len);
  350. // entry point to parse command line parameters
  351. bool log_job_command_line_parse_parameters(LOG_JOB *jb, int argc, char **argv);
  352. void log_job_command_line_help(const char *name);
  353. // ----------------------------------------------------------------------------
  354. // YAML configuration related
  355. #ifdef HAVE_LIBYAML
  356. bool yaml_parse_file(const char *config_file_path, LOG_JOB *jb);
  357. bool yaml_parse_config(const char *config_name, LOG_JOB *jb);
  358. #endif
  359. void log_job_configuration_to_yaml(LOG_JOB *jb);
  360. // ----------------------------------------------------------------------------
  361. // JSON parser
  362. typedef struct log_json_state LOG_JSON_STATE;
  363. LOG_JSON_STATE *json_parser_create(LOG_JOB *jb);
  364. void json_parser_destroy(LOG_JSON_STATE *js);
  365. const char *json_parser_error(LOG_JSON_STATE *js);
  366. bool json_parse_document(LOG_JSON_STATE *js, const char *txt);
  367. void json_test(void);
  368. size_t parse_surrogate(const char *s, char *d, size_t *remaining);
  369. // ----------------------------------------------------------------------------
  370. // logfmt parser
  371. typedef struct logfmt_state LOGFMT_STATE;
  372. LOGFMT_STATE *logfmt_parser_create(LOG_JOB *jb);
  373. void logfmt_parser_destroy(LOGFMT_STATE *lfs);
  374. const char *logfmt_parser_error(LOGFMT_STATE *lfs);
  375. bool logfmt_parse_document(LOGFMT_STATE *js, const char *txt);
  376. void logfmt_test(void);
  377. // ----------------------------------------------------------------------------
  378. // pcre2 parser
  379. typedef struct pcre2_state PCRE2_STATE;
  380. PCRE2_STATE *pcre2_parser_create(LOG_JOB *jb);
  381. void pcre2_parser_destroy(PCRE2_STATE *pcre2);
  382. const char *pcre2_parser_error(PCRE2_STATE *pcre2);
  383. bool pcre2_parse_document(PCRE2_STATE *pcre2, const char *txt, size_t len);
  384. bool pcre2_has_error(PCRE2_STATE *pcre2);
  385. void pcre2_test(void);
  386. void pcre2_get_error_in_buffer(char *msg, size_t msg_len, int rc, int pos);
  387. #endif //NETDATA_LOG2JOURNAL_H