log2journal-logfmt.c 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "log2journal.h"
// Size, in bytes, of the buffer holding the parser's last error message.
#define ERROR_LINE_MAX 1024
// Size, in bytes, of the buffer holding the current key (prefix included).
#define KEY_MAX 1024

// State of one logfmt parser instance.
struct logfmt_state {
    const char *line;           // text being parsed; set by logfmt_parse_document()
    size_t pos;                 // offset into line of the next character to read
    char msg[ERROR_LINE_MAX];   // last error message; emptied at the start of each document
    char key[KEY_MAX];          // key buffer: the job prefix (if any) followed by the current key
    size_t key_start;           // offset in key at which each parsed key is written
    struct log_job *jb;         // job that receives the extracted key/value pairs
};
  13. #define logfmt_current_pos(lfs) &(lfs)->line[(lfs)->pos]
  14. #define logfmt_consume_char(lfs) ++(lfs)->pos
  15. static inline void logfmt_process_key_value(LOGFMT_STATE *lfs, const char *value, size_t len) {
  16. jb_send_extracted_key_value(lfs->jb, lfs->key, value, len);
  17. }
  18. static inline void logfmt_skip_spaces(LOGFMT_STATE *lfs) {
  19. const char *s = logfmt_current_pos(lfs);
  20. const char *start = s;
  21. while(isspace(*s)) s++;
  22. lfs->pos += s - start;
  23. }
  24. static inline bool logftm_parse_value(LOGFMT_STATE *lfs) {
  25. static __thread char value[MAX_VALUE_LEN];
  26. char quote = '\0';
  27. const char *s = logfmt_current_pos(lfs);
  28. if(*s == '\"' || *s == '\'') {
  29. quote = *s;
  30. logfmt_consume_char(lfs);
  31. }
  32. value[0] = '\0';
  33. char *d = value;
  34. s = logfmt_current_pos(lfs);
  35. size_t remaining = sizeof(value);
  36. char end_char = (char)(quote == '\0' ? ' ' : quote);
  37. while (*s && *s != end_char) {
  38. char c;
  39. if (*s == '\\') {
  40. s++;
  41. switch (*s) {
  42. case 'n':
  43. c = '\n';
  44. s++;
  45. break;
  46. case 't':
  47. c = '\t';
  48. s++;
  49. break;
  50. case 'b':
  51. c = '\b';
  52. s++;
  53. break;
  54. case 'f':
  55. c = '\f';
  56. s++;
  57. break;
  58. case 'r':
  59. c = '\r';
  60. s++;
  61. break;
  62. default:
  63. c = *s++;
  64. break;
  65. }
  66. }
  67. else
  68. c = *s++;
  69. if(remaining < 2) {
  70. snprintf(lfs->msg, sizeof(lfs->msg),
  71. "LOGFMT PARSER: truncated string value at pos %zu", lfs->pos);
  72. return false;
  73. }
  74. else {
  75. *d++ = c;
  76. remaining--;
  77. }
  78. }
  79. *d = '\0';
  80. lfs->pos += s - logfmt_current_pos(lfs);
  81. s = logfmt_current_pos(lfs);
  82. if(quote != '\0') {
  83. if (*s != quote) {
  84. snprintf(lfs->msg, sizeof(lfs->msg),
  85. "LOGFMT PARSER: missing quote at pos %zu: '%s'",
  86. lfs->pos, s);
  87. return false;
  88. }
  89. else
  90. logfmt_consume_char(lfs);
  91. }
  92. if(d > value)
  93. logfmt_process_key_value(lfs, value, d - value);
  94. return true;
  95. }
  96. static inline bool logfmt_parse_key(LOGFMT_STATE *lfs) {
  97. static const char valid_journal_key_chars[256] = {
  98. // control characters
  99. [0] = '\0', [1] = '_', [2] = '_', [3] = '_', [4] = '_', [5] = '_', [6] = '_', [7] = '_',
  100. [8] = '_', [9] = '_', [10] = '_', [11] = '_', [12] = '_', [13] = '_', [14] = '_', [15] = '_',
  101. [16] = '_', [17] = '_', [18] = '_', [19] = '_', [20] = '_', [21] = '_', [22] = '_', [23] = '_',
  102. [24] = '_', [25] = '_', [26] = '_', [27] = '_', [28] = '_', [29] = '_', [30] = '_', [31] = '_',
  103. // symbols
  104. [' '] = '_', ['!'] = '_', ['"'] = '_', ['#'] = '_', ['$'] = '_', ['%'] = '_', ['&'] = '_', ['\''] = '_',
  105. ['('] = '_', [')'] = '_', ['*'] = '_', ['+'] = '_', [','] = '_', ['-'] = '_', ['.'] = '_', ['/'] = '_',
  106. // numbers
  107. ['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7',
  108. ['8'] = '8', ['9'] = '9',
  109. // symbols
  110. [':'] = '_', [';'] = '_', ['<'] = '_', ['='] = '_', ['>'] = '_', ['?'] = '_', ['@'] = '_',
  111. // capitals
  112. ['A'] = 'A', ['B'] = 'B', ['C'] = 'C', ['D'] = 'D', ['E'] = 'E', ['F'] = 'F', ['G'] = 'G', ['H'] = 'H',
  113. ['I'] = 'I', ['J'] = 'J', ['K'] = 'K', ['L'] = 'L', ['M'] = 'M', ['N'] = 'N', ['O'] = 'O', ['P'] = 'P',
  114. ['Q'] = 'Q', ['R'] = 'R', ['S'] = 'S', ['T'] = 'T', ['U'] = 'U', ['V'] = 'V', ['W'] = 'W', ['X'] = 'X',
  115. ['Y'] = 'Y', ['Z'] = 'Z',
  116. // symbols
  117. ['['] = '_', ['\\'] = '_', [']'] = '_', ['^'] = '_', ['_'] = '_', ['`'] = '_',
  118. // lower to upper
  119. ['a'] = 'A', ['b'] = 'B', ['c'] = 'C', ['d'] = 'D', ['e'] = 'E', ['f'] = 'F', ['g'] = 'G', ['h'] = 'H',
  120. ['i'] = 'I', ['j'] = 'J', ['k'] = 'K', ['l'] = 'L', ['m'] = 'M', ['n'] = 'N', ['o'] = 'O', ['p'] = 'P',
  121. ['q'] = 'Q', ['r'] = 'R', ['s'] = 'S', ['t'] = 'T', ['u'] = 'U', ['v'] = 'V', ['w'] = 'W', ['x'] = 'X',
  122. ['y'] = 'Y', ['z'] = 'Z',
  123. // symbols
  124. ['{'] = '_', ['|'] = '_', ['}'] = '_', ['~'] = '_', [127] = '_', // Delete (DEL)
  125. // Extended ASCII characters (128-255) set to underscore
  126. [128] = '_', [129] = '_', [130] = '_', [131] = '_', [132] = '_', [133] = '_', [134] = '_', [135] = '_',
  127. [136] = '_', [137] = '_', [138] = '_', [139] = '_', [140] = '_', [141] = '_', [142] = '_', [143] = '_',
  128. [144] = '_', [145] = '_', [146] = '_', [147] = '_', [148] = '_', [149] = '_', [150] = '_', [151] = '_',
  129. [152] = '_', [153] = '_', [154] = '_', [155] = '_', [156] = '_', [157] = '_', [158] = '_', [159] = '_',
  130. [160] = '_', [161] = '_', [162] = '_', [163] = '_', [164] = '_', [165] = '_', [166] = '_', [167] = '_',
  131. [168] = '_', [169] = '_', [170] = '_', [171] = '_', [172] = '_', [173] = '_', [174] = '_', [175] = '_',
  132. [176] = '_', [177] = '_', [178] = '_', [179] = '_', [180] = '_', [181] = '_', [182] = '_', [183] = '_',
  133. [184] = '_', [185] = '_', [186] = '_', [187] = '_', [188] = '_', [189] = '_', [190] = '_', [191] = '_',
  134. [192] = '_', [193] = '_', [194] = '_', [195] = '_', [196] = '_', [197] = '_', [198] = '_', [199] = '_',
  135. [200] = '_', [201] = '_', [202] = '_', [203] = '_', [204] = '_', [205] = '_', [206] = '_', [207] = '_',
  136. [208] = '_', [209] = '_', [210] = '_', [211] = '_', [212] = '_', [213] = '_', [214] = '_', [215] = '_',
  137. [216] = '_', [217] = '_', [218] = '_', [219] = '_', [220] = '_', [221] = '_', [222] = '_', [223] = '_',
  138. [224] = '_', [225] = '_', [226] = '_', [227] = '_', [228] = '_', [229] = '_', [230] = '_', [231] = '_',
  139. [232] = '_', [233] = '_', [234] = '_', [235] = '_', [236] = '_', [237] = '_', [238] = '_', [239] = '_',
  140. [240] = '_', [241] = '_', [242] = '_', [243] = '_', [244] = '_', [245] = '_', [246] = '_', [247] = '_',
  141. [248] = '_', [249] = '_', [250] = '_', [251] = '_', [252] = '_', [253] = '_', [254] = '_', [255] = '_',
  142. };
  143. logfmt_skip_spaces(lfs);
  144. char *d = &lfs->key[lfs->key_start];
  145. size_t remaining = sizeof(lfs->key) - (d - lfs->key);
  146. const char *s = logfmt_current_pos(lfs);
  147. char last_c = '\0';
  148. while(*s && *s != '=') {
  149. char c;
  150. if (*s == '\\')
  151. s++;
  152. c = valid_journal_key_chars[(unsigned char)*s++];
  153. if(c == '_' && last_c == '_')
  154. continue;
  155. else {
  156. if(remaining < 2) {
  157. snprintf(lfs->msg, sizeof(lfs->msg),
  158. "LOGFMT PARSER: key buffer full - keys are too long, at pos %zu", lfs->pos);
  159. return false;
  160. }
  161. *d++ = c;
  162. remaining--;
  163. }
  164. last_c = c;
  165. }
  166. *d = '\0';
  167. lfs->pos += s - logfmt_current_pos(lfs);
  168. s = logfmt_current_pos(lfs);
  169. if(*s != '=') {
  170. snprintf(lfs->msg, sizeof(lfs->msg),
  171. "LOGFMT PARSER: key is missing the equal sign, at pos %zu", lfs->pos);
  172. return false;
  173. }
  174. logfmt_consume_char(lfs);
  175. return true;
  176. }
  177. LOGFMT_STATE *logfmt_parser_create(struct log_job *jb) {
  178. LOGFMT_STATE *lfs = mallocz(sizeof(LOGFMT_STATE));
  179. memset(lfs, 0, sizeof(LOGFMT_STATE));
  180. lfs->jb = jb;
  181. if(jb->prefix)
  182. lfs->key_start = copy_to_buffer(lfs->key, sizeof(lfs->key), lfs->jb->prefix, strlen(lfs->jb->prefix));
  183. return lfs;
  184. }
  185. void logfmt_parser_destroy(LOGFMT_STATE *lfs) {
  186. if(lfs)
  187. freez(lfs);
  188. }
  189. const char *logfmt_parser_error(LOGFMT_STATE *lfs) {
  190. return lfs->msg;
  191. }
  192. bool logfmt_parse_document(LOGFMT_STATE *lfs, const char *txt) {
  193. lfs->line = txt;
  194. lfs->pos = 0;
  195. lfs->msg[0] = '\0';
  196. const char *s;
  197. do {
  198. if(!logfmt_parse_key(lfs))
  199. return false;
  200. if(!logftm_parse_value(lfs))
  201. return false;
  202. logfmt_skip_spaces(lfs);
  203. s = logfmt_current_pos(lfs);
  204. } while(*s);
  205. return true;
  206. }
  207. void logfmt_test(void) {
  208. struct log_job jb = { .prefix = "NIGNX_" };
  209. LOGFMT_STATE *logfmt = logfmt_parser_create(&jb);
  210. logfmt_parse_document(logfmt, "x=1 y=2 z=\"3 \\ 4\" 5 ");
  211. logfmt_parser_destroy(logfmt);
  212. }