// log2journal-json.c
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "log2journal.h"
// Size, in bytes, of the buffer that holds the parser's last error message.
#define ERROR_LINE_MAX 1024
// Size, in bytes, of the buffer in which the flattened key is assembled.
#define KEY_MAX 1024
// Number of slots in the key stack, which bounds the nesting depth.
#define JSON_DEPTH_MAX 100
// State of the JSON parser: the text being parsed, the current position,
// the last error message and the flattened key built while descending.
struct log_json_state {
    // the text being parsed (set by json_parse_document())
    const char *line;
    // offset of the next unparsed character in line
    size_t pos;
    // last error message (returned by json_parser_error())
    char msg[ERROR_LINE_MAX];
    // the flattened key: the job prefix followed by the '_' joined key components
    char key[KEY_MAX];
    // key_stack[i] points into key[], at the position where the key component
    // of depth i is written; key_stack[0] is right after the prefix
    char *key_stack[JSON_DEPTH_MAX];
    // number of key components currently pushed
    size_t depth;
    // the job that receives every extracted key/value pair
    struct log_job *jb;
};
  15. static inline bool json_parse_object(LOG_JSON_STATE *js);
  16. static inline bool json_parse_array(LOG_JSON_STATE *js);
  17. #define json_current_pos(js) &(js)->line[(js)->pos]
  18. #define json_consume_char(js) ++(js)->pos
  19. static inline void json_process_key_value(LOG_JSON_STATE *js, const char *value, size_t len) {
  20. jb_send_extracted_key_value(js->jb, js->key, value, len);
  21. }
  22. static inline void json_skip_spaces(LOG_JSON_STATE *js) {
  23. const char *s = json_current_pos(js);
  24. const char *start = s;
  25. while(isspace(*s)) s++;
  26. js->pos += s - start;
  27. }
  28. static inline bool json_expect_char_after_white_space(LOG_JSON_STATE *js, const char *expected) {
  29. json_skip_spaces(js);
  30. const char *s = json_current_pos(js);
  31. for(const char *e = expected; *e ;e++) {
  32. if (*s == *e)
  33. return true;
  34. }
  35. snprintf(js->msg, sizeof(js->msg),
  36. "JSON PARSER: character '%c' is not one of the expected characters (%s), at pos %zu",
  37. *s ? *s : '?', expected, js->pos);
  38. return false;
  39. }
  40. static inline bool json_parse_null(LOG_JSON_STATE *js) {
  41. const char *s = json_current_pos(js);
  42. if (strncmp(s, "null", 4) == 0) {
  43. json_process_key_value(js, "null", 4);
  44. js->pos += 4;
  45. return true;
  46. }
  47. else {
  48. snprintf(js->msg, sizeof(js->msg),
  49. "JSON PARSER: expected 'null', found '%.4s' at position %zu", s, js->pos);
  50. return false;
  51. }
  52. }
  53. static inline bool json_parse_true(LOG_JSON_STATE *js) {
  54. const char *s = json_current_pos(js);
  55. if (strncmp(s, "true", 4) == 0) {
  56. json_process_key_value(js, "true", 4);
  57. js->pos += 4;
  58. return true;
  59. }
  60. else {
  61. snprintf(js->msg, sizeof(js->msg),
  62. "JSON PARSER: expected 'true', found '%.4s' at position %zu", s, js->pos);
  63. return false;
  64. }
  65. }
  66. static inline bool json_parse_false(LOG_JSON_STATE *js) {
  67. const char *s = json_current_pos(js);
  68. if (strncmp(s, "false", 5) == 0) {
  69. json_process_key_value(js, "false", 5);
  70. js->pos += 5;
  71. return true;
  72. }
  73. else {
  74. snprintf(js->msg, sizeof(js->msg),
  75. "JSON PARSER: expected 'false', found '%.4s' at position %zu", s, js->pos);
  76. return false;
  77. }
  78. }
  79. static inline bool json_parse_number(LOG_JSON_STATE *js) {
  80. static __thread char value[8192];
  81. value[0] = '\0';
  82. char *d = value;
  83. const char *s = json_current_pos(js);
  84. size_t remaining = sizeof(value) - 1; // Reserve space for null terminator
  85. // Optional minus sign
  86. if (*s == '-') {
  87. *d++ = *s++;
  88. remaining--;
  89. }
  90. // Digits before decimal point
  91. while (*s >= '0' && *s <= '9') {
  92. if (remaining < 2) {
  93. snprintf(js->msg, sizeof(js->msg), "JSON PARSER: truncated number value at pos %zu", js->pos);
  94. return false;
  95. }
  96. *d++ = *s++;
  97. remaining--;
  98. }
  99. // Decimal point and fractional part
  100. if (*s == '.') {
  101. *d++ = *s++;
  102. remaining--;
  103. while (*s >= '0' && *s <= '9') {
  104. if (remaining < 2) {
  105. snprintf(js->msg, sizeof(js->msg), "JSON PARSER: truncated fractional part at pos %zu", js->pos);
  106. return false;
  107. }
  108. *d++ = *s++;
  109. remaining--;
  110. }
  111. }
  112. // Exponent part
  113. if (*s == 'e' || *s == 'E') {
  114. *d++ = *s++;
  115. remaining--;
  116. // Optional sign in exponent
  117. if (*s == '+' || *s == '-') {
  118. *d++ = *s++;
  119. remaining--;
  120. }
  121. while (*s >= '0' && *s <= '9') {
  122. if (remaining < 2) {
  123. snprintf(js->msg, sizeof(js->msg), "JSON PARSER: truncated exponent at pos %zu", js->pos);
  124. return false;
  125. }
  126. *d++ = *s++;
  127. remaining--;
  128. }
  129. }
  130. *d = '\0';
  131. js->pos += d - value;
  132. if (d > value) {
  133. json_process_key_value(js, value, d - value);
  134. return true;
  135. } else {
  136. snprintf(js->msg, sizeof(js->msg), "JSON PARSER: invalid number format at pos %zu", js->pos);
  137. return false;
  138. }
  139. }
  140. static bool encode_utf8(unsigned codepoint, char **d, size_t *remaining) {
  141. if (codepoint <= 0x7F) {
  142. // 1-byte sequence
  143. if (*remaining < 2) return false; // +1 for the null
  144. *(*d)++ = (char)codepoint;
  145. (*remaining)--;
  146. }
  147. else if (codepoint <= 0x7FF) {
  148. // 2-byte sequence
  149. if (*remaining < 3) return false; // +1 for the null
  150. *(*d)++ = (char)(0xC0 | ((codepoint >> 6) & 0x1F));
  151. *(*d)++ = (char)(0x80 | (codepoint & 0x3F));
  152. (*remaining) -= 2;
  153. }
  154. else if (codepoint <= 0xFFFF) {
  155. // 3-byte sequence
  156. if (*remaining < 4) return false; // +1 for the null
  157. *(*d)++ = (char)(0xE0 | ((codepoint >> 12) & 0x0F));
  158. *(*d)++ = (char)(0x80 | ((codepoint >> 6) & 0x3F));
  159. *(*d)++ = (char)(0x80 | (codepoint & 0x3F));
  160. (*remaining) -= 3;
  161. }
  162. else if (codepoint <= 0x10FFFF) {
  163. // 4-byte sequence
  164. if (*remaining < 5) return false; // +1 for the null
  165. *(*d)++ = (char)(0xF0 | ((codepoint >> 18) & 0x07));
  166. *(*d)++ = (char)(0x80 | ((codepoint >> 12) & 0x3F));
  167. *(*d)++ = (char)(0x80 | ((codepoint >> 6) & 0x3F));
  168. *(*d)++ = (char)(0x80 | (codepoint & 0x3F));
  169. (*remaining) -= 4;
  170. }
  171. else
  172. // Invalid code point
  173. return false;
  174. return true;
  175. }
  176. static inline bool json_parse_string(LOG_JSON_STATE *js) {
  177. static __thread char value[MAX_VALUE_LEN];
  178. if(!json_expect_char_after_white_space(js, "\""))
  179. return false;
  180. json_consume_char(js);
  181. value[0] = '\0';
  182. char *d = value;
  183. const char *s = json_current_pos(js);
  184. size_t remaining = sizeof(value);
  185. while (*s && *s != '"') {
  186. char c;
  187. if (*s == '\\') {
  188. s++;
  189. switch (*s) {
  190. case 'n':
  191. c = '\n';
  192. s++;
  193. break;
  194. case 't':
  195. c = '\t';
  196. s++;
  197. break;
  198. case 'b':
  199. c = '\b';
  200. s++;
  201. break;
  202. case 'f':
  203. c = '\f';
  204. s++;
  205. break;
  206. case 'r':
  207. c = '\r';
  208. s++;
  209. break;
  210. case 'u':
  211. if(isxdigit(s[1]) && isxdigit(s[2]) && isxdigit(s[3]) && isxdigit(s[4])) {
  212. char b[5] = {
  213. [0] = s[1],
  214. [1] = s[2],
  215. [2] = s[3],
  216. [3] = s[4],
  217. [4] = '\0',
  218. };
  219. unsigned codepoint = strtoul(b, NULL, 16);
  220. if(encode_utf8(codepoint, &d, &remaining)) {
  221. s += 5;
  222. continue;
  223. }
  224. else {
  225. *d++ = '\\';
  226. remaining--;
  227. c = *s++;
  228. }
  229. }
  230. else {
  231. *d++ = '\\';
  232. remaining--;
  233. c = *s++;
  234. }
  235. break;
  236. default:
  237. c = *s++;
  238. break;
  239. }
  240. }
  241. else
  242. c = *s++;
  243. if(remaining < 2) {
  244. snprintf(js->msg, sizeof(js->msg),
  245. "JSON PARSER: truncated string value at pos %zu", js->pos);
  246. return false;
  247. }
  248. else {
  249. *d++ = c;
  250. remaining--;
  251. }
  252. }
  253. *d = '\0';
  254. js->pos += s - json_current_pos(js);
  255. if(!json_expect_char_after_white_space(js, "\""))
  256. return false;
  257. json_consume_char(js);
  258. if(d > value)
  259. json_process_key_value(js, value, d - value);
  260. return true;
  261. }
  262. static inline bool json_parse_key_and_push(LOG_JSON_STATE *js) {
  263. static const char valid_journal_key_chars[256] = {
  264. // control characters
  265. [0] = '\0', [1] = '_', [2] = '_', [3] = '_', [4] = '_', [5] = '_', [6] = '_', [7] = '_',
  266. [8] = '_', [9] = '_', [10] = '_', [11] = '_', [12] = '_', [13] = '_', [14] = '_', [15] = '_',
  267. [16] = '_', [17] = '_', [18] = '_', [19] = '_', [20] = '_', [21] = '_', [22] = '_', [23] = '_',
  268. [24] = '_', [25] = '_', [26] = '_', [27] = '_', [28] = '_', [29] = '_', [30] = '_', [31] = '_',
  269. // symbols
  270. [' '] = '_', ['!'] = '_', ['"'] = '_', ['#'] = '_', ['$'] = '_', ['%'] = '_', ['&'] = '_', ['\''] = '_',
  271. ['('] = '_', [')'] = '_', ['*'] = '_', ['+'] = '_', [','] = '_', ['-'] = '_', ['.'] = '_', ['/'] = '_',
  272. // numbers
  273. ['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7',
  274. ['8'] = '8', ['9'] = '9',
  275. // symbols
  276. [':'] = '_', [';'] = '_', ['<'] = '_', ['='] = '_', ['>'] = '_', ['?'] = '_', ['@'] = '_',
  277. // capitals
  278. ['A'] = 'A', ['B'] = 'B', ['C'] = 'C', ['D'] = 'D', ['E'] = 'E', ['F'] = 'F', ['G'] = 'G', ['H'] = 'H',
  279. ['I'] = 'I', ['J'] = 'J', ['K'] = 'K', ['L'] = 'L', ['M'] = 'M', ['N'] = 'N', ['O'] = 'O', ['P'] = 'P',
  280. ['Q'] = 'Q', ['R'] = 'R', ['S'] = 'S', ['T'] = 'T', ['U'] = 'U', ['V'] = 'V', ['W'] = 'W', ['X'] = 'X',
  281. ['Y'] = 'Y', ['Z'] = 'Z',
  282. // symbols
  283. ['['] = '_', ['\\'] = '_', [']'] = '_', ['^'] = '_', ['_'] = '_', ['`'] = '_',
  284. // lower to upper
  285. ['a'] = 'A', ['b'] = 'B', ['c'] = 'C', ['d'] = 'D', ['e'] = 'E', ['f'] = 'F', ['g'] = 'G', ['h'] = 'H',
  286. ['i'] = 'I', ['j'] = 'J', ['k'] = 'K', ['l'] = 'L', ['m'] = 'M', ['n'] = 'N', ['o'] = 'O', ['p'] = 'P',
  287. ['q'] = 'Q', ['r'] = 'R', ['s'] = 'S', ['t'] = 'T', ['u'] = 'U', ['v'] = 'V', ['w'] = 'W', ['x'] = 'X',
  288. ['y'] = 'Y', ['z'] = 'Z',
  289. // symbols
  290. ['{'] = '_', ['|'] = '_', ['}'] = '_', ['~'] = '_', [127] = '_', // Delete (DEL)
  291. // Extended ASCII characters (128-255) set to underscore
  292. [128] = '_', [129] = '_', [130] = '_', [131] = '_', [132] = '_', [133] = '_', [134] = '_', [135] = '_',
  293. [136] = '_', [137] = '_', [138] = '_', [139] = '_', [140] = '_', [141] = '_', [142] = '_', [143] = '_',
  294. [144] = '_', [145] = '_', [146] = '_', [147] = '_', [148] = '_', [149] = '_', [150] = '_', [151] = '_',
  295. [152] = '_', [153] = '_', [154] = '_', [155] = '_', [156] = '_', [157] = '_', [158] = '_', [159] = '_',
  296. [160] = '_', [161] = '_', [162] = '_', [163] = '_', [164] = '_', [165] = '_', [166] = '_', [167] = '_',
  297. [168] = '_', [169] = '_', [170] = '_', [171] = '_', [172] = '_', [173] = '_', [174] = '_', [175] = '_',
  298. [176] = '_', [177] = '_', [178] = '_', [179] = '_', [180] = '_', [181] = '_', [182] = '_', [183] = '_',
  299. [184] = '_', [185] = '_', [186] = '_', [187] = '_', [188] = '_', [189] = '_', [190] = '_', [191] = '_',
  300. [192] = '_', [193] = '_', [194] = '_', [195] = '_', [196] = '_', [197] = '_', [198] = '_', [199] = '_',
  301. [200] = '_', [201] = '_', [202] = '_', [203] = '_', [204] = '_', [205] = '_', [206] = '_', [207] = '_',
  302. [208] = '_', [209] = '_', [210] = '_', [211] = '_', [212] = '_', [213] = '_', [214] = '_', [215] = '_',
  303. [216] = '_', [217] = '_', [218] = '_', [219] = '_', [220] = '_', [221] = '_', [222] = '_', [223] = '_',
  304. [224] = '_', [225] = '_', [226] = '_', [227] = '_', [228] = '_', [229] = '_', [230] = '_', [231] = '_',
  305. [232] = '_', [233] = '_', [234] = '_', [235] = '_', [236] = '_', [237] = '_', [238] = '_', [239] = '_',
  306. [240] = '_', [241] = '_', [242] = '_', [243] = '_', [244] = '_', [245] = '_', [246] = '_', [247] = '_',
  307. [248] = '_', [249] = '_', [250] = '_', [251] = '_', [252] = '_', [253] = '_', [254] = '_', [255] = '_',
  308. };
  309. if (!json_expect_char_after_white_space(js, "\""))
  310. return false;
  311. if(js->depth >= JSON_DEPTH_MAX - 1) {
  312. snprintf(js->msg, sizeof(js->msg),
  313. "JSON PARSER: object too deep, at pos %zu", js->pos);
  314. return false;
  315. }
  316. json_consume_char(js);
  317. char *d = js->key_stack[js->depth];
  318. if(js->depth)
  319. *d++ = '_';
  320. size_t remaining = sizeof(js->key) - (d - js->key);
  321. const char *s = json_current_pos(js);
  322. char last_c = '\0';
  323. while(*s && *s != '\"') {
  324. char c;
  325. if (*s == '\\') {
  326. s++;
  327. c = (char)((*s == 'u') ? '_' : valid_journal_key_chars[(unsigned char)*s]);
  328. s += (*s == 'u') ? 5 : 1;
  329. }
  330. else
  331. c = valid_journal_key_chars[(unsigned char)*s++];
  332. if(c == '_' && last_c == '_')
  333. continue;
  334. else {
  335. if(remaining < 2) {
  336. snprintf(js->msg, sizeof(js->msg),
  337. "JSON PARSER: key buffer full - keys are too long, at pos %zu", js->pos);
  338. return false;
  339. }
  340. *d++ = c;
  341. remaining--;
  342. }
  343. last_c = c;
  344. }
  345. *d = '\0';
  346. js->pos += s - json_current_pos(js);
  347. if (!json_expect_char_after_white_space(js, "\""))
  348. return false;
  349. json_consume_char(js);
  350. js->key_stack[++js->depth] = d;
  351. return true;
  352. }
  353. static inline bool json_key_pop(LOG_JSON_STATE *js) {
  354. if(js->depth <= 0) {
  355. snprintf(js->msg, sizeof(js->msg),
  356. "JSON PARSER: cannot pop a key at depth %zu, at pos %zu", js->depth, js->pos);
  357. return false;
  358. }
  359. char *k = js->key_stack[js->depth--];
  360. *k = '\0';
  361. return true;
  362. }
  363. static inline bool json_parse_value(LOG_JSON_STATE *js) {
  364. if(!json_expect_char_after_white_space(js, "-.0123456789tfn\"{["))
  365. return false;
  366. const char *s = json_current_pos(js);
  367. switch(*s) {
  368. case '-':
  369. case '0':
  370. case '1':
  371. case '2':
  372. case '3':
  373. case '4':
  374. case '5':
  375. case '6':
  376. case '7':
  377. case '8':
  378. case '9':
  379. return json_parse_number(js);
  380. case 't':
  381. return json_parse_true(js);
  382. case 'f':
  383. return json_parse_false(js);
  384. case 'n':
  385. return json_parse_null(js);
  386. case '"':
  387. return json_parse_string(js);
  388. case '{':
  389. return json_parse_object(js);
  390. case '[':
  391. return json_parse_array(js);
  392. }
  393. snprintf(js->msg, sizeof(js->msg),
  394. "JSON PARSER: unexpected character at pos %zu", js->pos);
  395. return false;
  396. }
  397. static inline bool json_key_index_and_push(LOG_JSON_STATE *js, size_t index) {
  398. char *d = js->key_stack[js->depth];
  399. if(js->depth > 0) {
  400. *d++ = '_';
  401. }
  402. // Convert index to string manually
  403. char temp[32];
  404. char *t = temp + sizeof(temp) - 1; // Start at the end of the buffer
  405. *t = '\0';
  406. do {
  407. *--t = (char)((index % 10) + '0');
  408. index /= 10;
  409. } while (index > 0);
  410. size_t remaining = sizeof(js->key) - (d - js->key);
  411. // Append the index to the key
  412. while (*t) {
  413. if(remaining < 2) {
  414. snprintf(js->msg, sizeof(js->msg),
  415. "JSON PARSER: key buffer full - keys are too long, at pos %zu", js->pos);
  416. return false;
  417. }
  418. *d++ = *t++;
  419. remaining--;
  420. }
  421. *d = '\0'; // Null-terminate the key
  422. js->key_stack[++js->depth] = d;
  423. return true;
  424. }
  425. static inline bool json_parse_array(LOG_JSON_STATE *js) {
  426. if(!json_expect_char_after_white_space(js, "["))
  427. return false;
  428. json_consume_char(js);
  429. size_t index = 0;
  430. do {
  431. if(!json_key_index_and_push(js, index))
  432. return false;
  433. if(!json_parse_value(js))
  434. return false;
  435. json_key_pop(js);
  436. if(!json_expect_char_after_white_space(js, ",]"))
  437. return false;
  438. const char *s = json_current_pos(js);
  439. json_consume_char(js);
  440. if(*s == ',') {
  441. index++;
  442. continue;
  443. }
  444. else // }
  445. break;
  446. } while(true);
  447. return true;
  448. }
  449. static inline bool json_parse_object(LOG_JSON_STATE *js) {
  450. if(!json_expect_char_after_white_space(js, "{"))
  451. return false;
  452. json_consume_char(js);
  453. do {
  454. if (!json_expect_char_after_white_space(js, "\""))
  455. return false;
  456. if(!json_parse_key_and_push(js))
  457. return false;
  458. if(!json_expect_char_after_white_space(js, ":"))
  459. return false;
  460. json_consume_char(js);
  461. if(!json_parse_value(js))
  462. return false;
  463. json_key_pop(js);
  464. if(!json_expect_char_after_white_space(js, ",}"))
  465. return false;
  466. const char *s = json_current_pos(js);
  467. json_consume_char(js);
  468. if(*s == ',')
  469. continue;
  470. else // }
  471. break;
  472. } while(true);
  473. return true;
  474. }
  475. LOG_JSON_STATE *json_parser_create(struct log_job *jb) {
  476. LOG_JSON_STATE *js = mallocz(sizeof(LOG_JSON_STATE));
  477. memset(js, 0, sizeof(LOG_JSON_STATE));
  478. js->jb = jb;
  479. if(jb->prefix)
  480. copy_to_buffer(js->key, sizeof(js->key), js->jb->prefix, strlen(js->jb->prefix));
  481. js->key_stack[0] = &js->key[strlen(js->key)];
  482. return js;
  483. }
  484. void json_parser_destroy(LOG_JSON_STATE *js) {
  485. if(js)
  486. freez(js);
  487. }
  488. const char *json_parser_error(LOG_JSON_STATE *js) {
  489. return js->msg;
  490. }
  491. bool json_parse_document(LOG_JSON_STATE *js, const char *txt) {
  492. js->line = txt;
  493. js->pos = 0;
  494. js->msg[0] = '\0';
  495. js->key_stack[0][0] = '\0';
  496. js->depth = 0;
  497. if(!json_parse_object(js))
  498. return false;
  499. json_skip_spaces(js);
  500. const char *s = json_current_pos(js);
  501. if(*s) {
  502. snprintf(js->msg, sizeof(js->msg),
  503. "JSON PARSER: excess characters found after document is finished, at pos %zu", js->pos);
  504. return false;
  505. }
  506. return true;
  507. }
  508. void json_test(void) {
  509. struct log_job jb = { .prefix = "NIGNX_" };
  510. LOG_JSON_STATE *json = json_parser_create(&jb);
  511. json_parse_document(json, "{\"value\":\"\\u\\u039A\\u03B1\\u03BB\\u03B7\\u03BC\\u03AD\\u03C1\\u03B1\"}");
  512. json_parser_destroy(json);
  513. }