yajl_parser.c 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498
  1. /*
  2. * Copyright (c) 2007-2014, Lloyd Hilaiel <me@lloyd.io>
  3. *
  4. * Permission to use, copy, modify, and/or distribute this software for any
  5. * purpose with or without fee is hereby granted, provided that the above
  6. * copyright notice and this permission notice appear in all copies.
  7. *
  8. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  10. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  11. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  12. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  13. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  14. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. */
  16. #include "api/yajl_parse.h"
  17. #include "yajl_lex.h"
  18. #include "yajl_parser.h"
  19. #include "yajl_encode.h"
  20. #include "yajl_bytestack.h"
  21. #include <stdlib.h>
  22. #include <limits.h>
  23. #include <errno.h>
  24. #include <stdio.h>
  25. #include <string.h>
  26. #include <ctype.h>
  27. #include <assert.h>
  28. #include <math.h>
  29. unsigned char *
  30. yajl_render_error_string(yajl_handle hand, const unsigned char * jsonText,
  31. size_t jsonTextLen, int verbose)
  32. {
  33. size_t offset = hand->bytesConsumed;
  34. unsigned char * str;
  35. const char * errorType = NULL;
  36. const char * errorText = NULL;
  37. char text[72];
  38. const char * arrow = " (right here) ------^\n";
  39. if (yajl_bs_current(hand->stateStack) == yajl_state_parse_error) {
  40. errorType = "parse";
  41. errorText = hand->parseError;
  42. } else if (yajl_bs_current(hand->stateStack) == yajl_state_lexical_error) {
  43. errorType = "lexical";
  44. errorText = yajl_lex_error_to_string(yajl_lex_get_error(hand->lexer));
  45. } else {
  46. errorType = "unknown";
  47. }
  48. {
  49. size_t memneeded = 0;
  50. memneeded += strlen(errorType);
  51. memneeded += strlen(" error");
  52. if (errorText != NULL) {
  53. memneeded += strlen(": ");
  54. memneeded += strlen(errorText);
  55. }
  56. str = (unsigned char *) YA_MALLOC(&(hand->alloc), memneeded + 2);
  57. if (!str) return NULL;
  58. str[0] = 0;
  59. strcat((char *) str, errorType);
  60. strcat((char *) str, " error");
  61. if (errorText != NULL) {
  62. strcat((char *) str, ": ");
  63. strcat((char *) str, errorText);
  64. }
  65. strcat((char *) str, "\n");
  66. }
  67. /* now we append as many spaces as needed to make sure the error
  68. * falls at char 41, if verbose was specified */
  69. if (verbose) {
  70. size_t start, end, i;
  71. size_t spacesNeeded;
  72. spacesNeeded = (offset < 30 ? 40 - offset : 10);
  73. start = (offset >= 30 ? offset - 30 : 0);
  74. end = (offset + 30 > jsonTextLen ? jsonTextLen : offset + 30);
  75. for (i=0;i<spacesNeeded;i++) text[i] = ' ';
  76. for (;start < end;start++, i++) {
  77. if (jsonText[start] != '\n' && jsonText[start] != '\r')
  78. {
  79. text[i] = jsonText[start];
  80. }
  81. else
  82. {
  83. text[i] = ' ';
  84. }
  85. }
  86. assert(i <= 71);
  87. text[i++] = '\n';
  88. text[i] = 0;
  89. {
  90. char * newStr = (char *)
  91. YA_MALLOC(&(hand->alloc), (unsigned int)(strlen((char *) str) +
  92. strlen((char *) text) +
  93. strlen(arrow) + 1));
  94. if (newStr) {
  95. newStr[0] = 0;
  96. strcat((char *) newStr, (char *) str);
  97. strcat((char *) newStr, text);
  98. strcat((char *) newStr, arrow);
  99. }
  100. YA_FREE(&(hand->alloc), str);
  101. str = (unsigned char *) newStr;
  102. }
  103. }
  104. return str;
  105. }
  /*
   * Check for client cancellation.
   *
   * `x` is the return value of a client callback. When it is zero the parser
   * is put into the parse-error state with an explanatory message and the
   * enclosing function returns yajl_status_client_canceled. The macro relies
   * on a variable named `hand` being in scope at the point of use.
   *
   * Wrapped in do { } while (0) so that `_CC_CHK(...);` is exactly one
   * statement and cannot capture a following `else`.
   */
  #define _CC_CHK(x)                                                      \
      do {                                                                \
          if (!(x)) {                                                     \
              yajl_bs_set(hand->stateStack, yajl_state_parse_error);      \
              hand->parseError =                                          \
                  "client cancelled parse via callback return value";     \
              return yajl_status_client_canceled;                         \
          }                                                               \
      } while (0)
  114. yajl_status
  115. yajl_do_finish(yajl_handle hand)
  116. {
  117. yajl_status stat;
  118. stat = yajl_do_parse(hand,(const unsigned char *) " ",1);
  119. if (stat != yajl_status_ok) return stat;
  120. switch(yajl_bs_current(hand->stateStack))
  121. {
  122. case yajl_state_parse_error:
  123. case yajl_state_lexical_error:
  124. return yajl_status_error;
  125. case yajl_state_got_value:
  126. case yajl_state_parse_complete:
  127. return yajl_status_ok;
  128. default:
  129. if (!(hand->flags & yajl_allow_partial_values))
  130. {
  131. yajl_bs_set(hand->stateStack, yajl_state_parse_error);
  132. hand->parseError = "premature EOF";
  133. return yajl_status_error;
  134. }
  135. return yajl_status_ok;
  136. }
  137. }
  138. yajl_status
  139. yajl_do_parse(yajl_handle hand, const unsigned char * jsonText,
  140. size_t jsonTextLen)
  141. {
  142. yajl_tok tok;
  143. const unsigned char * buf;
  144. size_t bufLen;
  145. size_t * offset = &(hand->bytesConsumed);
  146. *offset = 0;
  147. around_again:
  148. switch (yajl_bs_current(hand->stateStack)) {
  149. case yajl_state_parse_complete:
  150. if (hand->flags & yajl_allow_multiple_values) {
  151. yajl_bs_set(hand->stateStack, yajl_state_got_value);
  152. goto around_again;
  153. }
  154. if (!(hand->flags & yajl_allow_trailing_garbage)) {
  155. if (*offset != jsonTextLen) {
  156. tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
  157. offset, &buf, &bufLen);
  158. if (tok != yajl_tok_eof) {
  159. yajl_bs_set(hand->stateStack, yajl_state_parse_error);
  160. hand->parseError = "trailing garbage";
  161. }
  162. goto around_again;
  163. }
  164. }
  165. return yajl_status_ok;
  166. case yajl_state_lexical_error:
  167. case yajl_state_parse_error:
  168. return yajl_status_error;
  169. case yajl_state_start:
  170. case yajl_state_got_value:
  171. case yajl_state_map_need_val:
  172. case yajl_state_array_need_val:
  173. case yajl_state_array_start: {
  174. /* for arrays and maps, we advance the state for this
  175. * depth, then push the state of the next depth.
  176. * If an error occurs during the parsing of the nesting
  177. * enitity, the state at this level will not matter.
  178. * a state that needs pushing will be anything other
  179. * than state_start */
  180. yajl_state stateToPush = yajl_state_start;
  181. tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
  182. offset, &buf, &bufLen);
  183. switch (tok) {
  184. case yajl_tok_eof:
  185. return yajl_status_ok;
  186. case yajl_tok_error:
  187. yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
  188. goto around_again;
  189. case yajl_tok_string:
  190. if (hand->callbacks && hand->callbacks->yajl_string) {
  191. _CC_CHK(hand->callbacks->yajl_string(hand->ctx,
  192. buf, bufLen));
  193. }
  194. break;
  195. case yajl_tok_string_with_escapes:
  196. if (hand->callbacks && hand->callbacks->yajl_string) {
  197. yajl_buf_clear(hand->decodeBuf);
  198. yajl_string_decode(hand->decodeBuf, buf, bufLen);
  199. _CC_CHK(hand->callbacks->yajl_string(
  200. hand->ctx, yajl_buf_data(hand->decodeBuf),
  201. yajl_buf_len(hand->decodeBuf)));
  202. }
  203. break;
  204. case yajl_tok_bool:
  205. if (hand->callbacks && hand->callbacks->yajl_boolean) {
  206. _CC_CHK(hand->callbacks->yajl_boolean(hand->ctx,
  207. *buf == 't'));
  208. }
  209. break;
  210. case yajl_tok_null:
  211. if (hand->callbacks && hand->callbacks->yajl_null) {
  212. _CC_CHK(hand->callbacks->yajl_null(hand->ctx));
  213. }
  214. break;
  215. case yajl_tok_inf:
  216. if (hand->callbacks && hand->callbacks->yajl_double) {
  217. _CC_CHK(hand->callbacks->yajl_double(hand->ctx, INFINITY));
  218. }
  219. break;
  220. case yajl_tok_minus_inf:
  221. if (hand->callbacks && hand->callbacks->yajl_double) {
  222. _CC_CHK(hand->callbacks->yajl_double(hand->ctx, -INFINITY));
  223. }
  224. break;
  225. case yajl_tok_left_bracket:
  226. if (hand->callbacks && hand->callbacks->yajl_start_map) {
  227. _CC_CHK(hand->callbacks->yajl_start_map(hand->ctx));
  228. }
  229. stateToPush = yajl_state_map_start;
  230. break;
  231. case yajl_tok_left_brace:
  232. if (hand->callbacks && hand->callbacks->yajl_start_array) {
  233. _CC_CHK(hand->callbacks->yajl_start_array(hand->ctx));
  234. }
  235. stateToPush = yajl_state_array_start;
  236. break;
  237. case yajl_tok_integer:
  238. if (hand->callbacks) {
  239. if (hand->callbacks->yajl_number) {
  240. _CC_CHK(hand->callbacks->yajl_number(
  241. hand->ctx,(const char *) buf, bufLen));
  242. } else if (hand->callbacks->yajl_integer || hand->callbacks->yajl_unsigned_integer) {
  243. // NB: We want to use errno for checking parsing correctness.
  244. errno = 0;
  245. long long int i = 0;
  246. unsigned long long int ui = 0;
  247. if (*buf == '-') {
  248. i = yajl_parse_integer(buf, bufLen);
  249. if (errno == 0) {
  250. if (hand->callbacks->yajl_integer) {
  251. _CC_CHK(hand->callbacks->yajl_integer(hand->ctx,
  252. i));
  253. }
  254. }
  255. } else {
  256. ui = yajl_parse_unsigned_integer(buf, bufLen);
  257. if (errno == 0) {
  258. if (ui <= LLONG_MAX) {
  259. if (hand->callbacks->yajl_integer) {
  260. _CC_CHK(hand->callbacks->yajl_integer(hand->ctx,
  261. ui));
  262. }
  263. } else {
  264. if (hand->callbacks->yajl_unsigned_integer) {
  265. _CC_CHK(hand->callbacks->yajl_unsigned_integer(hand->ctx,
  266. ui));
  267. }
  268. }
  269. }
  270. }
  271. if (errno == ERANGE) {
  272. yajl_bs_set(hand->stateStack,
  273. yajl_state_parse_error);
  274. hand->parseError = "integer overflow" ;
  275. /* try to restore error offset */
  276. if (*offset >= bufLen) *offset -= bufLen;
  277. else *offset = 0;
  278. goto around_again;
  279. }
  280. }
  281. }
  282. break;
  283. case yajl_tok_double:
  284. if (hand->callbacks) {
  285. if (hand->callbacks->yajl_number) {
  286. _CC_CHK(hand->callbacks->yajl_number(
  287. hand->ctx, (const char *) buf, bufLen));
  288. } else if (hand->callbacks->yajl_double) {
  289. double d = 0.0;
  290. yajl_buf_clear(hand->decodeBuf);
  291. yajl_buf_append(hand->decodeBuf, buf, bufLen);
  292. buf = yajl_buf_data(hand->decodeBuf);
  293. errno = 0;
  294. d = strtod((char *) buf, NULL);
  295. if ((d == HUGE_VAL || d == -HUGE_VAL) &&
  296. errno == ERANGE)
  297. {
  298. yajl_bs_set(hand->stateStack,
  299. yajl_state_parse_error);
  300. hand->parseError = "numeric (floating point) "
  301. "overflow";
  302. /* try to restore error offset */
  303. if (*offset >= bufLen) *offset -= bufLen;
  304. else *offset = 0;
  305. goto around_again;
  306. }
  307. _CC_CHK(hand->callbacks->yajl_double(hand->ctx,
  308. d));
  309. }
  310. }
  311. break;
  312. case yajl_tok_right_brace: {
  313. if (yajl_bs_current(hand->stateStack) ==
  314. yajl_state_array_start)
  315. {
  316. if (hand->callbacks &&
  317. hand->callbacks->yajl_end_array)
  318. {
  319. _CC_CHK(hand->callbacks->yajl_end_array(hand->ctx));
  320. }
  321. yajl_bs_pop(hand->stateStack);
  322. goto around_again;
  323. }
  324. /* intentional fall-through */
  325. }
  326. case yajl_tok_colon:
  327. case yajl_tok_comma:
  328. case yajl_tok_right_bracket:
  329. yajl_bs_set(hand->stateStack, yajl_state_parse_error);
  330. hand->parseError =
  331. "unallowed token at this point in JSON text";
  332. goto around_again;
  333. default:
  334. yajl_bs_set(hand->stateStack, yajl_state_parse_error);
  335. hand->parseError = "invalid token, internal error";
  336. goto around_again;
  337. }
  338. /* got a value. transition depends on the state we're in. */
  339. {
  340. yajl_state s = yajl_bs_current(hand->stateStack);
  341. if (s == yajl_state_start || s == yajl_state_got_value) {
  342. yajl_bs_set(hand->stateStack, yajl_state_parse_complete);
  343. } else if (s == yajl_state_map_need_val) {
  344. yajl_bs_set(hand->stateStack, yajl_state_map_got_val);
  345. } else {
  346. yajl_bs_set(hand->stateStack, yajl_state_array_got_val);
  347. }
  348. }
  349. if (stateToPush != yajl_state_start) {
  350. yajl_bs_push(hand->stateStack, stateToPush);
  351. }
  352. goto around_again;
  353. }
  354. case yajl_state_map_start:
  355. case yajl_state_map_need_key: {
  356. /* only difference between these two states is that in
  357. * start '}' is valid, whereas in need_key, we've parsed
  358. * a comma, and a string key _must_ follow */
  359. tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
  360. offset, &buf, &bufLen);
  361. switch (tok) {
  362. case yajl_tok_eof:
  363. return yajl_status_ok;
  364. case yajl_tok_error:
  365. yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
  366. goto around_again;
  367. case yajl_tok_string_with_escapes:
  368. if (hand->callbacks && hand->callbacks->yajl_map_key) {
  369. yajl_buf_clear(hand->decodeBuf);
  370. yajl_string_decode(hand->decodeBuf, buf, bufLen);
  371. buf = yajl_buf_data(hand->decodeBuf);
  372. bufLen = yajl_buf_len(hand->decodeBuf);
  373. }
  374. /* intentional fall-through */
  375. case yajl_tok_string:
  376. if (hand->callbacks && hand->callbacks->yajl_map_key) {
  377. _CC_CHK(hand->callbacks->yajl_map_key(hand->ctx, buf,
  378. bufLen));
  379. }
  380. yajl_bs_set(hand->stateStack, yajl_state_map_sep);
  381. goto around_again;
  382. case yajl_tok_right_bracket:
  383. if (yajl_bs_current(hand->stateStack) ==
  384. yajl_state_map_start)
  385. {
  386. if (hand->callbacks && hand->callbacks->yajl_end_map) {
  387. _CC_CHK(hand->callbacks->yajl_end_map(hand->ctx));
  388. }
  389. yajl_bs_pop(hand->stateStack);
  390. goto around_again;
  391. }
  392. default:
  393. yajl_bs_set(hand->stateStack, yajl_state_parse_error);
  394. hand->parseError =
  395. "invalid object key (must be a string)";
  396. goto around_again;
  397. }
  398. }
  399. case yajl_state_map_sep: {
  400. tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
  401. offset, &buf, &bufLen);
  402. switch (tok) {
  403. case yajl_tok_colon:
  404. yajl_bs_set(hand->stateStack, yajl_state_map_need_val);
  405. goto around_again;
  406. case yajl_tok_eof:
  407. return yajl_status_ok;
  408. case yajl_tok_error:
  409. yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
  410. goto around_again;
  411. default:
  412. yajl_bs_set(hand->stateStack, yajl_state_parse_error);
  413. hand->parseError = "object key and value must "
  414. "be separated by a colon (':')";
  415. goto around_again;
  416. }
  417. }
  418. case yajl_state_map_got_val: {
  419. tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
  420. offset, &buf, &bufLen);
  421. switch (tok) {
  422. case yajl_tok_right_bracket:
  423. if (hand->callbacks && hand->callbacks->yajl_end_map) {
  424. _CC_CHK(hand->callbacks->yajl_end_map(hand->ctx));
  425. }
  426. yajl_bs_pop(hand->stateStack);
  427. goto around_again;
  428. case yajl_tok_comma:
  429. yajl_bs_set(hand->stateStack, yajl_state_map_need_key);
  430. goto around_again;
  431. case yajl_tok_eof:
  432. return yajl_status_ok;
  433. case yajl_tok_error:
  434. yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
  435. goto around_again;
  436. default:
  437. yajl_bs_set(hand->stateStack, yajl_state_parse_error);
  438. hand->parseError = "after key and value, inside map, "
  439. "I expect ',' or '}'";
  440. /* try to restore error offset */
  441. if (*offset >= bufLen) *offset -= bufLen;
  442. else *offset = 0;
  443. goto around_again;
  444. }
  445. }
  446. case yajl_state_array_got_val: {
  447. tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
  448. offset, &buf, &bufLen);
  449. switch (tok) {
  450. case yajl_tok_right_brace:
  451. if (hand->callbacks && hand->callbacks->yajl_end_array) {
  452. _CC_CHK(hand->callbacks->yajl_end_array(hand->ctx));
  453. }
  454. yajl_bs_pop(hand->stateStack);
  455. goto around_again;
  456. case yajl_tok_comma:
  457. yajl_bs_set(hand->stateStack, yajl_state_array_need_val);
  458. goto around_again;
  459. case yajl_tok_eof:
  460. return yajl_status_ok;
  461. case yajl_tok_error:
  462. yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
  463. goto around_again;
  464. default:
  465. yajl_bs_set(hand->stateStack, yajl_state_parse_error);
  466. hand->parseError =
  467. "after array element, I expect ',' or ']'";
  468. goto around_again;
  469. }
  470. }
  471. }
  472. abort();
  473. return yajl_status_error;
  474. }