12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766 |
- /*
- * Copyright (c) 2007-2014, Lloyd Hilaiel <me@lloyd.io>
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
- #include "yajl_lex.h"
- #include "yajl_buf.h"
- #include <stdlib.h>
- #include <stdio.h>
- #include <assert.h>
- #include <string.h>
#ifdef YAJL_LEXER_DEBUG
/* Debug helper: map a token to a short human readable label.
 * Left and right variants of braces/brackets share one label, and any
 * token without an entry below is reported as "unknown". */
static const char *
tokToStr(yajl_tok tok)
{
    const char * label = "unknown";

    switch (tok) {
        case yajl_tok_bool:                label = "bool";                break;
        case yajl_tok_colon:               label = "colon";               break;
        case yajl_tok_comma:               label = "comma";               break;
        case yajl_tok_eof:                 label = "eof";                 break;
        case yajl_tok_error:               label = "error";               break;
        case yajl_tok_left_brace:          label = "brace";               break;
        case yajl_tok_left_bracket:        label = "bracket";             break;
        case yajl_tok_null:                label = "null";                break;
        case yajl_tok_inf:                 label = "infinity";            break;
        case yajl_tok_minus_inf:           label = "-infinity";           break;
        case yajl_tok_integer:             label = "integer";             break;
        case yajl_tok_double:              label = "double";              break;
        case yajl_tok_right_brace:         label = "brace";               break;
        case yajl_tok_right_bracket:       label = "bracket";             break;
        case yajl_tok_string:              label = "string";              break;
        case yajl_tok_string_with_escapes: label = "string_with_escapes"; break;
    }

    return label;
}
#endif
/* Impact of the stream parsing feature on the lexer:
 *
 * YAJL supports stream parsing.  That is, the ability to parse the first
 * bits of a chunk of JSON before the last bits are available (still on
 * the network or disk).  This makes the lexer more complex.  The
 * responsibility of the lexer is to handle transparently the case where
 * a chunk boundary falls in the middle of a token.  This is
 * accomplished via a buffer and a character reading abstraction.
 *
 * Overview of implementation
 *
 * When we lex to end of input string before end of token is hit, we
 * copy all of the input text composing the token into our lexBuf.
 *
 * Every time we read a character, we do so through the readChar macro.
 * readChar's responsibility is to handle pulling all chars from the buffer
 * before pulling chars from input text.
 */
- struct yajl_lexer_t {
- /* the overal line and char offset into the data */
- size_t lineOff;
- size_t charOff;
- /* error */
- yajl_lex_error error;
- /* a input buffer to handle the case where a token is spread over
- * multiple chunks */
- yajl_buf buf;
- /* in the case where we have data in the lexBuf, bufOff holds
- * the current offset into the lexBuf. */
- size_t bufOff;
- /* are we using the lex buf? */
- unsigned int bufInUse;
- /* shall we allow comments? */
- unsigned int allowComments;
- /* shall we validate utf8 inside strings? */
- unsigned int validateUTF8;
- yajl_alloc_funcs * alloc;
- };
/* Fetch the next input byte and advance past it.
 *
 * While the lexer's buffer is in use and bufOff has not reached its end,
 * the byte is taken from the buffer and bufOff is advanced; otherwise the
 * byte is txt[*off] and *off is advanced.  No bounds check is made on
 * txt: callers must make sure *off is in range before reading.
 *
 * Every macro argument is parenthesized at each use.  Arguments are
 * evaluated more than once and must therefore be free of side effects. */
#define readChar(lxr, txt, off)                                          \
    (((lxr)->bufInUse && yajl_buf_len((lxr)->buf) &&                     \
      (lxr)->bufOff < yajl_buf_len((lxr)->buf)) ?                        \
     (*((const unsigned char *) yajl_buf_data((lxr)->buf) +              \
        ((lxr)->bufOff)++)) :                                            \
     ((txt)[(*(off))++]))

/* Step back one byte: rewind *off when it is above zero, otherwise
 * rewind the lexer's buffer offset. */
#define unreadChar(lxr, off) ((*(off) > 0) ? (*(off))-- : ((lxr)->bufOff--))
- yajl_lexer
- yajl_lex_alloc(yajl_alloc_funcs * alloc,
- unsigned int allowComments,
- unsigned int validateUTF8)
- {
- yajl_lexer lxr = (yajl_lexer) YA_MALLOC(alloc, sizeof(struct yajl_lexer_t));
- memset((void *) lxr, 0, sizeof(struct yajl_lexer_t));
- lxr->buf = yajl_buf_alloc(alloc);
- lxr->allowComments = allowComments;
- lxr->validateUTF8 = validateUTF8;
- lxr->alloc = alloc;
- return lxr;
- }
- void
- yajl_lex_free(yajl_lexer lxr)
- {
- yajl_buf_free(lxr->buf);
- YA_FREE(lxr->alloc, lxr);
- return;
- }
/* A lookup table, indexed by byte value, which lets us quickly determine
 * a set of per-character properties.  Each entry is an OR of these flags:
 *  VEC - valid escaped control char
 *        note.  the solidus '/' may be escaped or not.
 *  IJC - invalid json char
 *  VHC - valid hex char
 *  NFP - needs further processing (from a string scanning perspective)
 *  NUC - needs utf8 checking when enabled (from a string scanning perspective)
 *
 * The entries are bit masks, so they are stored as unsigned char to keep
 * them independent of the platform's signedness of plain char.
 */
#define VEC 0x01
#define IJC 0x02
#define VHC 0x04
#define NFP 0x08
#define NUC 0x10

static const unsigned char charLookupTable[256] =
{
/*00*/ IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    ,
/*08*/ IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    ,
/*10*/ IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    ,
/*18*/ IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    ,

/*20*/ 0      , 0      , NFP|VEC|IJC, 0      , 0      , 0      , 0      , 0      ,
/*28*/ 0      , 0      , 0      , 0      , 0      , 0      , 0      , VEC    ,
/*30*/ VHC    , VHC    , VHC    , VHC    , VHC    , VHC    , VHC    , VHC    ,
/*38*/ VHC    , VHC    , 0      , 0      , 0      , 0      , 0      , 0      ,

/*40*/ 0      , VHC    , VHC    , VHC    , VHC    , VHC    , VHC    , 0      ,
/*48*/ 0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
/*50*/ 0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
/*58*/ 0      , 0      , 0      , 0      , NFP|VEC|IJC, 0      , 0      , 0      ,

/*60*/ 0      , VHC    , VEC|VHC, VHC    , VHC    , VHC    , VEC|VHC, 0      ,
/*68*/ 0      , 0      , 0      , 0      , 0      , 0      , VEC    , 0      ,
/*70*/ 0      , 0      , VEC    , 0      , VEC    , 0      , 0      , 0      ,
/*78*/ 0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,

/*80*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
/*88*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
/*90*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
/*98*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,

/*a0*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
/*a8*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
/*b0*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
/*b8*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,

/*c0*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
/*c8*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
/*d0*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
/*d8*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,

/*e0*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
/*e8*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
/*f0*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    ,
/*f8*/ NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC    , NUC
};
- /** process a variable length utf8 encoded codepoint.
- *
- * returns:
- * yajl_tok_string - if valid utf8 char was parsed and offset was
- * advanced
- * yajl_tok_eof - if end of input was hit before validation could
- * complete
- * yajl_tok_error - if invalid utf8 was encountered
- *
- * NOTE: on error the offset will point to the first char of the
- * invalid utf8 */
- #define UTF8_CHECK_EOF if (*offset >= jsonTextLen) { return yajl_tok_eof; }
- static yajl_tok
- yajl_lex_utf8_char(yajl_lexer lexer, const unsigned char * jsonText,
- size_t jsonTextLen, size_t * offset,
- unsigned char curChar)
- {
- if (curChar <= 0x7f) {
- /* single byte */
- return yajl_tok_string;
- } else if ((curChar >> 5) == 0x6) {
- /* two byte */
- UTF8_CHECK_EOF;
- curChar = readChar(lexer, jsonText, offset);
- if ((curChar >> 6) == 0x2) return yajl_tok_string;
- } else if ((curChar >> 4) == 0x0e) {
- /* three byte */
- UTF8_CHECK_EOF;
- curChar = readChar(lexer, jsonText, offset);
- if ((curChar >> 6) == 0x2) {
- UTF8_CHECK_EOF;
- curChar = readChar(lexer, jsonText, offset);
- if ((curChar >> 6) == 0x2) return yajl_tok_string;
- }
- } else if ((curChar >> 3) == 0x1e) {
- /* four byte */
- UTF8_CHECK_EOF;
- curChar = readChar(lexer, jsonText, offset);
- if ((curChar >> 6) == 0x2) {
- UTF8_CHECK_EOF;
- curChar = readChar(lexer, jsonText, offset);
- if ((curChar >> 6) == 0x2) {
- UTF8_CHECK_EOF;
- curChar = readChar(lexer, jsonText, offset);
- if ((curChar >> 6) == 0x2) return yajl_tok_string;
- }
- }
- }
- return yajl_tok_error;
- }
/* lex a string.  input is the lexer, pointer to beginning of
 * json text, and start of string (offset).
 * a token is returned which has the following meanings:
 * yajl_tok_string: lex of string was successful.  offset has been
 *                  advanced past the terminating '"'.
 * yajl_tok_eof: end of text was encountered before we could complete
 *               the lex.
 * yajl_tok_error: embedded in the string were unallowable chars.  for
 *                 invalid escapes and invalid json chars offset points
 *                 to the offending char.
 *
 * STR_CHECK_EOF bails out of the string lexer with yajl_tok_eof once the
 * input is exhausted.  It relies on `tok', `offset', `jsonTextLen' and
 * the label `finish_string_lex' of the enclosing function, and is wrapped
 * in do/while(0) so that it behaves as one statement.
 */
#define STR_CHECK_EOF                  \
    do {                               \
        if (*offset >= jsonTextLen) {  \
            tok = yajl_tok_eof;        \
            goto finish_string_lex;    \
        }                              \
    } while (0)
- /** scan a string for interesting characters that might need further
- * review. return the number of chars that are uninteresting and can
- * be skipped.
- * (lth) hi world, any thoughts on how to make this routine faster? */
- static size_t
- yajl_string_scan(const unsigned char * buf, size_t len, int utf8check)
- {
- unsigned char mask = IJC|NFP|(utf8check ? NUC : 0);
- size_t skip = 0;
- while (skip < len && !(charLookupTable[*buf] & mask))
- {
- skip++;
- buf++;
- }
- return skip;
- }
- static yajl_tok
- yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText,
- size_t jsonTextLen, size_t * offset)
- {
- yajl_tok tok = yajl_tok_error;
- int hasEscapes = 0;
- for (;;) {
- unsigned char curChar;
- /* now jump into a faster scanning routine to skip as much
- * of the buffers as possible */
- {
- const unsigned char * p;
- size_t len;
- if ((lexer->bufInUse && yajl_buf_len(lexer->buf) &&
- lexer->bufOff < yajl_buf_len(lexer->buf)))
- {
- p = ((const unsigned char *) yajl_buf_data(lexer->buf) +
- (lexer->bufOff));
- len = yajl_buf_len(lexer->buf) - lexer->bufOff;
- lexer->bufOff += yajl_string_scan(p, len, lexer->validateUTF8);
- }
- else if (*offset < jsonTextLen)
- {
- p = jsonText + *offset;
- len = jsonTextLen - *offset;
- *offset += yajl_string_scan(p, len, lexer->validateUTF8);
- }
- }
- STR_CHECK_EOF;
- curChar = readChar(lexer, jsonText, offset);
- /* quote terminates */
- if (curChar == '"') {
- tok = yajl_tok_string;
- break;
- }
- /* backslash escapes a set of control chars, */
- else if (curChar == '\\') {
- hasEscapes = 1;
- STR_CHECK_EOF;
- /* special case \u */
- curChar = readChar(lexer, jsonText, offset);
- if (curChar == 'u') {
- unsigned int i = 0;
- for (i=0;i<4;i++) {
- STR_CHECK_EOF;
- curChar = readChar(lexer, jsonText, offset);
- if (!(charLookupTable[curChar] & VHC)) {
- /* back up to offending char */
- unreadChar(lexer, offset);
- lexer->error = yajl_lex_string_invalid_hex_char;
- goto finish_string_lex;
- }
- }
- } else if (!(charLookupTable[curChar] & VEC)) {
- /* back up to offending char */
- unreadChar(lexer, offset);
- lexer->error = yajl_lex_string_invalid_escaped_char;
- goto finish_string_lex;
- }
- }
- /* when not validating UTF8 it's a simple table lookup to determine
- * if the present character is invalid */
- else if(charLookupTable[curChar] & IJC) {
- /* back up to offending char */
- unreadChar(lexer, offset);
- lexer->error = yajl_lex_string_invalid_json_char;
- goto finish_string_lex;
- }
- /* when in validate UTF8 mode we need to do some extra work */
- else if (lexer->validateUTF8) {
- yajl_tok t = yajl_lex_utf8_char(lexer, jsonText, jsonTextLen,
- offset, curChar);
- if (t == yajl_tok_eof) {
- tok = yajl_tok_eof;
- goto finish_string_lex;
- } else if (t == yajl_tok_error) {
- lexer->error = yajl_lex_string_invalid_utf8;
- goto finish_string_lex;
- }
- }
- /* accept it, and move on */
- }
- finish_string_lex:
- /* tell our buddy, the parser, wether he needs to process this string
- * again */
- if (hasEscapes && tok == yajl_tok_string) {
- tok = yajl_tok_string_with_escapes;
- }
- return tok;
- }
- #define RETURN_IF_EOF if (*offset >= jsonTextLen) return yajl_tok_eof;
- static yajl_tok
- yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText,
- size_t jsonTextLen, size_t * offset)
- {
- /** XXX: numbers are the only entities in json that we must lex
- * _beyond_ in order to know that they are complete. There
- * is an ambiguous case for integers at EOF. */
- unsigned char c;
- yajl_tok tok = yajl_tok_integer;
- RETURN_IF_EOF;
- c = readChar(lexer, jsonText, offset);
- /* optional leading minus */
- char minus = 0;
- if (c == '-') {
- minus = 1;
- RETURN_IF_EOF;
- c = readChar(lexer, jsonText, offset);
- }
- /* a single zero, or a series of integers */
- if (c == '0') {
- RETURN_IF_EOF;
- c = readChar(lexer, jsonText, offset);
- } else if (c >= '1' && c <= '9') {
- do {
- RETURN_IF_EOF;
- c = readChar(lexer, jsonText, offset);
- } while (c >= '0' && c <= '9');
- } else if (c == 'i') {
- if (readChar(lexer, jsonText, offset) != 'n') {
- unreadChar(lexer, offset);
- lexer->error = yajl_lex_invalid_infinity;
- return yajl_tok_error;
- }
- if (readChar(lexer, jsonText, offset) != 'f') {
- unreadChar(lexer, offset);
- lexer->error = yajl_lex_invalid_infinity;
- return yajl_tok_error;
- }
- if (minus) {
- return yajl_tok_minus_inf;
- } else {
- return yajl_tok_inf;
- }
- } else {
- unreadChar(lexer, offset);
- lexer->error = yajl_lex_missing_integer_after_minus;
- return yajl_tok_error;
- }
- /* optional fraction (indicates this is floating point) */
- if (c == '.') {
- int numRd = 0;
- RETURN_IF_EOF;
- c = readChar(lexer, jsonText, offset);
- while (c >= '0' && c <= '9') {
- numRd++;
- RETURN_IF_EOF;
- c = readChar(lexer, jsonText, offset);
- }
- if (!numRd) {
- unreadChar(lexer, offset);
- lexer->error = yajl_lex_missing_integer_after_decimal;
- return yajl_tok_error;
- }
- tok = yajl_tok_double;
- }
- /* optional exponent (indicates this is floating point) */
- if (c == 'e' || c == 'E') {
- RETURN_IF_EOF;
- c = readChar(lexer, jsonText, offset);
- /* optional sign */
- if (c == '+' || c == '-') {
- RETURN_IF_EOF;
- c = readChar(lexer, jsonText, offset);
- }
- if (c >= '0' && c <= '9') {
- do {
- RETURN_IF_EOF;
- c = readChar(lexer, jsonText, offset);
- } while (c >= '0' && c <= '9');
- } else {
- unreadChar(lexer, offset);
- lexer->error = yajl_lex_missing_integer_after_exponent;
- return yajl_tok_error;
- }
- tok = yajl_tok_double;
- }
- /* we always go "one too far" */
- unreadChar(lexer, offset);
- return tok;
- }
- static yajl_tok
- yajl_lex_comment(yajl_lexer lexer, const unsigned char * jsonText,
- size_t jsonTextLen, size_t * offset)
- {
- unsigned char c;
- yajl_tok tok = yajl_tok_comment;
- RETURN_IF_EOF;
- c = readChar(lexer, jsonText, offset);
- /* either slash or star expected */
- if (c == '/') {
- /* now we throw away until end of line */
- do {
- RETURN_IF_EOF;
- c = readChar(lexer, jsonText, offset);
- } while (c != '\n');
- } else if (c == '*') {
- /* now we throw away until end of comment */
- for (;;) {
- RETURN_IF_EOF;
- c = readChar(lexer, jsonText, offset);
- if (c == '*') {
- RETURN_IF_EOF;
- c = readChar(lexer, jsonText, offset);
- if (c == '/') {
- break;
- } else {
- unreadChar(lexer, offset);
- }
- }
- }
- } else {
- lexer->error = yajl_lex_invalid_char;
- tok = yajl_tok_error;
- }
- return tok;
- }
/* Match the remaining characters of a keyword (want_value) against the
 * input and jump to `lexed' with the outcome stored in `tok':
 * target_token on a full match, yajl_tok_eof when the input ends first,
 * yajl_tok_error (offset left on the offending char) on a mismatch.
 * Declares a local, so every use must sit in its own braces.  Relies on
 * `c', `tok', `lexer', `jsonText', `jsonTextLen', `offset' and the label
 * `lexed' of the enclosing function. */
#define MATCH(want_value, target_token)                    \
    const char * want = want_value;                        \
    for (;;) {                                             \
        if (*offset >= jsonTextLen) {                      \
            tok = yajl_tok_eof;                            \
            goto lexed;                                    \
        }                                                  \
        c = readChar(lexer, jsonText, offset);             \
        if (c != *want) {                                  \
            unreadChar(lexer, offset);                     \
            lexer->error = yajl_lex_invalid_string;        \
            tok = yajl_tok_error;                          \
            goto lexed;                                    \
        }                                                  \
        if (!*(++want)) {                                  \
            break;                                         \
        }                                                  \
    }                                                      \
    tok = target_token;                                    \
    goto lexed;
- yajl_tok
- yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText,
- size_t jsonTextLen, size_t * offset,
- const unsigned char ** outBuf, size_t * outLen)
- {
- yajl_tok tok = yajl_tok_error;
- unsigned char c;
- size_t startOffset = *offset;
- *outBuf = NULL;
- *outLen = 0;
- for (;;) {
- assert(*offset <= jsonTextLen);
- if (*offset >= jsonTextLen) {
- tok = yajl_tok_eof;
- goto lexed;
- }
- c = readChar(lexer, jsonText, offset);
- switch (c) {
- case '{':
- tok = yajl_tok_left_bracket;
- goto lexed;
- case '}':
- tok = yajl_tok_right_bracket;
- goto lexed;
- case '[':
- tok = yajl_tok_left_brace;
- goto lexed;
- case ']':
- tok = yajl_tok_right_brace;
- goto lexed;
- case ',':
- tok = yajl_tok_comma;
- goto lexed;
- case ':':
- tok = yajl_tok_colon;
- goto lexed;
- case '\t': case '\n': case '\v': case '\f': case '\r': case ' ':
- startOffset++;
- break;
- case 't': {
- MATCH("rue", yajl_tok_bool);
- }
- case 'f': {
- MATCH("alse", yajl_tok_bool);
- }
- case 'n': {
- MATCH("ull", yajl_tok_null);
- }
- case '"': {
- tok = yajl_lex_string(lexer, (const unsigned char *) jsonText,
- jsonTextLen, offset);
- goto lexed;
- }
- case '-':
- case 'i':
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9': {
- /* integer parsing wants to start from the beginning */
- unreadChar(lexer, offset);
- tok = yajl_lex_number(lexer, (const unsigned char *) jsonText,
- jsonTextLen, offset);
- goto lexed;
- }
- case '/':
- /* hey, look, a probable comment! If comments are disabled
- * it's an error. */
- if (!lexer->allowComments) {
- unreadChar(lexer, offset);
- lexer->error = yajl_lex_unallowed_comment;
- tok = yajl_tok_error;
- goto lexed;
- }
- /* if comments are enabled, then we should try to lex
- * the thing. possible outcomes are
- * - successful lex (tok_comment, which means continue),
- * - malformed comment opening (slash not followed by
- * '*' or '/') (tok_error)
- * - eof hit. (tok_eof) */
- tok = yajl_lex_comment(lexer, (const unsigned char *) jsonText,
- jsonTextLen, offset);
- if (tok == yajl_tok_comment) {
- /* "error" is silly, but that's the initial
- * state of tok. guilty until proven innocent. */
- tok = yajl_tok_error;
- yajl_buf_clear(lexer->buf);
- lexer->bufInUse = 0;
- startOffset = *offset;
- break;
- }
- /* hit error or eof, bail */
- goto lexed;
- default:
- lexer->error = yajl_lex_invalid_char;
- tok = yajl_tok_error;
- goto lexed;
- }
- }
- lexed:
- /* need to append to buffer if the buffer is in use or
- * if it's an EOF token */
- if (tok == yajl_tok_eof || lexer->bufInUse) {
- if (!lexer->bufInUse) yajl_buf_clear(lexer->buf);
- lexer->bufInUse = 1;
- yajl_buf_append(lexer->buf, jsonText + startOffset, *offset - startOffset);
- lexer->bufOff = 0;
- if (tok != yajl_tok_eof) {
- *outBuf = yajl_buf_data(lexer->buf);
- *outLen = yajl_buf_len(lexer->buf);
- lexer->bufInUse = 0;
- }
- } else if (tok != yajl_tok_error) {
- *outBuf = jsonText + startOffset;
- *outLen = *offset - startOffset;
- }
- /* special case for strings. skip the quotes. */
- if (tok == yajl_tok_string || tok == yajl_tok_string_with_escapes)
- {
- assert(*outLen >= 2);
- (*outBuf)++;
- *outLen -= 2;
- }
- #ifdef YAJL_LEXER_DEBUG
- if (tok == yajl_tok_error) {
- printf("lexical error: %s\n",
- yajl_lex_error_to_string(yajl_lex_get_error(lexer)));
- } else if (tok == yajl_tok_eof) {
- printf("EOF hit\n");
- } else {
- printf("lexed %s: '", tokToStr(tok));
- fwrite(*outBuf, 1, *outLen, stdout);
- printf("'\n");
- }
- #endif
- return tok;
- }
- const char *
- yajl_lex_error_to_string(yajl_lex_error error)
- {
- switch (error) {
- case yajl_lex_e_ok:
- return "ok, no error";
- case yajl_lex_string_invalid_utf8:
- return "invalid bytes in UTF8 string.";
- case yajl_lex_string_invalid_escaped_char:
- return "inside a string, '\\' occurs before a character "
- "which it may not.";
- case yajl_lex_string_invalid_json_char:
- return "invalid character inside string.";
- case yajl_lex_string_invalid_hex_char:
- return "invalid (non-hex) character occurs after '\\u' inside "
- "string.";
- case yajl_lex_invalid_char:
- return "invalid char in json text.";
- case yajl_lex_invalid_string:
- return "invalid string in json text.";
- case yajl_lex_missing_integer_after_exponent:
- return "malformed number, a digit is required after the exponent.";
- case yajl_lex_missing_integer_after_decimal:
- return "malformed number, a digit is required after the "
- "decimal point.";
- case yajl_lex_missing_integer_after_minus:
- return "malformed number, a digit is required after the "
- "minus sign.";
- case yajl_lex_invalid_infinity:
- return "malformed number, a token inf required for number starting "
- "from 'i'";
- case yajl_lex_unallowed_comment:
- return "probable comment found in input text, comments are "
- "not enabled.";
- }
- return "unknown error code";
- }
- /** allows access to more specific information about the lexical
- * error when yajl_lex_lex returns yajl_tok_error. */
- yajl_lex_error
- yajl_lex_get_error(yajl_lexer lexer)
- {
- if (lexer == NULL) return (yajl_lex_error) -1;
- return lexer->error;
- }
- size_t yajl_lex_current_line(yajl_lexer lexer)
- {
- return lexer->lineOff;
- }
- size_t yajl_lex_current_char(yajl_lexer lexer)
- {
- return lexer->charOff;
- }
- yajl_tok yajl_lex_peek(yajl_lexer lexer, const unsigned char * jsonText,
- size_t jsonTextLen, size_t offset)
- {
- const unsigned char * outBuf;
- size_t outLen;
- size_t bufLen = yajl_buf_len(lexer->buf);
- size_t bufOff = lexer->bufOff;
- unsigned int bufInUse = lexer->bufInUse;
- yajl_tok tok;
- tok = yajl_lex_lex(lexer, jsonText, jsonTextLen, &offset,
- &outBuf, &outLen);
- lexer->bufOff = bufOff;
- lexer->bufInUse = bufInUse;
- yajl_buf_truncate(lexer->buf, bufLen);
- return tok;
- }
- size_t yajl_lex_buf_capacity(yajl_lexer lexer)
- {
- return yajl_buf_capacity(lexer->buf);
- }
|