|
- grammar JsonPath; // Combined grammar: parser rules come first, lexer rules follow
- options {
- language = Cpp; // Target language of the generated code
- memoize = true; // NOTE(review): presumably turns on ANTLR rule memoization — confirm against the ANTLR version in use
- }
- // Root rule. Input is an optional mode (`strict` or `lax`) followed by a jsonpath expression and the end of input
- jsonpath: (STRICT | LAX)? expr EOF;
- // Generic jsonpath expression. Delegates to or_expr, the rule for the lowest precedence operator
- expr: or_expr;
- // Arithmetic and boolean operations
- // Operator precedence:
- // 1. Unary plus, minus and logical not
- // 2. Multiplication, division, modulus
- // 3. Addition, subtraction
- // 4. Compare operators (<, <=, >, >=)
- // 5. Equality operators (==, !=, <>)
- // 6. Logical and
- // 7. Logical or
- // NOTE: We execute JsonPath using bottom up approach. Thus
- // operations with higher precedence must be located "deeper" inside AST
- or_expr: and_expr (OR and_expr)*; // Logical or `||`, may be chained
- and_expr: equal_expr (AND equal_expr)*; // Logical and `&&`, may be chained
- equal_expr: compare_expr ((EQUAL | NOT_EQUAL | NOT_EQUAL_SQL) compare_expr)?; // At most one `==`, `!=` or `<>`, chaining is not accepted
- compare_expr: add_expr ((LESS | LESS_EQUAL | GREATER | GREATER_EQUAL) add_expr)?; // At most one `<`, `<=`, `>` or `>=`, chaining is not accepted
- add_expr: mul_expr ((PLUS | MINUS) mul_expr)*; // Binary `+` and `-`, may be chained
- mul_expr: unary_expr ((ASTERISK | SLASH | PERCENT) unary_expr)*; // Binary `*`, `/` and `%`, may be chained
- unary_expr: (PLUS | MINUS | NOT)? predicate_expr; // At most one prefix `+`, `-` or `!`
- // Predicates, `"string" starts with "str"`
- // NOTE: The `is unknown` predicate is defined separately in the primary rule. This is done
- // because if we added it as an alternative to predicate_expr, ANTLR would need backtracking.
- // For example, it would not be possible to tell whether an expression like `( ... ) is unknown`
- // belongs to `starts with` (and the parentheses are part of the plain_expr rule) or to the
- // `is unknown` rule (and the parentheses are not included in plain_expr).
- predicate_expr:
- (plain_expr (starts_with_expr | like_regex_expr)?) // Plain expression with an optional `starts with` or `like_regex` suffix
- | (EXISTS LBRACE expr RBRACE); // `exists` followed by an expression in parentheses
- starts_with_expr: STARTS WITH plain_expr; // Suffix `starts with` followed by a plain expression
- like_regex_expr: LIKE_REGEX STRING_VALUE (FLAG STRING_VALUE)?; // Suffix `like_regex` with a string literal and an optional `flag` string literal
- // A plain expression serves as an argument to binary and unary operators
- plain_expr: accessor_expr;
- accessor_expr: primary accessor_op*; // A primary followed by zero or more accessor operations
- accessor_op: member_accessor | wildcard_member_accessor | array_accessor | wildcard_array_accessor | filter | method; // Exactly one accessor, filter or method
- // Member accessors, `$.key` and `$.*`
- member_accessor: DOT (identifier | STRING_VALUE); // The key is an identifier (keywords included) or a quoted string
- wildcard_member_accessor: DOT ASTERISK;
- // Array accessors, `$[0, 1 to 3, last]` and `$[*]`
- array_subscript: expr (TO expr)?; // A single index expression or a range written as `expr to expr`
- array_accessor: LBRACE_SQUARE array_subscript (COMMA array_subscript)* RBRACE_SQUARE; // One or more comma separated subscripts in square brackets
- wildcard_array_accessor: LBRACE_SQUARE ASTERISK RBRACE_SQUARE;
- // Filters, `$ ? (@.age >= 18)`: a question mark followed by an expression in parentheses
- filter: QUESTION LBRACE expr RBRACE;
- // Methods, `$.abs().ceiling()`: a dot, a method keyword and an empty pair of parentheses
- method: DOT (ABS_METHOD | FLOOR_METHOD | CEILING_METHOD | DOUBLE_METHOD | TYPE_METHOD | SIZE_METHOD | KEYVALUE_METHOD) LBRACE RBRACE;
- // Primaries are the objects that operations are performed on:
- // 1. All literals:
- // - Numbers, `1.23e-5`
- // - Bool, `false` and `true`
- // - Null, `null`
- // - Strings, `"привет"`, `\r\n\t`
- // 2. Current object, `$`
- // 3. Current filtering object, `@`
- // 4. Variables, `$my_cool_variable`
- // 5. Last array index, `last`
- // 6. Parenthesized jsonpath expression, `($.key + $[0])`, optionally followed by `is unknown`
- primary:
- NUMBER
- | DOLLAR
- | LAST
- | (LBRACE expr RBRACE (IS UNKNOWN)?) // The `is unknown` predicate is attached here rather than in predicate_expr
- | VARIABLE
- | TRUE
- | FALSE
- | NULL
- | STRING_VALUE
- | AT;
- // Identifier for member accessors, `$.key` (variables such as `$variable_name` are lexed as a single VARIABLE token)
- // JsonPath supports using keywords as identifiers. We need to mention keywords in the
- // identifier rule because otherwise ANTLR will treat them as separate tokens.
- // For instance, without this modification the input `$.to` would be treated as
- // `DOLLAR DOT TO`, not `DOLLAR DOT IDENTIFIER`
- identifier: IDENTIFIER | keyword;
- keyword:
- ABS_METHOD
- | CEILING_METHOD
- | DOUBLE_METHOD
- | EXISTS
- | FALSE
- | FLAG
- | FLOOR_METHOD
- | IS
- | KEYVALUE_METHOD
- | LAST
- | LAX
- | LIKE_REGEX
- | NULL
- | SIZE_METHOD
- | STARTS
- | STRICT
- | TO
- | TRUE
- | TYPE_METHOD
- | UNKNOWN
- | WITH;
- //
- // Lexer: operators and punctuation
- //
- AND: '&&';
- ASTERISK: '*';
- AT: '@';
- BACKSLASH: '\\'; // A single backslash character
- COMMA: ',';
- DOLLAR: '$';
- DOT: '.';
- EQUAL: '==';
- GREATER_EQUAL: '>=';
- GREATER: '>';
- LBRACE_SQUARE: '['; // Opening square bracket
- LBRACE: '('; // NOTE: despite the name, this is the opening round parenthesis
- LESS_EQUAL: '<=';
- LESS: '<';
- MINUS: '-';
- NOT_EQUAL_SQL: '<>'; // SQL spelling of "not equal"
- NOT_EQUAL: '!=';
- NOT: '!';
- OR: '||';
- PERCENT: '%';
- PLUS: '+';
- QUESTION: '?';
- QUOTE_DOUBLE: '"';
- QUOTE_SINGLE: '\'';
- RBRACE_SQUARE: ']'; // Closing square bracket
- RBRACE: ')'; // NOTE: despite the name, this is the closing round parenthesis
- SLASH: '/';
- UNDERSCORE: '_';
- // Keywords. Each of them is also listed in the `keyword` parser rule, so it can be used as a member name
- ABS_METHOD: 'abs';
- CEILING_METHOD: 'ceiling';
- DOUBLE_METHOD: 'double';
- EXISTS: 'exists';
- FALSE: 'false';
- FLAG: 'flag';
- FLOOR_METHOD: 'floor';
- IS: 'is';
- KEYVALUE_METHOD: 'keyvalue';
- LAST: 'last';
- LAX: 'lax';
- LIKE_REGEX: 'like_regex';
- NULL: 'null';
- SIZE_METHOD: 'size';
- STARTS: 'starts';
- STRICT: 'strict';
- TO: 'to';
- TRUE: 'true';
- TYPE_METHOD: 'type';
- UNKNOWN: 'unknown';
- WITH: 'with';
- // String literal in single or double quotes. A backslash always consumes the character that follows it
- fragment STRING_CORE_SINGLE: ( ~(QUOTE_SINGLE | BACKSLASH) | (BACKSLASH .) )*; // Body: anything except `'` and `\`, or a backslash plus any character
- fragment STRING_CORE_DOUBLE: ( ~(QUOTE_DOUBLE | BACKSLASH) | (BACKSLASH .) )*; // Body: anything except `"` and `\`, or a backslash plus any character
- fragment STRING_SINGLE: (QUOTE_SINGLE STRING_CORE_SINGLE QUOTE_SINGLE);
- fragment STRING_DOUBLE: (QUOTE_DOUBLE STRING_CORE_DOUBLE QUOTE_DOUBLE);
- STRING_VALUE: (STRING_SINGLE | STRING_DOUBLE); // The token includes the surrounding quotes
- // Number literal, `1.23e-5`: digits, an optional fractional part and an optional exponent. The token has no leading sign
- fragment DIGIT: '0'..'9';
- fragment DIGITS: DIGIT+;
- fragment REAL_PART: DOT DIGITS; // A dot must be followed by at least one digit
- fragment EXP_PART: ('e' | 'E') (PLUS | MINUS)? DIGITS; // Exponent marker, optional sign, at least one digit
- NUMBER: DIGITS REAL_PART? EXP_PART?;
- // Javascript-style identifier restricted to ASCII letters, digits, `_` and `$`
- fragment ID_START: ('a'..'z' | 'A'..'Z' | UNDERSCORE); // First character: a letter or an underscore
- fragment ID_CORE: (ID_START | DIGIT | DOLLAR); // Following characters: additionally digits and `$`
- IDENTIFIER: ID_START (ID_CORE)*;
- // Jsonpath variable, `$my_cool_variable`: a dollar sign followed by identifier characters
- VARIABLE: DOLLAR (ID_CORE)*; // NOTE(review): `*` lets this rule also match a bare `$`, the same text as DOLLAR; presumably DOLLAR wins because it is declared earlier — confirm
- WS: (' '|'\r'|'\t'|'\n') {$channel=HIDDEN;}; // A single space, CR, tab or LF, sent to the hidden channel
- // FIXME: WS and COMMENT tokens are currently required.
- // FIXME: Since there are no comments in JSONPATH, we split whitespace characters between WS and COMMENT
- COMMENT: ('\u000C') {$channel=HIDDEN;}; // A single form feed (U+000C), sent to the hidden channel
|