JsonPath.g 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. grammar JsonPath; // Combined grammar: parser rules come first, lexer rules follow in the Lexer section
  2. options {
  3. language = Cpp; // Generate the recognizer for the C++ target
  4. memoize = true; // Turn on ANTLR rule memoization
  5. }
  6. // Root rule. Input is an optional mode (`strict` or `lax`) followed by a jsonpath expression and the end of input
  7. jsonpath: (STRICT | LAX)? expr EOF;
  8. // Generic jsonpath expression. Used wherever a nested expression is allowed; delegates to the lowest-precedence operator rule
  9. expr: or_expr;
  10. // Arithmetic and boolean operations
  11. // Operator precedence:
  12. // 1. Unary plus, minus and logical not
  13. // 2. Multiplication, division, modulus
  14. // 3. Addition, subtraction
  15. // 4. Compare operators (<, <=, >, >=)
  16. // 5. Equality operators (==, !=, <>)
  17. // 6. Logical and
  18. // 7. Logical or
  19. // NOTE: We execute JsonPath using a bottom-up approach. Thus
  20. // operations with higher precedence must be located "deeper" inside AST
  21. or_expr: and_expr (OR and_expr)*; // Logical or, `||`; lowest precedence, any number of operands
  22. and_expr: equal_expr (AND equal_expr)*; // Logical and, `&&`; any number of operands
  23. equal_expr: compare_expr ((EQUAL | NOT_EQUAL | NOT_EQUAL_SQL) compare_expr)?; // `==`, `!=`, `<>`; at most one operator, so equality checks do not chain
  24. compare_expr: add_expr ((LESS | LESS_EQUAL | GREATER | GREATER_EQUAL) add_expr)?; // `<`, `<=`, `>`, `>=`; at most one operator, so comparisons do not chain
  25. add_expr: mul_expr ((PLUS | MINUS) mul_expr)*; // Binary `+` and `-`; any number of operands
  26. mul_expr: unary_expr ((ASTERISK | SLASH | PERCENT) unary_expr)*; // `*`, `/`, `%`; any number of operands
  27. unary_expr: (PLUS | MINUS | NOT)? predicate_expr; // At most one prefix operator: `+`, `-` or `!`; highest operator precedence
  28. // Predicates, `"string" starts with "str"`
  29. // NOTE: The `is unknown` predicate is defined separately in the primary rule. This is done
  30. // because if we add it as an alternative to predicate_expr, ANTLR would need backtracking.
  31. // For example, it would not be possible to tell if an expression like `( ... ) is unknown` is
  32. // related to `starts with` (and braces are part of the plain_expr rule) or it is related to
  33. // the `is unknown` rule (and braces are not included in plain_expr).
  34. predicate_expr:
  35. (plain_expr (starts_with_expr | like_regex_expr)?) // Plain operand with an optional `starts with` or `like_regex` suffix
  36. | (EXISTS LBRACE expr RBRACE); // `exists ( ... )` applied to a parenthesized expression
  37. starts_with_expr: STARTS WITH plain_expr; // Suffix part of the predicate: `starts with` followed by the prefix operand
  38. like_regex_expr: LIKE_REGEX STRING_VALUE (FLAG STRING_VALUE)?; // Suffix part of the predicate: `like_regex "..."` with an optional `flag "..."`; both arguments must be string literals
  39. // Plain expression serves as an argument to binary and unary operators
  40. plain_expr: accessor_expr; // Currently a plain alias for accessor_expr
  41. accessor_expr: primary accessor_op*; // A primary followed by any number of accessor operations
  42. accessor_op: member_accessor | wildcard_member_accessor | array_accessor | wildcard_array_accessor | filter | method; // One path step applied to the value on its left
  43. // Member accessors, `$.key` and `$.*`
  44. member_accessor: DOT (identifier | STRING_VALUE); // Key is an identifier (keywords included) or a quoted string
  45. wildcard_member_accessor: DOT ASTERISK; // `.*`
  46. // Array accessors, `$[0, 1 to 3, last]` and `$[*]`
  47. array_subscript: expr (TO expr)?; // Either a single index expression or a range `expr to expr`
  48. array_accessor: LBRACE_SQUARE array_subscript (COMMA array_subscript)* RBRACE_SQUARE; // One or more comma-separated subscripts inside `[` and `]`
  49. wildcard_array_accessor: LBRACE_SQUARE ASTERISK RBRACE_SQUARE; // `[*]`
  50. // Filters, `$ ? (@.age >= 18)`
  51. filter: QUESTION LBRACE expr RBRACE; // `?` followed by a parenthesized expression; the parentheses are mandatory
  52. // Methods, `$.abs().ceiling()`
  53. method: DOT (ABS_METHOD | FLOOR_METHOD | CEILING_METHOD | DOUBLE_METHOD | TYPE_METHOD | SIZE_METHOD | KEYVALUE_METHOD) LBRACE RBRACE; // `.` plus a method name, then an empty argument list `()`
  54. // Primaries are objects to perform operations on:
  55. // 1. All literals:
  56. // - Numbers, `1.23e-5`
  57. // - Bool, `false` and `true`
  58. // - Null, `null`
  59. // - Strings, `"привет"`, `\r\n\t`
  60. // 2. Current object, `$`
  61. // 3. Current filtering object, `@`
  62. // 4. Variables, `$my_cool_variable`
  63. // 5. Last array index, `last`
  64. // 6. Parenthesized jsonpath expression, `($.key + $[0])`, optionally followed by `is unknown`
  65. primary:
  66. NUMBER // Number literal
  67. | DOLLAR // Current object, `$`
  68. | LAST // Last array index, `last`
  69. | (LBRACE expr RBRACE (IS UNKNOWN)?) // Parenthesized expression; the `is unknown` predicate is attached here instead of in predicate_expr
  70. | VARIABLE // Variable, `$my_cool_variable`
  71. | TRUE // Bool literal
  72. | FALSE // Bool literal
  73. | NULL // Null literal
  74. | STRING_VALUE // String literal, single- or double-quoted
  75. | AT; // Current filtering object, `@`
  76. // Identifier for member accessors and variable names, `$.key` and `$variable_name`
  77. // JsonPath supports using keywords as identifiers. We need to mention keywords in
  78. // the identifier rule because otherwise ANTLR will treat them as separate tokens.
  79. // For instance, the input `$.to` without this modification will be treated as
  80. // `DOLLAR DOT TO`, not `DOLLAR DOT IDENTIFIER`
  81. identifier: IDENTIFIER | keyword; // Plain identifier token, or any keyword token reused as a name
  82. keyword: // Lists every token from the Keywords section of the lexer; a keyword token missing here cannot be used as a member name
  83. ABS_METHOD
  84. | CEILING_METHOD
  85. | DOUBLE_METHOD
  86. | EXISTS
  87. | FALSE
  88. | FLAG
  89. | FLOOR_METHOD
  90. | IS
  91. | KEYVALUE_METHOD
  92. | LAST
  93. | LAX
  94. | LIKE_REGEX
  95. | NULL
  96. | SIZE_METHOD
  97. | STARTS
  98. | STRICT
  99. | TO
  100. | TRUE
  101. | TYPE_METHOD
  102. | UNKNOWN
  103. | WITH;
  104. //
  105. // Lexer
  106. //
  107. AND: '&&'; // Operator and punctuation tokens start here
  108. ASTERISK: '*'; // Multiplication operator and the wildcard in `.*` / `[*]`
  109. AT: '@';
  110. BACKSLASH: '\\'; // Referenced only by the string literal fragments
  111. COMMA: ',';
  112. DOLLAR: '$'; // Also referenced by ID_CORE and VARIABLE
  113. DOT: '.'; // Also referenced by REAL_PART
  114. EQUAL: '==';
  115. GREATER_EQUAL: '>=';
  116. GREATER: '>';
  117. LBRACE_SQUARE: '[';
  118. LBRACE: '('; // Despite the name, this is a round bracket, not a curly brace
  119. LESS_EQUAL: '<=';
  120. LESS: '<';
  121. MINUS: '-'; // Also referenced by EXP_PART
  122. NOT_EQUAL_SQL: '<>'; // Accepted by equal_expr alongside `!=`
  123. NOT_EQUAL: '!=';
  124. NOT: '!';
  125. OR: '||';
  126. PERCENT: '%';
  127. PLUS: '+'; // Also referenced by EXP_PART
  128. QUESTION: '?';
  129. QUOTE_DOUBLE: '"'; // Referenced only by the string literal fragments
  130. QUOTE_SINGLE: '\''; // Referenced only by the string literal fragments
  131. RBRACE_SQUARE: ']';
  132. RBRACE: ')'; // Despite the name, this is a round bracket, not a curly brace
  133. SLASH: '/';
  134. UNDERSCORE: '_'; // Referenced only by ID_START
  135. // Keywords. They are declared before IDENTIFIER, and each of them is also listed in the `keyword` parser rule
  136. ABS_METHOD: 'abs';
  137. CEILING_METHOD: 'ceiling';
  138. DOUBLE_METHOD: 'double';
  139. EXISTS: 'exists';
  140. FALSE: 'false';
  141. FLAG: 'flag';
  142. FLOOR_METHOD: 'floor';
  143. IS: 'is';
  144. KEYVALUE_METHOD: 'keyvalue';
  145. LAST: 'last';
  146. LAX: 'lax';
  147. LIKE_REGEX: 'like_regex';
  148. NULL: 'null';
  149. SIZE_METHOD: 'size';
  150. STARTS: 'starts';
  151. STRICT: 'strict';
  152. TO: 'to';
  153. TRUE: 'true';
  154. TYPE_METHOD: 'type';
  155. UNKNOWN: 'unknown';
  156. WITH: 'with';
  157. // String literal, single- or double-quoted. A backslash always consumes the character after it
  158. fragment STRING_CORE_SINGLE: ( ~(QUOTE_SINGLE | BACKSLASH) | (BACKSLASH .) )*; // Body of a '...' string: any character except `'` and `\`, or a backslash plus any one character
  159. fragment STRING_CORE_DOUBLE: ( ~(QUOTE_DOUBLE | BACKSLASH) | (BACKSLASH .) )*; // Body of a "..." string: any character except `"` and `\`, or a backslash plus any one character
  160. fragment STRING_SINGLE: (QUOTE_SINGLE STRING_CORE_SINGLE QUOTE_SINGLE);
  161. fragment STRING_DOUBLE: (QUOTE_DOUBLE STRING_CORE_DOUBLE QUOTE_DOUBLE);
  162. STRING_VALUE: (STRING_SINGLE | STRING_DOUBLE); // The token includes the surrounding quotes; the body may be empty
  163. // Number literal
  164. fragment DIGIT: '0'..'9';
  165. fragment DIGITS: DIGIT+;
  166. fragment REAL_PART: DOT DIGITS; // Fraction: a dot followed by at least one digit
  167. fragment EXP_PART: ('e' | 'E') (PLUS | MINUS)? DIGITS; // Exponent with an optional sign
  168. NUMBER: DIGITS REAL_PART? EXP_PART?; // Always unsigned and starting with a digit; a leading sign is parsed by unary_expr
  169. // Javascript identifier
  170. fragment ID_START: ('a'..'z' | 'A'..'Z' | UNDERSCORE); // First character: ASCII letter or underscore
  171. fragment ID_CORE: (ID_START | DIGIT | DOLLAR); // Following characters may also be digits or `$`
  172. IDENTIFIER: ID_START (ID_CORE)*;
  173. // Jsonpath variable, `$my_cool_variable`. At least one ID_CORE character is required after `$`, so a bare `$` is matched by DOLLAR only and the two tokens no longer overlap
  174. VARIABLE: DOLLAR (ID_CORE)+;
  175. WS: (' '|'\r'|'\t'|'\n') {$channel=HIDDEN;}; // One whitespace character per token, sent to the hidden channel
  176. // FIXME: WS and COMMENT tokens are currently required.
  177. // FIXME: Since there are no comments in JSONPATH, we split whitespace characters between WS and COMMENT
  178. COMMENT: ('\u000C') {$channel=HIDDEN;}; // Matches only the form feed character (U+000C) and sends it to the hidden channel