test_parser.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. #!/usr/bin/env python
  2. import re
  3. from . import unittest, OrderedDict
  4. from jmespath import parser
  5. from jmespath import visitor
  6. from jmespath import ast
  7. from jmespath import exceptions
  8. class TestParser(unittest.TestCase):
  9. def setUp(self):
  10. self.parser = parser.Parser()
  11. def assert_parsed_ast(self, expression, expected_ast):
  12. parsed = self.parser.parse(expression)
  13. self.assertEqual(parsed.parsed, expected_ast)
  14. def test_parse_empty_string_raises_exception(self):
  15. with self.assertRaises(exceptions.EmptyExpressionError):
  16. self.parser.parse('')
  17. def test_field(self):
  18. self.assert_parsed_ast('foo', ast.field('foo'))
  19. def test_dot_syntax(self):
  20. self.assert_parsed_ast('foo.bar',
  21. ast.subexpression([ast.field('foo'),
  22. ast.field('bar')]))
  23. def test_multiple_dots(self):
  24. parsed = self.parser.parse('foo.bar.baz')
  25. self.assertEqual(
  26. parsed.search({'foo': {'bar': {'baz': 'correct'}}}), 'correct')
  27. def test_index(self):
  28. parsed = self.parser.parse('foo[1]')
  29. self.assertEqual(
  30. parsed.search({'foo': ['zero', 'one', 'two']}),
  31. 'one')
  32. def test_quoted_subexpression(self):
  33. self.assert_parsed_ast('"foo"."bar"',
  34. ast.subexpression([
  35. ast.field('foo'),
  36. ast.field('bar')]))
  37. def test_wildcard(self):
  38. parsed = self.parser.parse('foo[*]')
  39. self.assertEqual(
  40. parsed.search({'foo': ['zero', 'one', 'two']}),
  41. ['zero', 'one', 'two'])
  42. def test_wildcard_with_children(self):
  43. parsed = self.parser.parse('foo[*].bar')
  44. self.assertEqual(
  45. parsed.search({'foo': [{'bar': 'one'}, {'bar': 'two'}]}),
  46. ['one', 'two'])
  47. def test_or_expression(self):
  48. parsed = self.parser.parse('foo || bar')
  49. self.assertEqual(parsed.search({'foo': 'foo'}), 'foo')
  50. self.assertEqual(parsed.search({'bar': 'bar'}), 'bar')
  51. self.assertEqual(parsed.search({'foo': 'foo', 'bar': 'bar'}), 'foo')
  52. self.assertEqual(parsed.search({'bad': 'bad'}), None)
  53. def test_complex_or_expression(self):
  54. parsed = self.parser.parse('foo.foo || foo.bar')
  55. self.assertEqual(parsed.search({'foo': {'foo': 'foo'}}), 'foo')
  56. self.assertEqual(parsed.search({'foo': {'bar': 'bar'}}), 'bar')
  57. self.assertEqual(parsed.search({'foo': {'baz': 'baz'}}), None)
  58. def test_or_repr(self):
  59. self.assert_parsed_ast('foo || bar', ast.or_expression(ast.field('foo'),
  60. ast.field('bar')))
  61. def test_unicode_literals_escaped(self):
  62. self.assert_parsed_ast(r'`"\u2713"`', ast.literal(u'\u2713'))
  63. def test_multiselect(self):
  64. parsed = self.parser.parse('foo.{bar: bar,baz: baz}')
  65. self.assertEqual(
  66. parsed.search({'foo': {'bar': 'bar', 'baz': 'baz', 'qux': 'qux'}}),
  67. {'bar': 'bar', 'baz': 'baz'})
  68. def test_multiselect_subexpressions(self):
  69. parsed = self.parser.parse('foo.{"bar.baz": bar.baz, qux: qux}')
  70. self.assertEqual(
  71. parsed.search({'foo': {'bar': {'baz': 'CORRECT'}, 'qux': 'qux'}}),
  72. {'bar.baz': 'CORRECT', 'qux': 'qux'})
  73. def test_multiselect_with_all_quoted_keys(self):
  74. parsed = self.parser.parse('foo.{"bar": bar.baz, "qux": qux}')
  75. result = parsed.search({'foo': {'bar': {'baz': 'CORRECT'}, 'qux': 'qux'}})
  76. self.assertEqual(result, {"bar": "CORRECT", "qux": "qux"})
  77. def test_function_call_with_and_statement(self):
  78. self.assert_parsed_ast(
  79. 'f(@ && @)',
  80. {'children': [{'children': [{'children': [], 'type': 'current'},
  81. {'children': [], 'type': 'current'}],
  82. 'type': 'and_expression'}],
  83. 'type': 'function_expression',
  84. 'value': 'f'})
  85. class TestErrorMessages(unittest.TestCase):
  86. def setUp(self):
  87. self.parser = parser.Parser()
  88. def assert_error_message(self, expression, error_message,
  89. exception=exceptions.ParseError):
  90. try:
  91. self.parser.parse(expression)
  92. except exception as e:
  93. self.assertEqual(error_message, str(e))
  94. return
  95. except Exception as e:
  96. self.fail(
  97. "Unexpected error raised (%s: %s) for bad expression: %s" %
  98. (e.__class__.__name__, e, expression))
  99. else:
  100. self.fail(
  101. "ParseError not raised for bad expression: %s" % expression)
  102. def test_bad_parse(self):
  103. with self.assertRaises(exceptions.ParseError):
  104. self.parser.parse('foo]baz')
  105. def test_bad_parse_error_message(self):
  106. error_message = (
  107. 'Unexpected token: ]: Parse error at column 3, '
  108. 'token "]" (RBRACKET), for expression:\n'
  109. '"foo]baz"\n'
  110. ' ^')
  111. self.assert_error_message('foo]baz', error_message)
  112. def test_bad_parse_error_message_with_multiselect(self):
  113. error_message = (
  114. 'Invalid jmespath expression: Incomplete expression:\n'
  115. '"foo.{bar: baz,bar: bar"\n'
  116. ' ^')
  117. self.assert_error_message('foo.{bar: baz,bar: bar', error_message)
  118. def test_incomplete_expression_with_missing_paren(self):
  119. error_message = (
  120. 'Invalid jmespath expression: Incomplete expression:\n'
  121. '"length(@,"\n'
  122. ' ^')
  123. self.assert_error_message('length(@,', error_message)
  124. def test_bad_lexer_values(self):
  125. error_message = (
  126. 'Bad jmespath expression: '
  127. 'Unclosed " delimiter:\n'
  128. 'foo."bar\n'
  129. ' ^')
  130. self.assert_error_message('foo."bar', error_message,
  131. exception=exceptions.LexerError)
  132. def test_bad_unicode_string(self):
  133. # This error message is straight from the JSON parser
  134. # and pypy has a slightly different error message,
  135. # so we're not using assert_error_message.
  136. error_message = re.compile(
  137. r'Bad jmespath expression: '
  138. r'Invalid \\uXXXX escape.*\\uAZ12', re.DOTALL)
  139. with self.assertRaisesRegexp(exceptions.LexerError, error_message):
  140. self.parser.parse(r'"\uAZ12"')
  141. class TestParserWildcards(unittest.TestCase):
  142. def setUp(self):
  143. self.parser = parser.Parser()
  144. self.data = {
  145. 'foo': [
  146. {'bar': [{'baz': 'one'}, {'baz': 'two'}]},
  147. {'bar': [{'baz': 'three'}, {'baz': 'four'}, {'baz': 'five'}]},
  148. ]
  149. }
  150. def test_multiple_index_wildcards(self):
  151. parsed = self.parser.parse('foo[*].bar[*].baz')
  152. self.assertEqual(parsed.search(self.data),
  153. [['one', 'two'], ['three', 'four', 'five']])
  154. def test_wildcard_mix_with_indices(self):
  155. parsed = self.parser.parse('foo[*].bar[0].baz')
  156. self.assertEqual(parsed.search(self.data),
  157. ['one', 'three'])
  158. def test_wildcard_mix_last(self):
  159. parsed = self.parser.parse('foo[0].bar[*].baz')
  160. self.assertEqual(parsed.search(self.data),
  161. ['one', 'two'])
  162. def test_indices_out_of_bounds(self):
  163. parsed = self.parser.parse('foo[*].bar[2].baz')
  164. self.assertEqual(parsed.search(self.data),
  165. ['five'])
  166. def test_root_indices(self):
  167. parsed = self.parser.parse('[0]')
  168. self.assertEqual(parsed.search(['one', 'two']), 'one')
  169. def test_root_wildcard(self):
  170. parsed = self.parser.parse('*.foo')
  171. data = {'top1': {'foo': 'bar'}, 'top2': {'foo': 'baz'},
  172. 'top3': {'notfoo': 'notfoo'}}
  173. # Sorted is being used because the order of the keys are not
  174. # required to be in any specific order.
  175. self.assertEqual(sorted(parsed.search(data)), sorted(['bar', 'baz']))
  176. self.assertEqual(sorted(self.parser.parse('*.notfoo').search(data)),
  177. sorted(['notfoo']))
  178. def test_only_wildcard(self):
  179. parsed = self.parser.parse('*')
  180. data = {'foo': 'a', 'bar': 'b', 'baz': 'c'}
  181. self.assertEqual(sorted(parsed.search(data)), sorted(['a', 'b', 'c']))
  182. def test_escape_sequences(self):
  183. self.assertEqual(self.parser.parse(r'"foo\tbar"').search(
  184. {'foo\tbar': 'baz'}), 'baz')
  185. self.assertEqual(self.parser.parse(r'"foo\nbar"').search(
  186. {'foo\nbar': 'baz'}), 'baz')
  187. self.assertEqual(self.parser.parse(r'"foo\bbar"').search(
  188. {'foo\bbar': 'baz'}), 'baz')
  189. self.assertEqual(self.parser.parse(r'"foo\fbar"').search(
  190. {'foo\fbar': 'baz'}), 'baz')
  191. self.assertEqual(self.parser.parse(r'"foo\rbar"').search(
  192. {'foo\rbar': 'baz'}), 'baz')
  193. def test_consecutive_escape_sequences(self):
  194. parsed = self.parser.parse(r'"foo\\nbar"')
  195. self.assertEqual(parsed.search({'foo\\nbar': 'baz'}), 'baz')
  196. parsed = self.parser.parse(r'"foo\n\t\rbar"')
  197. self.assertEqual(parsed.search({'foo\n\t\rbar': 'baz'}), 'baz')
  198. def test_escape_sequence_at_end_of_string_not_allowed(self):
  199. with self.assertRaises(ValueError):
  200. self.parser.parse('foobar\\')
  201. def test_wildcard_with_multiselect(self):
  202. parsed = self.parser.parse('foo.*.{a: a, b: b}')
  203. data = {
  204. 'foo': {
  205. 'one': {
  206. 'a': {'c': 'CORRECT', 'd': 'other'},
  207. 'b': {'c': 'ALSOCORRECT', 'd': 'other'},
  208. },
  209. 'two': {
  210. 'a': {'c': 'CORRECT', 'd': 'other'},
  211. 'c': {'c': 'WRONG', 'd': 'other'},
  212. },
  213. }
  214. }
  215. match = parsed.search(data)
  216. self.assertEqual(len(match), 2)
  217. self.assertIn('a', match[0])
  218. self.assertIn('b', match[0])
  219. self.assertIn('a', match[1])
  220. self.assertIn('b', match[1])
  221. class TestMergedLists(unittest.TestCase):
  222. def setUp(self):
  223. self.parser = parser.Parser()
  224. self.data = {
  225. "foo": [
  226. [["one", "two"], ["three", "four"]],
  227. [["five", "six"], ["seven", "eight"]],
  228. [["nine"], ["ten"]]
  229. ]
  230. }
  231. def test_merge_with_indices(self):
  232. parsed = self.parser.parse('foo[][0]')
  233. match = parsed.search(self.data)
  234. self.assertEqual(match, ["one", "three", "five", "seven",
  235. "nine", "ten"])
  236. def test_trailing_merged_operator(self):
  237. parsed = self.parser.parse('foo[]')
  238. match = parsed.search(self.data)
  239. self.assertEqual(
  240. match,
  241. [["one", "two"], ["three", "four"],
  242. ["five", "six"], ["seven", "eight"],
  243. ["nine"], ["ten"]])
  244. class TestParserCaching(unittest.TestCase):
  245. def test_compile_lots_of_expressions(self):
  246. # We have to be careful here because this is an implementation detail
  247. # that should be abstracted from the user, but we need to make sure we
  248. # exercise the code and that it doesn't blow up.
  249. p = parser.Parser()
  250. compiled = []
  251. compiled2 = []
  252. for i in range(parser.Parser._MAX_SIZE + 1):
  253. compiled.append(p.parse('foo%s' % i))
  254. # Rerun the test and half of these entries should be from the
  255. # cache but they should still be equal to compiled.
  256. for i in range(parser.Parser._MAX_SIZE + 1):
  257. compiled2.append(p.parse('foo%s' % i))
  258. self.assertEqual(len(compiled), len(compiled2))
  259. self.assertEqual(
  260. [expr.parsed for expr in compiled],
  261. [expr.parsed for expr in compiled2])
  262. def test_cache_purge(self):
  263. p = parser.Parser()
  264. first = p.parse('foo')
  265. cached = p.parse('foo')
  266. p.purge()
  267. second = p.parse('foo')
  268. self.assertEqual(first.parsed,
  269. second.parsed)
  270. self.assertEqual(first.parsed,
  271. cached.parsed)
  272. class TestParserAddsExpressionAttribute(unittest.TestCase):
  273. def test_expression_available_from_parser(self):
  274. p = parser.Parser()
  275. parsed = p.parse('foo.bar')
  276. self.assertEqual(parsed.expression, 'foo.bar')
  277. class TestParsedResultAddsOptions(unittest.TestCase):
  278. def test_can_have_ordered_dict(self):
  279. p = parser.Parser()
  280. parsed = p.parse('{a: a, b: b, c: c}')
  281. options = visitor.Options(dict_cls=OrderedDict)
  282. result = parsed.search(
  283. {"c": "c", "b": "b", "a": "a"}, options=options)
  284. # The order should be 'a', 'b' because we're using an
  285. # OrderedDict
  286. self.assertEqual(list(result), ['a', 'b', 'c'])
  287. class TestRenderGraphvizFile(unittest.TestCase):
  288. def test_dot_file_rendered(self):
  289. p = parser.Parser()
  290. result = p.parse('foo')
  291. dot_contents = result._render_dot_file()
  292. self.assertEqual(dot_contents,
  293. 'digraph AST {\nfield1 [label="field(foo)"]\n}')
  294. def test_dot_file_subexpr(self):
  295. p = parser.Parser()
  296. result = p.parse('foo.bar')
  297. dot_contents = result._render_dot_file()
  298. self.assertEqual(
  299. dot_contents,
  300. 'digraph AST {\n'
  301. 'subexpression1 [label="subexpression()"]\n'
  302. ' subexpression1 -> field2\n'
  303. 'field2 [label="field(foo)"]\n'
  304. ' subexpression1 -> field3\n'
  305. 'field3 [label="field(bar)"]\n}')
# Run this module's test cases when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()