# parser.py
  1. # coding: utf-8
  2. from __future__ import absolute_import
  3. # The following YAML grammar is LL(1) and is parsed by a recursive descent
  4. # parser.
  5. #
  6. # stream ::= STREAM-START implicit_document? explicit_document*
  7. # STREAM-END
  8. # implicit_document ::= block_node DOCUMENT-END*
  9. # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
  10. # block_node_or_indentless_sequence ::=
  11. # ALIAS
  12. # | properties (block_content |
  13. # indentless_block_sequence)?
  14. # | block_content
  15. # | indentless_block_sequence
  16. # block_node ::= ALIAS
  17. # | properties block_content?
  18. # | block_content
  19. # flow_node ::= ALIAS
  20. # | properties flow_content?
  21. # | flow_content
  22. # properties ::= TAG ANCHOR? | ANCHOR TAG?
  23. # block_content ::= block_collection | flow_collection | SCALAR
  24. # flow_content ::= flow_collection | SCALAR
  25. # block_collection ::= block_sequence | block_mapping
  26. # flow_collection ::= flow_sequence | flow_mapping
  27. # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)*
  28. # BLOCK-END
  29. # indentless_sequence ::= (BLOCK-ENTRY block_node?)+
  30. # block_mapping ::= BLOCK-MAPPING_START
  31. # ((KEY block_node_or_indentless_sequence?)?
  32. # (VALUE block_node_or_indentless_sequence?)?)*
  33. # BLOCK-END
  34. # flow_sequence ::= FLOW-SEQUENCE-START
  35. # (flow_sequence_entry FLOW-ENTRY)*
  36. # flow_sequence_entry?
  37. # FLOW-SEQUENCE-END
  38. # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
  39. # flow_mapping ::= FLOW-MAPPING-START
  40. # (flow_mapping_entry FLOW-ENTRY)*
  41. # flow_mapping_entry?
  42. # FLOW-MAPPING-END
  43. # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
  44. #
  45. # FIRST sets:
  46. #
  47. # stream: { STREAM-START }
  48. # explicit_document: { DIRECTIVE DOCUMENT-START }
  49. # implicit_document: FIRST(block_node)
  50. # block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START
  51. # BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
  52. # flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
  53. # block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START
  54. # FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
  55. # flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
  56. # block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
  57. # flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
  58. # block_sequence: { BLOCK-SEQUENCE-START }
  59. # block_mapping: { BLOCK-MAPPING-START }
  60. # block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR
  61. # BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START
  62. # FLOW-MAPPING-START BLOCK-ENTRY }
  63. # indentless_sequence: { ENTRY }
  64. # flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
  65. # flow_sequence: { FLOW-SEQUENCE-START }
  66. # flow_mapping: { FLOW-MAPPING-START }
  67. # flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START
  68. # FLOW-MAPPING-START KEY }
  69. # flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START
  70. # FLOW-MAPPING-START KEY }
  71. # need to have full path with import, as pkg_resources tries to load parser.py in __init__.py
  72. # only to not do anything with the package afterwards
  73. # and for Jython too
  74. from ruamel.yaml.error import MarkedYAMLError
  75. from ruamel.yaml.tokens import * # NOQA
  76. from ruamel.yaml.events import * # NOQA
  77. from ruamel.yaml.scanner import Scanner, RoundTripScanner, ScannerError # NOQA
  78. from ruamel.yaml.compat import utf8, nprint, nprintf # NOQA
  79. if False: # MYPY
  80. from typing import Any, Dict, Optional, List # NOQA
__all__ = ['Parser', 'RoundTripParser', 'ParserError']


class ParserError(MarkedYAMLError):
    """Raised when the token stream cannot be matched against the YAML grammar."""

    pass
class Parser(object):
    """Recursive-descent parser that turns the scanner's token stream into events.

    The public interface is ``check_event``/``peek_event``/``get_event``.
    Each ``parse_*`` method implements one grammar production; the next
    production to run is stored in ``self.state`` and suspended productions
    are kept on the ``self.states`` stack.
    """

    # Since writing a recursive-descendant parser is a straightforward task, we
    # do not give many comments here.

    DEFAULT_TAGS = {u'!': u'!', u'!!': u'tag:yaml.org,2002:'}

    def __init__(self, loader):
        # type: (Any) -> None
        # Attach to the loader, registering self as its parser unless the
        # loader already has one.
        self.loader = loader
        if self.loader is not None and getattr(self.loader, '_parser', None) is None:
            self.loader._parser = self
        self.reset_parser()

    def reset_parser(self):
        # type: () -> None
        # Reset the state attributes (to clear self-references)
        self.current_event = None
        self.tag_handles = {}  # type: Dict[Any, Any]
        self.states = []  # type: List[Any]
        self.marks = []  # type: List[Any]
        self.state = self.parse_stream_start  # type: Any

    def dispose(self):
        # type: () -> None
        # Breaking the bound-method self-reference in ``self.state`` lets the
        # parser be garbage collected.
        self.reset_parser()

    @property
    def scanner(self):
        # type: () -> Any
        # New-API loaders carry a ``typ`` attribute and expose the scanner
        # publicly; old-API loaders keep it in the private ``_scanner`` slot.
        if hasattr(self.loader, 'typ'):
            return self.loader.scanner
        return self.loader._scanner

    @property
    def resolver(self):
        # type: () -> Any
        # Same new-API/old-API split as ``scanner`` above.
        if hasattr(self.loader, 'typ'):
            return self.loader.resolver
        return self.loader._resolver

    def check_event(self, *choices):
        # type: (Any) -> bool
        # Check the type of the next event.
        if self.current_event is None:
            if self.state:
                self.current_event = self.state()
        if self.current_event is not None:
            if not choices:
                return True
            for choice in choices:
                if isinstance(self.current_event, choice):
                    return True
        return False

    def peek_event(self):
        # type: () -> Any
        # Get the next event.
        if self.current_event is None:
            if self.state:
                self.current_event = self.state()
        return self.current_event

    def get_event(self):
        # type: () -> Any
        # Get the next event and proceed further.
        if self.current_event is None:
            if self.state:
                self.current_event = self.state()
        value = self.current_event
        self.current_event = None
        return value

    # stream    ::= STREAM-START implicit_document? explicit_document*
    #                                                               STREAM-END
    # implicit_document ::= block_node DOCUMENT-END*
    # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*

    def parse_stream_start(self):
        # type: () -> Any
        # Parse the stream start.
        token = self.scanner.get_token()
        token.move_comment(self.scanner.peek_token())
        event = StreamStartEvent(token.start_mark, token.end_mark, encoding=token.encoding)

        # Prepare the next state.
        self.state = self.parse_implicit_document_start

        return event

    def parse_implicit_document_start(self):
        # type: () -> Any
        # Parse an implicit document: a bare block node with no directives
        # and no explicit '---' marker.
        if not self.scanner.check_token(DirectiveToken, DocumentStartToken, StreamEndToken):
            self.tag_handles = self.DEFAULT_TAGS
            token = self.scanner.peek_token()
            start_mark = end_mark = token.start_mark
            event = DocumentStartEvent(start_mark, end_mark, explicit=False)

            # Prepare the next state.
            self.states.append(self.parse_document_end)
            self.state = self.parse_block_node

            return event

        else:
            return self.parse_document_start()

    def parse_document_start(self):
        # type: () -> Any
        # Parse any extra document end indicators.
        while self.scanner.check_token(DocumentEndToken):
            self.scanner.get_token()
        # Parse an explicit document.
        if not self.scanner.check_token(StreamEndToken):
            token = self.scanner.peek_token()
            start_mark = token.start_mark
            version, tags = self.process_directives()
            if not self.scanner.check_token(DocumentStartToken):
                raise ParserError(
                    None,
                    None,
                    "expected '<document start>', but found %r" % self.scanner.peek_token().id,
                    self.scanner.peek_token().start_mark,
                )
            token = self.scanner.get_token()
            end_mark = token.end_mark
            # if self.loader is not None and \
            #    end_mark.line != self.scanner.peek_token().start_mark.line:
            #     self.loader.scalar_after_indicator = False
            event = DocumentStartEvent(
                start_mark, end_mark, explicit=True, version=version, tags=tags
            )  # type: Any
            self.states.append(self.parse_document_end)
            self.state = self.parse_document_content
        else:
            # Parse the end of the stream.
            token = self.scanner.get_token()
            event = StreamEndEvent(token.start_mark, token.end_mark, comment=token.comment)
            assert not self.states
            assert not self.marks
            self.state = None
        return event

    def parse_document_end(self):
        # type: () -> Any
        # Parse the document end.
        token = self.scanner.peek_token()
        start_mark = end_mark = token.start_mark
        explicit = False
        if self.scanner.check_token(DocumentEndToken):
            token = self.scanner.get_token()
            end_mark = token.end_mark
            explicit = True
        event = DocumentEndEvent(start_mark, end_mark, explicit=explicit)

        # Prepare the next state.
        if self.resolver.processing_version == (1, 1):
            self.state = self.parse_document_start
        else:
            self.state = self.parse_implicit_document_start

        return event

    def parse_document_content(self):
        # type: () -> Any
        # An empty document body yields an empty scalar; otherwise parse the
        # document's root block node.
        if self.scanner.check_token(
            DirectiveToken, DocumentStartToken, DocumentEndToken, StreamEndToken
        ):
            event = self.process_empty_scalar(self.scanner.peek_token().start_mark)
            self.state = self.states.pop()
            return event
        else:
            return self.parse_block_node()

    def process_directives(self):
        # type: () -> Any
        # Consume %YAML and %TAG directives, returning ``(version, tags)``
        # where ``tags`` is a copy of the handle map or None if no %TAG
        # directive appeared.
        yaml_version = None
        self.tag_handles = {}
        while self.scanner.check_token(DirectiveToken):
            token = self.scanner.get_token()
            if token.name == u'YAML':
                if yaml_version is not None:
                    raise ParserError(
                        None, None, 'found duplicate YAML directive', token.start_mark
                    )
                major, minor = token.value
                if major != 1:
                    raise ParserError(
                        None,
                        None,
                        'found incompatible YAML document (version 1.* is ' 'required)',
                        token.start_mark,
                    )
                yaml_version = token.value
            elif token.name == u'TAG':
                handle, prefix = token.value
                if handle in self.tag_handles:
                    raise ParserError(
                        None, None, 'duplicate tag handle %r' % utf8(handle), token.start_mark
                    )
                self.tag_handles[handle] = prefix
        if bool(self.tag_handles):
            value = yaml_version, self.tag_handles.copy()  # type: Any
        else:
            value = yaml_version, None
        if self.loader is not None and hasattr(self.loader, 'tags'):
            self.loader.version = yaml_version
            if self.loader.tags is None:
                self.loader.tags = {}
            for k in self.tag_handles:
                self.loader.tags[k] = self.tag_handles[k]
        # Fill in the defaults last so explicit %TAG directives win.
        for key in self.DEFAULT_TAGS:
            if key not in self.tag_handles:
                self.tag_handles[key] = self.DEFAULT_TAGS[key]
        return value

    # block_node_or_indentless_sequence ::= ALIAS
    #               | properties (block_content | indentless_block_sequence)?
    #               | block_content
    #               | indentless_block_sequence
    # block_node    ::= ALIAS
    #                   | properties block_content?
    #                   | block_content
    # flow_node     ::= ALIAS
    #                   | properties flow_content?
    #                   | flow_content
    # properties    ::= TAG ANCHOR? | ANCHOR TAG?
    # block_content     ::= block_collection | flow_collection | SCALAR
    # flow_content      ::= flow_collection | SCALAR
    # block_collection  ::= block_sequence | block_mapping
    # flow_collection   ::= flow_sequence | flow_mapping

    def parse_block_node(self):
        # type: () -> Any
        return self.parse_node(block=True)

    def parse_flow_node(self):
        # type: () -> Any
        return self.parse_node()

    def parse_block_node_or_indentless_sequence(self):
        # type: () -> Any
        return self.parse_node(block=True, indentless_sequence=True)

    def transform_tag(self, handle, suffix):
        # type: (Any, Any) -> Any
        # Expand a tag handle to its full prefix (overridden in
        # RoundTripParser to keep tags unmangled).
        return self.tag_handles[handle] + suffix

    def parse_node(self, block=False, indentless_sequence=False):
        # type: (bool, bool) -> Any
        """Parse one node production and return the corresponding event.

        Handles aliases, anchor/tag properties in either order, and
        dispatches on the following token to scalar, flow/block sequence or
        mapping, or an empty scalar.
        """
        if self.scanner.check_token(AliasToken):
            token = self.scanner.get_token()
            event = AliasEvent(token.value, token.start_mark, token.end_mark)  # type: Any
            self.state = self.states.pop()
            return event
        anchor = None
        tag = None
        start_mark = end_mark = tag_mark = None
        # properties ::= TAG ANCHOR? | ANCHOR TAG?
        if self.scanner.check_token(AnchorToken):
            token = self.scanner.get_token()
            start_mark = token.start_mark
            end_mark = token.end_mark
            anchor = token.value
            if self.scanner.check_token(TagToken):
                token = self.scanner.get_token()
                tag_mark = token.start_mark
                end_mark = token.end_mark
                tag = token.value
        elif self.scanner.check_token(TagToken):
            token = self.scanner.get_token()
            start_mark = tag_mark = token.start_mark
            end_mark = token.end_mark
            tag = token.value
            if self.scanner.check_token(AnchorToken):
                token = self.scanner.get_token()
                start_mark = tag_mark = token.start_mark
                end_mark = token.end_mark
                anchor = token.value
        if tag is not None:
            handle, suffix = tag
            if handle is not None:
                if handle not in self.tag_handles:
                    raise ParserError(
                        'while parsing a node',
                        start_mark,
                        'found undefined tag handle %r' % utf8(handle),
                        tag_mark,
                    )
                tag = self.transform_tag(handle, suffix)
            else:
                tag = suffix
        # if tag == u'!':
        #     raise ParserError("while parsing a node", start_mark,
        #             "found non-specific tag '!'", tag_mark,
        #      "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag'
        #     and share your opinion.")
        if start_mark is None:
            start_mark = end_mark = self.scanner.peek_token().start_mark
        event = None
        implicit = tag is None or tag == u'!'
        if indentless_sequence and self.scanner.check_token(BlockEntryToken):
            comment = None
            pt = self.scanner.peek_token()
            if pt.comment and pt.comment[0]:
                comment = [pt.comment[0], []]
                pt.comment[0] = None
            end_mark = self.scanner.peek_token().end_mark
            event = SequenceStartEvent(
                anchor, tag, implicit, start_mark, end_mark, flow_style=False, comment=comment
            )
            self.state = self.parse_indentless_sequence_entry
            return event

        if self.scanner.check_token(ScalarToken):
            token = self.scanner.get_token()
            # self.scanner.peek_token_same_line_comment(token)
            end_mark = token.end_mark
            # implicit is a pair: (resolvable as plain, resolvable as quoted)
            if (token.plain and tag is None) or tag == u'!':
                implicit = (True, False)
            elif tag is None:
                implicit = (False, True)
            else:
                implicit = (False, False)
            # nprint('se', token.value, token.comment)
            event = ScalarEvent(
                anchor,
                tag,
                implicit,
                token.value,
                start_mark,
                end_mark,
                style=token.style,
                comment=token.comment,
            )
            self.state = self.states.pop()
        elif self.scanner.check_token(FlowSequenceStartToken):
            pt = self.scanner.peek_token()
            end_mark = pt.end_mark
            event = SequenceStartEvent(
                anchor,
                tag,
                implicit,
                start_mark,
                end_mark,
                flow_style=True,
                comment=pt.comment,
            )
            self.state = self.parse_flow_sequence_first_entry
        elif self.scanner.check_token(FlowMappingStartToken):
            pt = self.scanner.peek_token()
            end_mark = pt.end_mark
            event = MappingStartEvent(
                anchor,
                tag,
                implicit,
                start_mark,
                end_mark,
                flow_style=True,
                comment=pt.comment,
            )
            self.state = self.parse_flow_mapping_first_key
        elif block and self.scanner.check_token(BlockSequenceStartToken):
            end_mark = self.scanner.peek_token().start_mark
            # should inserting the comment be dependent on the
            # indentation?
            pt = self.scanner.peek_token()
            comment = pt.comment
            # nprint('pt0', type(pt))
            if comment is None or comment[1] is None:
                comment = pt.split_comment()
            # nprint('pt1', comment)
            event = SequenceStartEvent(
                anchor, tag, implicit, start_mark, end_mark, flow_style=False, comment=comment
            )
            self.state = self.parse_block_sequence_first_entry
        elif block and self.scanner.check_token(BlockMappingStartToken):
            end_mark = self.scanner.peek_token().start_mark
            comment = self.scanner.peek_token().comment
            event = MappingStartEvent(
                anchor, tag, implicit, start_mark, end_mark, flow_style=False, comment=comment
            )
            self.state = self.parse_block_mapping_first_key
        elif anchor is not None or tag is not None:
            # Empty scalars are allowed even if a tag or an anchor is
            # specified.
            event = ScalarEvent(anchor, tag, (implicit, False), "", start_mark, end_mark)
            self.state = self.states.pop()
        else:
            if block:
                node = 'block'
            else:
                node = 'flow'
            token = self.scanner.peek_token()
            raise ParserError(
                'while parsing a %s node' % node,
                start_mark,
                'expected the node content, but found %r' % token.id,
                token.start_mark,
            )
        return event

    # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)*
    #                                                               BLOCK-END

    def parse_block_sequence_first_entry(self):
        # type: () -> Any
        token = self.scanner.get_token()
        # move any comment from start token
        # token.move_comment(self.scanner.peek_token())
        self.marks.append(token.start_mark)
        return self.parse_block_sequence_entry()

    def parse_block_sequence_entry(self):
        # type: () -> Any
        # One '- item' entry; an entry token directly followed by another
        # entry or the block end stands for an empty node.
        if self.scanner.check_token(BlockEntryToken):
            token = self.scanner.get_token()
            token.move_comment(self.scanner.peek_token())
            if not self.scanner.check_token(BlockEntryToken, BlockEndToken):
                self.states.append(self.parse_block_sequence_entry)
                return self.parse_block_node()
            else:
                self.state = self.parse_block_sequence_entry
                return self.process_empty_scalar(token.end_mark)
        if not self.scanner.check_token(BlockEndToken):
            token = self.scanner.peek_token()
            raise ParserError(
                'while parsing a block collection',
                self.marks[-1],
                'expected <block end>, but found %r' % token.id,
                token.start_mark,
            )
        token = self.scanner.get_token()  # BlockEndToken
        event = SequenceEndEvent(token.start_mark, token.end_mark, comment=token.comment)
        self.state = self.states.pop()
        self.marks.pop()
        return event

    # indentless_sequence ::= (BLOCK-ENTRY block_node?)+

    # indentless_sequence?
    # sequence:
    # - entry
    #  - nested

    def parse_indentless_sequence_entry(self):
        # type: () -> Any
        # Like parse_block_sequence_entry, but there is no BLOCK-END token;
        # the sequence ends at the first key/value/end token.
        if self.scanner.check_token(BlockEntryToken):
            token = self.scanner.get_token()
            token.move_comment(self.scanner.peek_token())
            if not self.scanner.check_token(
                BlockEntryToken, KeyToken, ValueToken, BlockEndToken
            ):
                self.states.append(self.parse_indentless_sequence_entry)
                return self.parse_block_node()
            else:
                self.state = self.parse_indentless_sequence_entry
                return self.process_empty_scalar(token.end_mark)
        token = self.scanner.peek_token()
        event = SequenceEndEvent(token.start_mark, token.start_mark, comment=token.comment)
        self.state = self.states.pop()
        return event

    # block_mapping     ::= BLOCK-MAPPING_START
    #                       ((KEY block_node_or_indentless_sequence?)?
    #                       (VALUE block_node_or_indentless_sequence?)?)*
    #                       BLOCK-END

    def parse_block_mapping_first_key(self):
        # type: () -> Any
        token = self.scanner.get_token()
        self.marks.append(token.start_mark)
        return self.parse_block_mapping_key()

    def parse_block_mapping_key(self):
        # type: () -> Any
        if self.scanner.check_token(KeyToken):
            token = self.scanner.get_token()
            token.move_comment(self.scanner.peek_token())
            if not self.scanner.check_token(KeyToken, ValueToken, BlockEndToken):
                self.states.append(self.parse_block_mapping_value)
                return self.parse_block_node_or_indentless_sequence()
            else:
                self.state = self.parse_block_mapping_value
                return self.process_empty_scalar(token.end_mark)
        # YAML 1.2 allows a value with no preceding key indicator.
        if self.resolver.processing_version > (1, 1) and self.scanner.check_token(ValueToken):
            self.state = self.parse_block_mapping_value
            return self.process_empty_scalar(self.scanner.peek_token().start_mark)
        if not self.scanner.check_token(BlockEndToken):
            token = self.scanner.peek_token()
            raise ParserError(
                'while parsing a block mapping',
                self.marks[-1],
                'expected <block end>, but found %r' % token.id,
                token.start_mark,
            )
        token = self.scanner.get_token()
        token.move_comment(self.scanner.peek_token())
        event = MappingEndEvent(token.start_mark, token.end_mark, comment=token.comment)
        self.state = self.states.pop()
        self.marks.pop()
        return event

    def parse_block_mapping_value(self):
        # type: () -> Any
        if self.scanner.check_token(ValueToken):
            token = self.scanner.get_token()
            # value token might have post comment move it to e.g. block
            if self.scanner.check_token(ValueToken):
                token.move_comment(self.scanner.peek_token())
            else:
                if not self.scanner.check_token(KeyToken):
                    token.move_comment(self.scanner.peek_token(), empty=True)
                # else: empty value for this key cannot move token.comment
            if not self.scanner.check_token(KeyToken, ValueToken, BlockEndToken):
                self.states.append(self.parse_block_mapping_key)
                return self.parse_block_node_or_indentless_sequence()
            else:
                self.state = self.parse_block_mapping_key
                comment = token.comment
                if comment is None:
                    token = self.scanner.peek_token()
                    comment = token.comment
                    if comment:
                        # Split the comment between the empty value and the
                        # following token.
                        token._comment = [None, comment[1]]
                        comment = [comment[0], None]
                return self.process_empty_scalar(token.end_mark, comment=comment)
        else:
            self.state = self.parse_block_mapping_key
            token = self.scanner.peek_token()
            return self.process_empty_scalar(token.start_mark)

    # flow_sequence     ::= FLOW-SEQUENCE-START
    #                       (flow_sequence_entry FLOW-ENTRY)*
    #                       flow_sequence_entry?
    #                       FLOW-SEQUENCE-END
    # flow_sequence_entry   ::= flow_node | KEY flow_node? (VALUE flow_node?)?
    #
    # Note that while production rules for both flow_sequence_entry and
    # flow_mapping_entry are equal, their interpretations are different.
    # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
    # generate an inline mapping (set syntax).

    def parse_flow_sequence_first_entry(self):
        # type: () -> Any
        token = self.scanner.get_token()
        self.marks.append(token.start_mark)
        return self.parse_flow_sequence_entry(first=True)

    def parse_flow_sequence_entry(self, first=False):
        # type: (bool) -> Any
        if not self.scanner.check_token(FlowSequenceEndToken):
            if not first:
                if self.scanner.check_token(FlowEntryToken):
                    self.scanner.get_token()
                else:
                    token = self.scanner.peek_token()
                    raise ParserError(
                        'while parsing a flow sequence',
                        self.marks[-1],
                        "expected ',' or ']', but got %r" % token.id,
                        token.start_mark,
                    )
            if self.scanner.check_token(KeyToken):
                token = self.scanner.peek_token()
                event = MappingStartEvent(
                    None, None, True, token.start_mark, token.end_mark, flow_style=True
                )  # type: Any
                self.state = self.parse_flow_sequence_entry_mapping_key
                return event
            elif not self.scanner.check_token(FlowSequenceEndToken):
                self.states.append(self.parse_flow_sequence_entry)
                return self.parse_flow_node()
        token = self.scanner.get_token()
        event = SequenceEndEvent(token.start_mark, token.end_mark, comment=token.comment)
        self.state = self.states.pop()
        self.marks.pop()
        return event

    def parse_flow_sequence_entry_mapping_key(self):
        # type: () -> Any
        token = self.scanner.get_token()
        if not self.scanner.check_token(ValueToken, FlowEntryToken, FlowSequenceEndToken):
            self.states.append(self.parse_flow_sequence_entry_mapping_value)
            return self.parse_flow_node()
        else:
            self.state = self.parse_flow_sequence_entry_mapping_value
            return self.process_empty_scalar(token.end_mark)

    def parse_flow_sequence_entry_mapping_value(self):
        # type: () -> Any
        if self.scanner.check_token(ValueToken):
            token = self.scanner.get_token()
            if not self.scanner.check_token(FlowEntryToken, FlowSequenceEndToken):
                self.states.append(self.parse_flow_sequence_entry_mapping_end)
                return self.parse_flow_node()
            else:
                self.state = self.parse_flow_sequence_entry_mapping_end
                return self.process_empty_scalar(token.end_mark)
        else:
            self.state = self.parse_flow_sequence_entry_mapping_end
            token = self.scanner.peek_token()
            return self.process_empty_scalar(token.start_mark)

    def parse_flow_sequence_entry_mapping_end(self):
        # type: () -> Any
        self.state = self.parse_flow_sequence_entry
        token = self.scanner.peek_token()
        return MappingEndEvent(token.start_mark, token.start_mark)

    # flow_mapping  ::= FLOW-MAPPING-START
    #                   (flow_mapping_entry FLOW-ENTRY)*
    #                   flow_mapping_entry?
    #                   FLOW-MAPPING-END
    # flow_mapping_entry    ::= flow_node | KEY flow_node? (VALUE flow_node?)?

    def parse_flow_mapping_first_key(self):
        # type: () -> Any
        token = self.scanner.get_token()
        self.marks.append(token.start_mark)
        return self.parse_flow_mapping_key(first=True)

    def parse_flow_mapping_key(self, first=False):
        # type: (Any) -> Any
        if not self.scanner.check_token(FlowMappingEndToken):
            if not first:
                if self.scanner.check_token(FlowEntryToken):
                    self.scanner.get_token()
                else:
                    token = self.scanner.peek_token()
                    raise ParserError(
                        'while parsing a flow mapping',
                        self.marks[-1],
                        "expected ',' or '}', but got %r" % token.id,
                        token.start_mark,
                    )
            if self.scanner.check_token(KeyToken):
                token = self.scanner.get_token()
                if not self.scanner.check_token(
                    ValueToken, FlowEntryToken, FlowMappingEndToken
                ):
                    self.states.append(self.parse_flow_mapping_value)
                    return self.parse_flow_node()
                else:
                    self.state = self.parse_flow_mapping_value
                    return self.process_empty_scalar(token.end_mark)
            elif self.resolver.processing_version > (1, 1) and self.scanner.check_token(
                ValueToken
            ):
                self.state = self.parse_flow_mapping_value
                return self.process_empty_scalar(self.scanner.peek_token().end_mark)
            elif not self.scanner.check_token(FlowMappingEndToken):
                self.states.append(self.parse_flow_mapping_empty_value)
                return self.parse_flow_node()
        token = self.scanner.get_token()
        event = MappingEndEvent(token.start_mark, token.end_mark, comment=token.comment)
        self.state = self.states.pop()
        self.marks.pop()
        return event

    def parse_flow_mapping_value(self):
        # type: () -> Any
        if self.scanner.check_token(ValueToken):
            token = self.scanner.get_token()
            if not self.scanner.check_token(FlowEntryToken, FlowMappingEndToken):
                self.states.append(self.parse_flow_mapping_key)
                return self.parse_flow_node()
            else:
                self.state = self.parse_flow_mapping_key
                return self.process_empty_scalar(token.end_mark)
        else:
            self.state = self.parse_flow_mapping_key
            token = self.scanner.peek_token()
            return self.process_empty_scalar(token.start_mark)

    def parse_flow_mapping_empty_value(self):
        # type: () -> Any
        # A flow-mapping entry that is a bare node acts as a key with an
        # empty value.
        self.state = self.parse_flow_mapping_key
        return self.process_empty_scalar(self.scanner.peek_token().start_mark)

    def process_empty_scalar(self, mark, comment=None):
        # type: (Any, Any) -> Any
        # Produce the event for an implied empty (plain) scalar at ``mark``.
        return ScalarEvent(None, None, (True, False), "", mark, mark, comment=comment)
  714. class RoundTripParser(Parser):
  715. """roundtrip is a safe loader, that wants to see the unmangled tag"""
  716. def transform_tag(self, handle, suffix):
  717. # type: (Any, Any) -> Any
  718. # return self.tag_handles[handle]+suffix
  719. if handle == '!!' and suffix in (
  720. u'null',
  721. u'bool',
  722. u'int',
  723. u'float',
  724. u'binary',
  725. u'timestamp',
  726. u'omap',
  727. u'pairs',
  728. u'set',
  729. u'str',
  730. u'seq',
  731. u'map',
  732. ):
  733. return Parser.transform_tag(self, handle, suffix)
  734. return handle + suffix