# parser.py
# coding: utf-8
# The following YAML grammar is LL(1) and is parsed by a recursive descent
# parser.
#
# stream ::= STREAM-START implicit_document? explicit_document*
#            STREAM-END
# implicit_document ::= block_node DOCUMENT-END*
# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
# block_node_or_indentless_sequence ::=
#                       ALIAS
#                       | properties (block_content |
#                       indentless_block_sequence)?
#                       | block_content
#                       | indentless_block_sequence
# block_node ::= ALIAS
#                | properties block_content?
#                | block_content
# flow_node ::= ALIAS
#               | properties flow_content?
#               | flow_content
# properties ::= TAG ANCHOR? | ANCHOR TAG?
# block_content ::= block_collection | flow_collection | SCALAR
# flow_content ::= flow_collection | SCALAR
# block_collection ::= block_sequence | block_mapping
# flow_collection ::= flow_sequence | flow_mapping
# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)*
#                    BLOCK-END
# indentless_sequence ::= (BLOCK-ENTRY block_node?)+
# block_mapping ::= BLOCK-MAPPING_START
#                   ((KEY block_node_or_indentless_sequence?)?
#                   (VALUE block_node_or_indentless_sequence?)?)*
#                   BLOCK-END
# flow_sequence ::= FLOW-SEQUENCE-START
#                   (flow_sequence_entry FLOW-ENTRY)*
#                   flow_sequence_entry?
#                   FLOW-SEQUENCE-END
# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
# flow_mapping ::= FLOW-MAPPING-START
#                  (flow_mapping_entry FLOW-ENTRY)*
#                  flow_mapping_entry?
#                  FLOW-MAPPING-END
# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
#
# FIRST sets:
#
# stream: { STREAM-START }
# explicit_document: { DIRECTIVE DOCUMENT-START }
# implicit_document: FIRST(block_node)
# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START
#               BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START
#                  FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
# block_sequence: { BLOCK-SEQUENCE-START }
# block_mapping: { BLOCK-MAPPING-START }
# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR
#               BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START
#               FLOW-MAPPING-START BLOCK-ENTRY }
# indentless_sequence: { ENTRY }
# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
# flow_sequence: { FLOW-SEQUENCE-START }
# flow_mapping: { FLOW-MAPPING-START }
# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START
#                        FLOW-MAPPING-START KEY }
# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START
#                       FLOW-MAPPING-START KEY }

# need to have full path with import, as pkg_resources tries to load parser.py in __init__.py
# only to not do anything with the package afterwards
# and for Jython too
  73. from ruamel.yaml.error import MarkedYAMLError
  74. from ruamel.yaml.tokens import * # NOQA
  75. from ruamel.yaml.events import * # NOQA
  76. from ruamel.yaml.scanner import Scanner, RoundTripScanner, ScannerError # NOQA
  77. from ruamel.yaml.scanner import BlankLineComment
  78. from ruamel.yaml.comments import C_PRE, C_POST, C_SPLIT_ON_FIRST_BLANK
  79. from ruamel.yaml.compat import nprint, nprintf # NOQA
  80. from ruamel.yaml.tag import Tag
  81. from typing import Any, Dict, Optional, List, Optional # NOQA
  82. __all__ = ['Parser', 'RoundTripParser', 'ParserError']
  83. def xprintf(*args: Any, **kw: Any) -> Any:
  84. return nprintf(*args, **kw)
  85. pass
# Raised when the token stream cannot be turned into a valid event stream
# (syntax errors detected at the parsing stage).
class ParserError(MarkedYAMLError):
    pass
  88. class Parser:
  89. # Since writing a recursive-descendant parser is a straightforward task, we
  90. # do not give many comments here.
  91. DEFAULT_TAGS = {'!': '!', '!!': 'tag:yaml.org,2002:'}
  92. def __init__(self, loader: Any) -> None:
  93. self.loader = loader
  94. if self.loader is not None and getattr(self.loader, '_parser', None) is None:
  95. self.loader._parser = self
  96. self.reset_parser()
  97. def reset_parser(self) -> None:
  98. # Reset the state attributes (to clear self-references)
  99. self.current_event = self.last_event = None
  100. self.tag_handles: Dict[Any, Any] = {}
  101. self.states: List[Any] = []
  102. self.marks: List[Any] = []
  103. self.state: Any = self.parse_stream_start
  104. def dispose(self) -> None:
  105. self.reset_parser()
  106. @property
  107. def scanner(self) -> Any:
  108. if hasattr(self.loader, 'typ'):
  109. return self.loader.scanner
  110. return self.loader._scanner
  111. @property
  112. def resolver(self) -> Any:
  113. if hasattr(self.loader, 'typ'):
  114. return self.loader.resolver
  115. return self.loader._resolver
  116. def check_event(self, *choices: Any) -> bool:
  117. # Check the type of the next event.
  118. if self.current_event is None:
  119. if self.state:
  120. self.current_event = self.state()
  121. if self.current_event is not None:
  122. if not choices:
  123. return True
  124. for choice in choices:
  125. if isinstance(self.current_event, choice):
  126. return True
  127. return False
  128. def peek_event(self) -> Any:
  129. # Get the next event.
  130. if self.current_event is None:
  131. if self.state:
  132. self.current_event = self.state()
  133. return self.current_event
  134. def get_event(self) -> Any:
  135. # Get the next event and proceed further.
  136. if self.current_event is None:
  137. if self.state:
  138. self.current_event = self.state()
  139. # assert self.current_event is not None
  140. # if self.current_event.end_mark.line != self.peek_event().start_mark.line:
  141. xprintf('get_event', repr(self.current_event), self.peek_event().start_mark.line)
  142. self.last_event = value = self.current_event
  143. self.current_event = None
  144. return value
  145. # stream ::= STREAM-START implicit_document? explicit_document*
  146. # STREAM-END
  147. # implicit_document ::= block_node DOCUMENT-END*
  148. # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
  149. def parse_stream_start(self) -> Any:
  150. # Parse the stream start.
  151. token = self.scanner.get_token()
  152. self.move_token_comment(token)
  153. event = StreamStartEvent(token.start_mark, token.end_mark, encoding=token.encoding)
  154. # Prepare the next state.
  155. self.state = self.parse_implicit_document_start
  156. return event
  157. def parse_implicit_document_start(self) -> Any:
  158. # Parse an implicit document.
  159. if not self.scanner.check_token(DirectiveToken, DocumentStartToken, StreamEndToken):
  160. # don't need copy, as an implicit tag doesn't add tag_handles
  161. self.tag_handles = self.DEFAULT_TAGS
  162. token = self.scanner.peek_token()
  163. start_mark = end_mark = token.start_mark
  164. event = DocumentStartEvent(start_mark, end_mark, explicit=False)
  165. # Prepare the next state.
  166. self.states.append(self.parse_document_end)
  167. self.state = self.parse_block_node
  168. return event
  169. else:
  170. return self.parse_document_start()
  171. def parse_document_start(self) -> Any:
  172. # Parse any extra document end indicators.
  173. while self.scanner.check_token(DocumentEndToken):
  174. self.scanner.get_token()
  175. # Parse an explicit document.
  176. if not self.scanner.check_token(StreamEndToken):
  177. version, tags = self.process_directives()
  178. if not self.scanner.check_token(DocumentStartToken):
  179. raise ParserError(
  180. None,
  181. None,
  182. "expected '<document start>', "
  183. f'but found {self.scanner.peek_token().id,!r}',
  184. self.scanner.peek_token().start_mark,
  185. )
  186. token = self.scanner.get_token()
  187. start_mark = token.start_mark
  188. end_mark = token.end_mark
  189. # if self.loader is not None and \
  190. # end_mark.line != self.scanner.peek_token().start_mark.line:
  191. # self.loader.scalar_after_indicator = False
  192. event: Any = DocumentStartEvent(
  193. start_mark,
  194. end_mark,
  195. explicit=True,
  196. version=version,
  197. tags=tags,
  198. comment=token.comment,
  199. )
  200. self.states.append(self.parse_document_end)
  201. self.state = self.parse_document_content
  202. else:
  203. # Parse the end of the stream.
  204. token = self.scanner.get_token()
  205. event = StreamEndEvent(token.start_mark, token.end_mark, comment=token.comment)
  206. assert not self.states
  207. assert not self.marks
  208. self.state = None
  209. return event
  210. def parse_document_end(self) -> Any:
  211. # Parse the document end.
  212. token = self.scanner.peek_token()
  213. start_mark = end_mark = token.start_mark
  214. explicit = False
  215. if self.scanner.check_token(DocumentEndToken):
  216. token = self.scanner.get_token()
  217. # if token.end_mark.line != self.peek_event().start_mark.line:
  218. pt = self.scanner.peek_token()
  219. if not isinstance(pt, StreamEndToken) and (
  220. token.end_mark.line == pt.start_mark.line
  221. ):
  222. raise ParserError(
  223. None,
  224. None,
  225. 'found non-comment content after document end marker, '
  226. f'{self.scanner.peek_token().id,!r}',
  227. self.scanner.peek_token().start_mark,
  228. )
  229. end_mark = token.end_mark
  230. explicit = True
  231. event = DocumentEndEvent(start_mark, end_mark, explicit=explicit)
  232. # Prepare the next state.
  233. if self.resolver.processing_version == (1, 1):
  234. self.state = self.parse_document_start
  235. else:
  236. if explicit:
  237. # found a document end marker, can be followed by implicit document
  238. self.state = self.parse_implicit_document_start
  239. else:
  240. self.state = self.parse_document_start
  241. return event
  242. def parse_document_content(self) -> Any:
  243. if self.scanner.check_token(
  244. DirectiveToken, DocumentStartToken, DocumentEndToken, StreamEndToken,
  245. ):
  246. event = self.process_empty_scalar(self.scanner.peek_token().start_mark)
  247. self.state = self.states.pop()
  248. return event
  249. else:
  250. return self.parse_block_node()
  251. def process_directives(self) -> Any:
  252. yaml_version = None
  253. self.tag_handles = {}
  254. while self.scanner.check_token(DirectiveToken):
  255. token = self.scanner.get_token()
  256. if token.name == 'YAML':
  257. if yaml_version is not None:
  258. raise ParserError(
  259. None, None, 'found duplicate YAML directive', token.start_mark,
  260. )
  261. major, minor = token.value
  262. if major != 1:
  263. raise ParserError(
  264. None,
  265. None,
  266. 'found incompatible YAML document (version 1.* is required)',
  267. token.start_mark,
  268. )
  269. yaml_version = token.value
  270. elif token.name == 'TAG':
  271. handle, prefix = token.value
  272. if handle in self.tag_handles:
  273. raise ParserError(
  274. None, None, f'duplicate tag handle {handle!r}', token.start_mark,
  275. )
  276. self.tag_handles[handle] = prefix
  277. if bool(self.tag_handles):
  278. value: Any = (yaml_version, self.tag_handles.copy())
  279. else:
  280. value = yaml_version, None
  281. if self.loader is not None and hasattr(self.loader, 'tags'):
  282. self.loader.version = yaml_version
  283. if self.loader.tags is None:
  284. self.loader.tags = {}
  285. for k in self.tag_handles:
  286. self.loader.tags[k] = self.tag_handles[k]
  287. for key in self.DEFAULT_TAGS:
  288. if key not in self.tag_handles:
  289. self.tag_handles[key] = self.DEFAULT_TAGS[key]
  290. return value
  291. # block_node_or_indentless_sequence ::= ALIAS
  292. # | properties (block_content | indentless_block_sequence)?
  293. # | block_content
  294. # | indentless_block_sequence
  295. # block_node ::= ALIAS
  296. # | properties block_content?
  297. # | block_content
  298. # flow_node ::= ALIAS
  299. # | properties flow_content?
  300. # | flow_content
  301. # properties ::= TAG ANCHOR? | ANCHOR TAG?
  302. # block_content ::= block_collection | flow_collection | SCALAR
  303. # flow_content ::= flow_collection | SCALAR
  304. # block_collection ::= block_sequence | block_mapping
  305. # flow_collection ::= flow_sequence | flow_mapping
  306. def parse_block_node(self) -> Any:
  307. return self.parse_node(block=True)
  308. def parse_flow_node(self) -> Any:
  309. return self.parse_node()
  310. def parse_block_node_or_indentless_sequence(self) -> Any:
  311. return self.parse_node(block=True, indentless_sequence=True)
  312. # def transform_tag(self, handle: Any, suffix: Any) -> Any:
  313. # return self.tag_handles[handle] + suffix
  314. def select_tag_transform(self, tag: Tag) -> None:
  315. if tag is None:
  316. return
  317. tag.select_transform(False)
  318. def parse_node(self, block: bool = False, indentless_sequence: bool = False) -> Any:
  319. if self.scanner.check_token(AliasToken):
  320. token = self.scanner.get_token()
  321. event: Any = AliasEvent(token.value, token.start_mark, token.end_mark)
  322. self.state = self.states.pop()
  323. return event
  324. anchor = None
  325. tag = None
  326. start_mark = end_mark = tag_mark = None
  327. if self.scanner.check_token(AnchorToken):
  328. token = self.scanner.get_token()
  329. self.move_token_comment(token)
  330. start_mark = token.start_mark
  331. end_mark = token.end_mark
  332. anchor = token.value
  333. if self.scanner.check_token(TagToken):
  334. token = self.scanner.get_token()
  335. tag_mark = token.start_mark
  336. end_mark = token.end_mark
  337. # tag = token.value
  338. tag = Tag(
  339. handle=token.value[0], suffix=token.value[1], handles=self.tag_handles,
  340. )
  341. elif self.scanner.check_token(TagToken):
  342. token = self.scanner.get_token()
  343. start_mark = tag_mark = token.start_mark
  344. end_mark = token.end_mark
  345. # tag = token.value
  346. tag = Tag(handle=token.value[0], suffix=token.value[1], handles=self.tag_handles)
  347. if self.scanner.check_token(AnchorToken):
  348. token = self.scanner.get_token()
  349. start_mark = tag_mark = token.start_mark
  350. end_mark = token.end_mark
  351. anchor = token.value
  352. if tag is not None:
  353. self.select_tag_transform(tag)
  354. if tag.check_handle():
  355. raise ParserError(
  356. 'while parsing a node',
  357. start_mark,
  358. f'found undefined tag handle {tag.handle!r}',
  359. tag_mark,
  360. )
  361. if start_mark is None:
  362. start_mark = end_mark = self.scanner.peek_token().start_mark
  363. event = None
  364. implicit = tag is None or str(tag) == '!'
  365. if indentless_sequence and self.scanner.check_token(BlockEntryToken):
  366. comment = None
  367. pt = self.scanner.peek_token()
  368. if self.loader and self.loader.comment_handling is None:
  369. if pt.comment and pt.comment[0]:
  370. comment = [pt.comment[0], []]
  371. pt.comment[0] = None
  372. elif self.loader:
  373. if pt.comment:
  374. comment = pt.comment
  375. end_mark = self.scanner.peek_token().end_mark
  376. event = SequenceStartEvent(
  377. anchor, tag, implicit, start_mark, end_mark, flow_style=False, comment=comment,
  378. )
  379. self.state = self.parse_indentless_sequence_entry
  380. return event
  381. if self.scanner.check_token(ScalarToken):
  382. token = self.scanner.get_token()
  383. # self.scanner.peek_token_same_line_comment(token)
  384. end_mark = token.end_mark
  385. if (token.plain and tag is None) or str(tag) == '!':
  386. dimplicit = (True, False)
  387. elif tag is None:
  388. dimplicit = (False, True)
  389. else:
  390. dimplicit = (False, False)
  391. # nprint('se', token.value, token.comment)
  392. event = ScalarEvent(
  393. anchor,
  394. tag,
  395. dimplicit,
  396. token.value,
  397. start_mark,
  398. end_mark,
  399. style=token.style,
  400. comment=token.comment,
  401. )
  402. self.state = self.states.pop()
  403. elif self.scanner.check_token(FlowSequenceStartToken):
  404. pt = self.scanner.peek_token()
  405. end_mark = pt.end_mark
  406. event = SequenceStartEvent(
  407. anchor,
  408. tag,
  409. implicit,
  410. start_mark,
  411. end_mark,
  412. flow_style=True,
  413. comment=pt.comment,
  414. )
  415. self.state = self.parse_flow_sequence_first_entry
  416. elif self.scanner.check_token(FlowMappingStartToken):
  417. pt = self.scanner.peek_token()
  418. end_mark = pt.end_mark
  419. event = MappingStartEvent(
  420. anchor,
  421. tag,
  422. implicit,
  423. start_mark,
  424. end_mark,
  425. flow_style=True,
  426. comment=pt.comment,
  427. )
  428. self.state = self.parse_flow_mapping_first_key
  429. elif block and self.scanner.check_token(BlockSequenceStartToken):
  430. end_mark = self.scanner.peek_token().start_mark
  431. # should inserting the comment be dependent on the
  432. # indentation?
  433. pt = self.scanner.peek_token()
  434. comment = pt.comment
  435. # nprint('pt0', type(pt))
  436. if comment is None or comment[1] is None:
  437. comment = pt.split_old_comment()
  438. # nprint('pt1', comment)
  439. event = SequenceStartEvent(
  440. anchor, tag, implicit, start_mark, end_mark, flow_style=False, comment=comment,
  441. )
  442. self.state = self.parse_block_sequence_first_entry
  443. elif block and self.scanner.check_token(BlockMappingStartToken):
  444. end_mark = self.scanner.peek_token().start_mark
  445. comment = self.scanner.peek_token().comment
  446. event = MappingStartEvent(
  447. anchor, tag, implicit, start_mark, end_mark, flow_style=False, comment=comment,
  448. )
  449. self.state = self.parse_block_mapping_first_key
  450. elif anchor is not None or tag is not None:
  451. # Empty scalars are allowed even if a tag or an anchor is
  452. # specified.
  453. event = ScalarEvent(anchor, tag, (implicit, False), "", start_mark, end_mark)
  454. self.state = self.states.pop()
  455. else:
  456. if block:
  457. node = 'block'
  458. else:
  459. node = 'flow'
  460. token = self.scanner.peek_token()
  461. raise ParserError(
  462. f'while parsing a {node!s} node',
  463. start_mark,
  464. f'expected the node content, but found {token.id!r}',
  465. token.start_mark,
  466. )
  467. return event
  468. # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)*
  469. # BLOCK-END
  470. def parse_block_sequence_first_entry(self) -> Any:
  471. token = self.scanner.get_token()
  472. # move any comment from start token
  473. # self.move_token_comment(token)
  474. self.marks.append(token.start_mark)
  475. return self.parse_block_sequence_entry()
  476. def parse_block_sequence_entry(self) -> Any:
  477. if self.scanner.check_token(BlockEntryToken):
  478. token = self.scanner.get_token()
  479. self.move_token_comment(token)
  480. if not self.scanner.check_token(BlockEntryToken, BlockEndToken):
  481. self.states.append(self.parse_block_sequence_entry)
  482. return self.parse_block_node()
  483. else:
  484. self.state = self.parse_block_sequence_entry
  485. return self.process_empty_scalar(token.end_mark)
  486. if not self.scanner.check_token(BlockEndToken):
  487. token = self.scanner.peek_token()
  488. raise ParserError(
  489. 'while parsing a block collection',
  490. self.marks[-1],
  491. f'expected <block end>, but found {token.id!r}',
  492. token.start_mark,
  493. )
  494. token = self.scanner.get_token() # BlockEndToken
  495. event = SequenceEndEvent(token.start_mark, token.end_mark, comment=token.comment)
  496. self.state = self.states.pop()
  497. self.marks.pop()
  498. return event
  499. # indentless_sequence ::= (BLOCK-ENTRY block_node?)+
  500. # indentless_sequence?
  501. # sequence:
  502. # - entry
  503. # - nested
  504. def parse_indentless_sequence_entry(self) -> Any:
  505. if self.scanner.check_token(BlockEntryToken):
  506. token = self.scanner.get_token()
  507. self.move_token_comment(token)
  508. if not self.scanner.check_token(
  509. BlockEntryToken, KeyToken, ValueToken, BlockEndToken,
  510. ):
  511. self.states.append(self.parse_indentless_sequence_entry)
  512. return self.parse_block_node()
  513. else:
  514. self.state = self.parse_indentless_sequence_entry
  515. return self.process_empty_scalar(token.end_mark)
  516. token = self.scanner.peek_token()
  517. c = None
  518. if self.loader and self.loader.comment_handling is None:
  519. c = token.comment
  520. start_mark = token.start_mark
  521. else:
  522. start_mark = self.last_event.end_mark # type: ignore
  523. c = self.distribute_comment(token.comment, start_mark.line) # type: ignore
  524. event = SequenceEndEvent(start_mark, start_mark, comment=c)
  525. self.state = self.states.pop()
  526. return event
  527. # block_mapping ::= BLOCK-MAPPING_START
  528. # ((KEY block_node_or_indentless_sequence?)?
  529. # (VALUE block_node_or_indentless_sequence?)?)*
  530. # BLOCK-END
  531. def parse_block_mapping_first_key(self) -> Any:
  532. token = self.scanner.get_token()
  533. self.marks.append(token.start_mark)
  534. return self.parse_block_mapping_key()
  535. def parse_block_mapping_key(self) -> Any:
  536. if self.scanner.check_token(KeyToken):
  537. token = self.scanner.get_token()
  538. self.move_token_comment(token)
  539. if not self.scanner.check_token(KeyToken, ValueToken, BlockEndToken):
  540. self.states.append(self.parse_block_mapping_value)
  541. return self.parse_block_node_or_indentless_sequence()
  542. else:
  543. self.state = self.parse_block_mapping_value
  544. return self.process_empty_scalar(token.end_mark)
  545. if self.resolver.processing_version > (1, 1) and self.scanner.check_token(ValueToken):
  546. self.state = self.parse_block_mapping_value
  547. return self.process_empty_scalar(self.scanner.peek_token().start_mark)
  548. if not self.scanner.check_token(BlockEndToken):
  549. token = self.scanner.peek_token()
  550. raise ParserError(
  551. 'while parsing a block mapping',
  552. self.marks[-1],
  553. f'expected <block end>, but found {token.id!r}',
  554. token.start_mark,
  555. )
  556. token = self.scanner.get_token()
  557. self.move_token_comment(token)
  558. event = MappingEndEvent(token.start_mark, token.end_mark, comment=token.comment)
  559. self.state = self.states.pop()
  560. self.marks.pop()
  561. return event
  562. def parse_block_mapping_value(self) -> Any:
  563. if self.scanner.check_token(ValueToken):
  564. token = self.scanner.get_token()
  565. # value token might have post comment move it to e.g. block
  566. if self.scanner.check_token(ValueToken):
  567. self.move_token_comment(token)
  568. else:
  569. if not self.scanner.check_token(KeyToken):
  570. self.move_token_comment(token, empty=True)
  571. # else: empty value for this key cannot move token.comment
  572. if not self.scanner.check_token(KeyToken, ValueToken, BlockEndToken):
  573. self.states.append(self.parse_block_mapping_key)
  574. return self.parse_block_node_or_indentless_sequence()
  575. else:
  576. self.state = self.parse_block_mapping_key
  577. comment = token.comment
  578. if comment is None:
  579. token = self.scanner.peek_token()
  580. comment = token.comment
  581. if comment:
  582. token._comment = [None, comment[1]]
  583. comment = [comment[0], None]
  584. return self.process_empty_scalar(token.end_mark, comment=comment)
  585. else:
  586. self.state = self.parse_block_mapping_key
  587. token = self.scanner.peek_token()
  588. return self.process_empty_scalar(token.start_mark)
  589. # flow_sequence ::= FLOW-SEQUENCE-START
  590. # (flow_sequence_entry FLOW-ENTRY)*
  591. # flow_sequence_entry?
  592. # FLOW-SEQUENCE-END
  593. # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
  594. #
  595. # Note that while production rules for both flow_sequence_entry and
  596. # flow_mapping_entry are equal, their interpretations are different.
  597. # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
  598. # generate an inline mapping (set syntax).
  599. def parse_flow_sequence_first_entry(self) -> Any:
  600. token = self.scanner.get_token()
  601. self.marks.append(token.start_mark)
  602. return self.parse_flow_sequence_entry(first=True)
  603. def parse_flow_sequence_entry(self, first: bool = False) -> Any:
  604. if not self.scanner.check_token(FlowSequenceEndToken):
  605. if not first:
  606. if self.scanner.check_token(FlowEntryToken):
  607. self.scanner.get_token()
  608. else:
  609. token = self.scanner.peek_token()
  610. raise ParserError(
  611. 'while parsing a flow sequence',
  612. self.marks[-1],
  613. f"expected ',' or ']', but got {token.id!r}",
  614. token.start_mark,
  615. )
  616. if self.scanner.check_token(KeyToken):
  617. token = self.scanner.peek_token()
  618. event: Any = MappingStartEvent(
  619. None, None, True, token.start_mark, token.end_mark, flow_style=True,
  620. )
  621. self.state = self.parse_flow_sequence_entry_mapping_key
  622. return event
  623. elif not self.scanner.check_token(FlowSequenceEndToken):
  624. self.states.append(self.parse_flow_sequence_entry)
  625. return self.parse_flow_node()
  626. token = self.scanner.get_token()
  627. event = SequenceEndEvent(token.start_mark, token.end_mark, comment=token.comment)
  628. self.state = self.states.pop()
  629. self.marks.pop()
  630. return event
  631. def parse_flow_sequence_entry_mapping_key(self) -> Any:
  632. token = self.scanner.get_token()
  633. if not self.scanner.check_token(ValueToken, FlowEntryToken, FlowSequenceEndToken):
  634. self.states.append(self.parse_flow_sequence_entry_mapping_value)
  635. return self.parse_flow_node()
  636. else:
  637. self.state = self.parse_flow_sequence_entry_mapping_value
  638. return self.process_empty_scalar(token.end_mark)
  639. def parse_flow_sequence_entry_mapping_value(self) -> Any:
  640. if self.scanner.check_token(ValueToken):
  641. token = self.scanner.get_token()
  642. if not self.scanner.check_token(FlowEntryToken, FlowSequenceEndToken):
  643. self.states.append(self.parse_flow_sequence_entry_mapping_end)
  644. return self.parse_flow_node()
  645. else:
  646. self.state = self.parse_flow_sequence_entry_mapping_end
  647. return self.process_empty_scalar(token.end_mark)
  648. else:
  649. self.state = self.parse_flow_sequence_entry_mapping_end
  650. token = self.scanner.peek_token()
  651. return self.process_empty_scalar(token.start_mark)
  652. def parse_flow_sequence_entry_mapping_end(self) -> Any:
  653. self.state = self.parse_flow_sequence_entry
  654. token = self.scanner.peek_token()
  655. return MappingEndEvent(token.start_mark, token.start_mark)
  656. # flow_mapping ::= FLOW-MAPPING-START
  657. # (flow_mapping_entry FLOW-ENTRY)*
  658. # flow_mapping_entry?
  659. # FLOW-MAPPING-END
  660. # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
  661. def parse_flow_mapping_first_key(self) -> Any:
  662. token = self.scanner.get_token()
  663. self.marks.append(token.start_mark)
  664. return self.parse_flow_mapping_key(first=True)
  665. def parse_flow_mapping_key(self, first: Any = False) -> Any:
  666. if not self.scanner.check_token(FlowMappingEndToken):
  667. if not first:
  668. if self.scanner.check_token(FlowEntryToken):
  669. self.scanner.get_token()
  670. else:
  671. token = self.scanner.peek_token()
  672. raise ParserError(
  673. 'while parsing a flow mapping',
  674. self.marks[-1],
  675. f"expected ',' or '}}', but got {token.id!r}",
  676. token.start_mark,
  677. )
  678. if self.scanner.check_token(KeyToken):
  679. token = self.scanner.get_token()
  680. if not self.scanner.check_token(
  681. ValueToken, FlowEntryToken, FlowMappingEndToken,
  682. ):
  683. self.states.append(self.parse_flow_mapping_value)
  684. return self.parse_flow_node()
  685. else:
  686. self.state = self.parse_flow_mapping_value
  687. return self.process_empty_scalar(token.end_mark)
  688. elif self.resolver.processing_version > (1, 1) and self.scanner.check_token(
  689. ValueToken,
  690. ):
  691. self.state = self.parse_flow_mapping_value
  692. return self.process_empty_scalar(self.scanner.peek_token().end_mark)
  693. elif not self.scanner.check_token(FlowMappingEndToken):
  694. self.states.append(self.parse_flow_mapping_empty_value)
  695. return self.parse_flow_node()
  696. token = self.scanner.get_token()
  697. event = MappingEndEvent(token.start_mark, token.end_mark, comment=token.comment)
  698. self.state = self.states.pop()
  699. self.marks.pop()
  700. return event
  701. def parse_flow_mapping_value(self) -> Any:
  702. if self.scanner.check_token(ValueToken):
  703. token = self.scanner.get_token()
  704. if not self.scanner.check_token(FlowEntryToken, FlowMappingEndToken):
  705. self.states.append(self.parse_flow_mapping_key)
  706. return self.parse_flow_node()
  707. else:
  708. self.state = self.parse_flow_mapping_key
  709. return self.process_empty_scalar(token.end_mark)
  710. else:
  711. self.state = self.parse_flow_mapping_key
  712. token = self.scanner.peek_token()
  713. return self.process_empty_scalar(token.start_mark)
  714. def parse_flow_mapping_empty_value(self) -> Any:
  715. self.state = self.parse_flow_mapping_key
  716. return self.process_empty_scalar(self.scanner.peek_token().start_mark)
  717. def process_empty_scalar(self, mark: Any, comment: Any = None) -> Any:
  718. return ScalarEvent(None, None, (True, False), "", mark, mark, comment=comment)
  719. def move_token_comment(
  720. self, token: Any, nt: Optional[Any] = None, empty: Optional[bool] = False,
  721. ) -> Any:
  722. pass
class RoundTripParser(Parser):
    """roundtrip is a safe loader, that wants to see the unmangled tag"""

    def select_tag_transform(self, tag: Tag) -> None:
        # round-tripping keeps the original tag text, so enable the
        # round-trip transform instead of the plain one
        if tag is None:
            return
        tag.select_transform(True)

    def move_token_comment(
        self, token: Any, nt: Optional[Any] = None, empty: Optional[bool] = False,
    ) -> Any:
        # relocate an old-style comment from `token` onto `nt`
        # (or the next token when nt is None)
        token.move_old_comment(self.scanner.peek_token() if nt is None else nt, empty=empty)
class RoundTripParserSC(RoundTripParser):
    """roundtrip is a safe loader, that wants to see the unmangled tag"""

    # some of the differences are based on the superclass testing
    # if self.loader.comment_handling is not None

    def move_token_comment(
        self: Any, token: Any, nt: Any = None, empty: Optional[bool] = False,
    ) -> None:
        # new-style comment handling (comments stored on the scanner)
        token.move_new_comment(self.scanner.peek_token() if nt is None else nt, empty=empty)

    def distribute_comment(self, comment: Any, line: Any) -> Any:
        # Split a comment list according to loader.comment_handling (C_PRE /
        # C_POST / C_SPLIT_ON_FIRST_BLANK); returns the part to attach here,
        # or None when nothing should be attached.
        # ToDo, look at indentation of the comment to determine attachment
        if comment is None:
            return None
        if not comment[0]:
            return None
        # comments are expected to start on the line following `line`
        if comment[0][0] != line + 1:
            nprintf('>>>dcxxx', comment, line)
        assert comment[0][0] == line + 1
        # if comment[0] - line > 1:
        #     return
        typ = self.loader.comment_handling & 0b11
        # nprintf('>>>dca', comment, line, typ)
        if typ == C_POST:
            return None
        if typ == C_PRE:
            c = [None, None, comment[0]]
            comment[0] = None
            return c
        # C_SPLIT_ON_FIRST_BLANK: break the run at the first blank-line comment
        # nprintf('>>>dcb', comment[0])
        for _idx, cmntidx in enumerate(comment[0]):
            # nprintf('>>>dcb', cmntidx)
            if isinstance(self.scanner.comments[cmntidx], BlankLineComment):
                break
        else:
            return None  # no space found
        if _idx == 0:
            return None  # first line was blank
        # nprintf('>>>dcc', idx)
        if typ == C_SPLIT_ON_FIRST_BLANK:
            c = [None, None, comment[0][:_idx]]
            comment[0] = comment[0][_idx:]
            return c
        raise NotImplementedError  # reserved