emitter.py 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137
  1. # Emitter expects events obeying the following grammar:
  2. # stream ::= STREAM-START document* STREAM-END
  3. # document ::= DOCUMENT-START node DOCUMENT-END
  4. # node ::= SCALAR | sequence | mapping
  5. # sequence ::= SEQUENCE-START node* SEQUENCE-END
  6. # mapping ::= MAPPING-START (node node)* MAPPING-END
  7. __all__ = ['Emitter', 'EmitterError']
  8. from .error import YAMLError
  9. from .events import *
  class EmitterError(YAMLError):
      """Error raised by the emitter for unexpected events or invalid
      anchors, tags, tag handles, tag prefixes and versions."""
  class ScalarAnalysis:
      """Plain record describing one scalar value.

      It keeps the scalar itself, whether it is empty or multiline, and
      one flag per output style saying whether that style is allowed.
      """

      def __init__(self, scalar, empty, multiline,
              allow_flow_plain, allow_block_plain,
              allow_single_quoted, allow_double_quoted,
              allow_block):
          # The value and its basic shape.
          self.scalar, self.empty, self.multiline = scalar, empty, multiline
          # Plain (unquoted) styles, in flow and block context.
          self.allow_flow_plain = allow_flow_plain
          self.allow_block_plain = allow_block_plain
          # Quoted styles.
          self.allow_single_quoted = allow_single_quoted
          self.allow_double_quoted = allow_double_quoted
          # Block scalar styles.
          self.allow_block = allow_block
  25. class Emitter:
  26. DEFAULT_TAG_PREFIXES = {
  27. '!' : '!',
  28. 'tag:yaml.org,2002:' : '!!',
  29. }
  30. def __init__(self, stream, canonical=None, indent=None, width=None,
  31. allow_unicode=None, line_break=None):
  32. # The stream should have the methods `write` and possibly `flush`.
  33. self.stream = stream
  34. # Encoding can be overridden by STREAM-START.
  35. self.encoding = None
  36. # Emitter is a state machine with a stack of states to handle nested
  37. # structures.
  38. self.states = []
  39. self.state = self.expect_stream_start
  40. # Current event and the event queue.
  41. self.events = []
  42. self.event = None
  43. # The current indentation level and the stack of previous indents.
  44. self.indents = []
  45. self.indent = None
  46. # Flow level.
  47. self.flow_level = 0
  48. # Contexts.
  49. self.root_context = False
  50. self.sequence_context = False
  51. self.mapping_context = False
  52. self.simple_key_context = False
  53. # Characteristics of the last emitted character:
  54. # - current position.
  55. # - is it a whitespace?
  56. # - is it an indention character
  57. # (indentation space, '-', '?', or ':')?
  58. self.line = 0
  59. self.column = 0
  60. self.whitespace = True
  61. self.indention = True
  62. # Whether the document requires an explicit document indicator
  63. self.open_ended = False
  64. # Formatting details.
  65. self.canonical = canonical
  66. self.allow_unicode = allow_unicode
  67. self.best_indent = 2
  68. if indent and 1 < indent < 10:
  69. self.best_indent = indent
  70. self.best_width = 80
  71. if width and width > self.best_indent*2:
  72. self.best_width = width
  73. self.best_line_break = '\n'
  74. if line_break in ['\r', '\n', '\r\n']:
  75. self.best_line_break = line_break
  76. # Tag prefixes.
  77. self.tag_prefixes = None
  78. # Prepared anchor and tag.
  79. self.prepared_anchor = None
  80. self.prepared_tag = None
  81. # Scalar analysis and style.
  82. self.analysis = None
  83. self.style = None
  84. def dispose(self):
  85. # Reset the state attributes (to clear self-references)
  86. self.states = []
  87. self.state = None
  def emit(self, event):
      """Queue *event* and process queued events while enough are available.

      Each processed event is taken from the front of the queue, exposed
      as `self.event` while the current state handler runs, and cleared
      afterwards.
      """
      self.events.append(event)
      while True:
          if self.need_more_events():
              break
          self.event = self.events.pop(0)
          self.state()
          self.event = None
  94. # In some cases, we wait for a few next events before emitting.
  95. def need_more_events(self):
  96. if not self.events:
  97. return True
  98. event = self.events[0]
  99. if isinstance(event, DocumentStartEvent):
  100. return self.need_events(1)
  101. elif isinstance(event, SequenceStartEvent):
  102. return self.need_events(2)
  103. elif isinstance(event, MappingStartEvent):
  104. return self.need_events(3)
  105. else:
  106. return False
  107. def need_events(self, count):
  108. level = 0
  109. for event in self.events[1:]:
  110. if isinstance(event, (DocumentStartEvent, CollectionStartEvent)):
  111. level += 1
  112. elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)):
  113. level -= 1
  114. elif isinstance(event, StreamEndEvent):
  115. level = -1
  116. if level < 0:
  117. return False
  118. return (len(self.events) < count+1)
  119. def increase_indent(self, flow=False, indentless=False):
  120. self.indents.append(self.indent)
  121. if self.indent is None:
  122. if flow:
  123. self.indent = self.best_indent
  124. else:
  125. self.indent = 0
  126. elif not indentless:
  127. self.indent += self.best_indent
  128. # States.
  129. # Stream handlers.
  def expect_stream_start(self):
      """Handle the first event, which must be a StreamStartEvent.

      The event's encoding is adopted only when the stream object has no
      `encoding` attribute of its own.  Raises EmitterError for any other
      event type.
      """
      if not isinstance(self.event, StreamStartEvent):
          raise EmitterError("expected StreamStartEvent, but got %s"
                  % self.event)
      if self.event.encoding and not hasattr(self.stream, 'encoding'):
          self.encoding = self.event.encoding
      self.write_stream_start()
      self.state = self.expect_first_document_start
  def expect_nothing(self):
      """Reject any event; always raises EmitterError."""
      message = "expected nothing, but got %s" % self.event
      raise EmitterError(message)
  141. # Document handlers.
  def expect_first_document_start(self):
      """Handle the start of the first document in the stream."""
      result = self.expect_document_start(first=True)
      return result
  def expect_document_start(self, first=False):
      """Handle a DocumentStartEvent or the StreamEndEvent.

      For a document start this writes the version and tag directives,
      resets the tag prefixes, and writes '---' unless the document can
      be started implicitly.  For a stream end it closes an open-ended
      document with '...' and finishes the stream.  Raises EmitterError
      for any other event.
      """
      event = self.event
      if isinstance(event, DocumentStartEvent):
          has_directives = event.version or event.tags
          if has_directives and self.open_ended:
              # Terminate the previous document before the directives.
              self.write_indicator('...', True)
              self.write_indent()
          if event.version:
              self.write_version_directive(
                  self.prepare_version(event.version))
          self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()
          if event.tags:
              for handle in sorted(event.tags.keys()):
                  prefix = event.tags[handle]
                  self.tag_prefixes[prefix] = handle
                  handle_text = self.prepare_tag_handle(handle)
                  prefix_text = self.prepare_tag_prefix(prefix)
                  self.write_tag_directive(handle_text, prefix_text)
          # Only the first document, without directives, may omit '---'.
          implicit = (first and not event.explicit and not self.canonical
                  and not event.version and not event.tags
                  and not self.check_empty_document())
          if not implicit:
              self.write_indent()
              self.write_indicator('---', True)
              if self.canonical:
                  self.write_indent()
          self.state = self.expect_document_root
          return
      if isinstance(event, StreamEndEvent):
          if self.open_ended:
              self.write_indicator('...', True)
              self.write_indent()
          self.write_stream_end()
          self.state = self.expect_nothing
          return
      raise EmitterError("expected DocumentStartEvent, but got %s"
              % self.event)
  def expect_document_end(self):
      """Handle a DocumentEndEvent, writing '...' when it is explicit.

      Flushes the stream and goes back to expecting a document start.
      Raises EmitterError for any other event.
      """
      if not isinstance(self.event, DocumentEndEvent):
          raise EmitterError("expected DocumentEndEvent, but got %s"
                  % self.event)
      self.write_indent()
      if self.event.explicit:
          self.write_indicator('...', True)
          self.write_indent()
      self.flush_stream()
      self.state = self.expect_document_start
  def expect_document_root(self):
      """Emit the root node, then continue with the document end."""
      after_root = self.expect_document_end
      self.states.append(after_root)
      self.expect_node(root=True)
  193. # Node handlers.
  def expect_node(self, root=False, sequence=False, mapping=False,
          simple_key=False):
      """Record the node's context and dispatch on the event type.

      Aliases go to expect_alias.  Scalars and collections first get
      their anchor and tag written.  Collections are emitted in flow
      style when already inside a flow collection, in canonical mode,
      when the event asks for it, or when they are empty; otherwise in
      block style.  Raises EmitterError for a non-node event.
      """
      self.root_context = root
      self.sequence_context = sequence
      self.mapping_context = mapping
      self.simple_key_context = simple_key

      event = self.event
      if isinstance(event, AliasEvent):
          self.expect_alias()
          return
      if not isinstance(event, (ScalarEvent, CollectionStartEvent)):
          raise EmitterError("expected NodeEvent, but got %s" % self.event)

      self.process_anchor('&')
      self.process_tag()
      if isinstance(self.event, ScalarEvent):
          self.expect_scalar()
      elif isinstance(self.event, SequenceStartEvent):
          use_flow = (self.flow_level or self.canonical
                  or self.event.flow_style or self.check_empty_sequence())
          if use_flow:
              self.expect_flow_sequence()
          else:
              self.expect_block_sequence()
      elif isinstance(self.event, MappingStartEvent):
          use_flow = (self.flow_level or self.canonical
                  or self.event.flow_style or self.check_empty_mapping())
          if use_flow:
              self.expect_flow_mapping()
          else:
              self.expect_block_mapping()
  def expect_alias(self):
      """Write an alias ('*' plus anchor) and return to the previous state.

      Raises EmitterError when the alias event carries no anchor.
      """
      anchor_missing = self.event.anchor is None
      if anchor_missing:
          raise EmitterError("anchor is not specified for alias")
      self.process_anchor('*')
      self.state = self.states.pop()
  226. def expect_scalar(self):
  227. self.increase_indent(flow=True)
  228. self.process_scalar()
  229. self.indent = self.indents.pop()
  230. self.state = self.states.pop()
  231. # Flow sequence handlers.
  232. def expect_flow_sequence(self):
  233. self.write_indicator('[', True, whitespace=True)
  234. self.flow_level += 1
  235. self.increase_indent(flow=True)
  236. self.state = self.expect_first_flow_sequence_item
  def expect_first_flow_sequence_item(self):
      """Handle the first item of a flow sequence, or its immediate end.

      On SequenceEndEvent the sequence is closed with ']'.  Otherwise the
      item is emitted, after a line break in canonical mode or when the
      line is already wider than the preferred width.
      """
      if isinstance(self.event, SequenceEndEvent):
          self.indent = self.indents.pop()
          self.flow_level -= 1
          self.write_indicator(']', False)
          self.state = self.states.pop()
          return
      if self.canonical or self.column > self.best_width:
          self.write_indent()
      self.states.append(self.expect_flow_sequence_item)
      self.expect_node(sequence=True)
  def expect_flow_sequence_item(self):
      """Handle a later item of a flow sequence, or the sequence end.

      On SequenceEndEvent the sequence is closed with ']', preceded in
      canonical mode by a trailing ',' and a line break.  Otherwise a ','
      separator is written and the next item is emitted.
      """
      if isinstance(self.event, SequenceEndEvent):
          self.indent = self.indents.pop()
          self.flow_level -= 1
          if self.canonical:
              self.write_indicator(',', False)
              self.write_indent()
          self.write_indicator(']', False)
          self.state = self.states.pop()
          return
      self.write_indicator(',', False)
      if self.canonical or self.column > self.best_width:
          self.write_indent()
      self.states.append(self.expect_flow_sequence_item)
      self.expect_node(sequence=True)
  263. # Flow mapping handlers.
  264. def expect_flow_mapping(self):
  265. self.write_indicator('{', True, whitespace=True)
  266. self.flow_level += 1
  267. self.increase_indent(flow=True)
  268. self.state = self.expect_first_flow_mapping_key
  def expect_first_flow_mapping_key(self):
      """Handle the first key of a flow mapping, or its immediate end.

      On MappingEndEvent the mapping is closed with '}'.  Otherwise the
      key is emitted as a simple key when possible (never in canonical
      mode), or as an explicit '?' key.
      """
      if isinstance(self.event, MappingEndEvent):
          self.indent = self.indents.pop()
          self.flow_level -= 1
          self.write_indicator('}', False)
          self.state = self.states.pop()
          return
      if self.canonical or self.column > self.best_width:
          self.write_indent()
      if self.canonical or not self.check_simple_key():
          self.write_indicator('?', True)
          self.states.append(self.expect_flow_mapping_value)
          self.expect_node(mapping=True)
      else:
          self.states.append(self.expect_flow_mapping_simple_value)
          self.expect_node(mapping=True, simple_key=True)
  def expect_flow_mapping_key(self):
      """Handle a later key of a flow mapping, or the mapping end.

      On MappingEndEvent the mapping is closed with '}', preceded in
      canonical mode by a trailing ',' and a line break.  Otherwise a ','
      separator is written and the key is emitted as a simple key when
      possible (never in canonical mode), or as an explicit '?' key.
      """
      if isinstance(self.event, MappingEndEvent):
          self.indent = self.indents.pop()
          self.flow_level -= 1
          if self.canonical:
              self.write_indicator(',', False)
              self.write_indent()
          self.write_indicator('}', False)
          self.state = self.states.pop()
          return
      self.write_indicator(',', False)
      if self.canonical or self.column > self.best_width:
          self.write_indent()
      if self.canonical or not self.check_simple_key():
          self.write_indicator('?', True)
          self.states.append(self.expect_flow_mapping_value)
          self.expect_node(mapping=True)
      else:
          self.states.append(self.expect_flow_mapping_simple_value)
          self.expect_node(mapping=True, simple_key=True)
  def expect_flow_mapping_simple_value(self):
      """Write ':' directly after a simple key and emit the value."""
      self.write_indicator(':', False)
      next_state = self.expect_flow_mapping_key
      self.states.append(next_state)
      self.expect_node(mapping=True)
  def expect_flow_mapping_value(self):
      """Write ':' after an explicit key and emit the value.

      A line break comes first in canonical mode or when the line is
      already wider than the preferred width.
      """
      too_wide = self.column > self.best_width
      if self.canonical or too_wide:
          self.write_indent()
      self.write_indicator(':', True)
      self.states.append(self.expect_flow_mapping_key)
      self.expect_node(mapping=True)
  315. # Block sequence handlers.
  316. def expect_block_sequence(self):
  317. indentless = (self.mapping_context and not self.indention)
  318. self.increase_indent(flow=False, indentless=indentless)
  319. self.state = self.expect_first_block_sequence_item
  def expect_first_block_sequence_item(self):
      """Handle the first item of a block sequence."""
      result = self.expect_block_sequence_item(first=True)
      return result
  def expect_block_sequence_item(self, first=False):
      """Emit one '- ' item of a block sequence, or close the sequence.

      The end event is only recognised for items after the first one.
      """
      at_end = not first and isinstance(self.event, SequenceEndEvent)
      if at_end:
          self.indent = self.indents.pop()
          self.state = self.states.pop()
          return
      self.write_indent()
      self.write_indicator('-', True, indention=True)
      self.states.append(self.expect_block_sequence_item)
      self.expect_node(sequence=True)
  331. # Block mapping handlers.
  332. def expect_block_mapping(self):
  333. self.increase_indent(flow=False)
  334. self.state = self.expect_first_block_mapping_key
  def expect_first_block_mapping_key(self):
      """Handle the first key of a block mapping."""
      result = self.expect_block_mapping_key(first=True)
      return result
  def expect_block_mapping_key(self, first=False):
      """Emit one key of a block mapping, or close the mapping.

      The end event is only recognised for keys after the first one.
      Keys that qualify are written as simple keys; others get an
      explicit '?' indicator.
      """
      at_end = not first and isinstance(self.event, MappingEndEvent)
      if at_end:
          self.indent = self.indents.pop()
          self.state = self.states.pop()
          return
      self.write_indent()
      if not self.check_simple_key():
          self.write_indicator('?', True, indention=True)
          self.states.append(self.expect_block_mapping_value)
          self.expect_node(mapping=True)
      else:
          self.states.append(self.expect_block_mapping_simple_value)
          self.expect_node(mapping=True, simple_key=True)
  def expect_block_mapping_simple_value(self):
      """Write ':' directly after a simple key and emit the value."""
      self.write_indicator(':', False)
      next_state = self.expect_block_mapping_key
      self.states.append(next_state)
      self.expect_node(mapping=True)
  def expect_block_mapping_value(self):
      """Write ':' on its own indented line after an explicit key and
      emit the value."""
      self.write_indent()
      self.write_indicator(':', True, indention=True)
      next_state = self.expect_block_mapping_key
      self.states.append(next_state)
      self.expect_node(mapping=True)
  359. # Checkers.
  def check_empty_sequence(self):
      """Truthy when the current event starts a sequence whose end event
      is the next one in the queue."""
      pending = self.events
      starts_sequence = isinstance(self.event, SequenceStartEvent)
      return (starts_sequence and pending
              and isinstance(pending[0], SequenceEndEvent))
  def check_empty_mapping(self):
      """Truthy when the current event starts a mapping whose end event
      is the next one in the queue."""
      pending = self.events
      starts_mapping = isinstance(self.event, MappingStartEvent)
      return (starts_mapping and pending
              and isinstance(pending[0], MappingEndEvent))
  def check_empty_document(self):
      """Truthy when the document being started consists of one implicit,
      untagged, unanchored scalar with an empty value."""
      if isinstance(self.event, DocumentStartEvent) and self.events:
          root = self.events[0]
          return (isinstance(root, ScalarEvent) and root.anchor is None
                  and root.tag is None and root.implicit
                  and root.value == '')
      return False
  def check_simple_key(self):
      """Decide whether the current node can be written as a simple key.

      The combined length of anchor, tag and scalar text must stay below
      128, and the node must be an alias, a non-empty single-line scalar,
      or an empty collection.  As a side effect the anchor, tag and
      scalar analysis are prepared and cached on the emitter.
      """
      event = self.event
      size = 0
      if isinstance(event, NodeEvent) and event.anchor is not None:
          if self.prepared_anchor is None:
              self.prepared_anchor = self.prepare_anchor(event.anchor)
          size += len(self.prepared_anchor)
      is_content_node = isinstance(event, (ScalarEvent, CollectionStartEvent))
      if is_content_node and event.tag is not None:
          if self.prepared_tag is None:
              self.prepared_tag = self.prepare_tag(event.tag)
          size += len(self.prepared_tag)
      if isinstance(event, ScalarEvent):
          if self.analysis is None:
              self.analysis = self.analyze_scalar(event.value)
          size += len(self.analysis.scalar)
      return (size < 128 and (isinstance(self.event, AliasEvent)
          or (isinstance(self.event, ScalarEvent)
                  and not self.analysis.empty and not self.analysis.multiline)
          or self.check_empty_sequence() or self.check_empty_mapping()))
  391. # Anchor, Tag, and Scalar processors.
  392. def process_anchor(self, indicator):
  393. if self.event.anchor is None:
  394. self.prepared_anchor = None
  395. return
  396. if self.prepared_anchor is None:
  397. self.prepared_anchor = self.prepare_anchor(self.event.anchor)
  398. if self.prepared_anchor:
  399. self.write_indicator(indicator+self.prepared_anchor, True)
  400. self.prepared_anchor = None
  def process_tag(self):
      """Write the tag of the current scalar or collection when required.

      The tag is omitted when the event marks it implicit for the chosen
      style (scalars) or implicit at all (collections), unless canonical
      mode is on and a tag is present.  A scalar that is implicit as
      plain but is not written plain and has no tag gets the
      non-specific tag '!'.  Raises EmitterError when a tag is required
      but not specified.
      """
      tag = self.event.tag
      may_omit = not self.canonical or tag is None
      if isinstance(self.event, ScalarEvent):
          if self.style is None:
              self.style = self.choose_scalar_style()
          # implicit[0] applies to the plain style, implicit[1] to the rest.
          if may_omit and (self.event.implicit[0] if self.style == ''
                  else self.event.implicit[1]):
              self.prepared_tag = None
              return
          if self.event.implicit[0] and tag is None:
              tag = '!'
              self.prepared_tag = None
      elif may_omit and self.event.implicit:
          self.prepared_tag = None
          return
      if tag is None:
          raise EmitterError("tag is not specified")
      if self.prepared_tag is None:
          self.prepared_tag = self.prepare_tag(tag)
      if self.prepared_tag:
          self.write_indicator(self.prepared_tag, True)
      self.prepared_tag = None
  425. def choose_scalar_style(self):
  426. if self.analysis is None:
  427. self.analysis = self.analyze_scalar(self.event.value)
  428. if self.event.style == '"' or self.canonical:
  429. return '"'
  430. if not self.event.style and self.event.implicit[0]:
  431. if (not (self.simple_key_context and
  432. (self.analysis.empty or self.analysis.multiline))
  433. and (self.flow_level and self.analysis.allow_flow_plain
  434. or (not self.flow_level and self.analysis.allow_block_plain))):
  435. return ''
  436. if self.event.style and self.event.style in '|>':
  437. if (not self.flow_level and not self.simple_key_context
  438. and self.analysis.allow_block):
  439. return self.event.style
  440. if not self.event.style or self.event.style == '\'':
  441. if (self.analysis.allow_single_quoted and
  442. not (self.simple_key_context and self.analysis.multiline)):
  443. return '\''
  444. return '"'
  445. def process_scalar(self):
  446. if self.analysis is None:
  447. self.analysis = self.analyze_scalar(self.event.value)
  448. if self.style is None:
  449. self.style = self.choose_scalar_style()
  450. split = (not self.simple_key_context)
  451. #if self.analysis.multiline and split \
  452. # and (not self.style or self.style in '\'\"'):
  453. # self.write_indent()
  454. if self.style == '"':
  455. self.write_double_quoted(self.analysis.scalar, split)
  456. elif self.style == '\'':
  457. self.write_single_quoted(self.analysis.scalar, split)
  458. elif self.style == '>':
  459. self.write_folded(self.analysis.scalar)
  460. elif self.style == '|':
  461. self.write_literal(self.analysis.scalar)
  462. else:
  463. self.write_plain(self.analysis.scalar, split)
  464. self.analysis = None
  465. self.style = None
  466. # Analyzers.
  def prepare_version(self, version):
      """Format a (major, minor) version pair as 'major.minor'.

      Raises EmitterError when the major version is not 1.
      """
      major, minor = version
      if major == 1:
          return '%d.%d' % (major, minor)
      raise EmitterError("unsupported YAML version: %d.%d" % (major, minor))
  def prepare_tag_handle(self, handle):
      """Validate a tag handle and return it unchanged.

      A handle must be non-empty, start and end with '!', and contain
      only ASCII letters, digits, '-' and '_' in between.  Raises
      EmitterError otherwise.
      """
      if not handle:
          raise EmitterError("tag handle must not be empty")
      if handle[0] != '!' or handle[-1] != '!':
          raise EmitterError("tag handle must start and end with '!': %r" % handle)
      allowed = ('0123456789'
              'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
              'abcdefghijklmnopqrstuvwxyz'
              '-_')
      # Report the first offending character, if there is one.
      offender = next((ch for ch in handle[1:-1] if ch not in allowed), None)
      if offender is not None:
          raise EmitterError("invalid character %r in the tag handle: %r"
                  % (offender, handle))
      return handle
  def prepare_tag_prefix(self, prefix):
      """Return *prefix* with every disallowed character percent-encoded.

      ASCII letters, digits and the characters -;/?!:@&=+$,_.~*'()[] are
      copied through unchanged.  Any other character is replaced by the
      %XX escapes of its UTF-8 bytes.

      Raises EmitterError when *prefix* is empty.
      """
      if not prefix:
          raise EmitterError("tag prefix must not be empty")
      chunks = []
      start = end = 0
      if prefix[0] == '!':
          end = 1
      while end < len(prefix):
          ch = prefix[end]
          if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
                  or ch in '-;/?!:@&=+$,_.~*\'()[]':
              end += 1
          else:
              # Flush the run of safe characters collected so far.
              if start < end:
                  chunks.append(prefix[start:end])
              start = end = end+1
              # Iterating over bytes yields ints in Python 3, so each
              # byte is formatted directly; calling ord() on it would
              # raise TypeError.
              for byte in ch.encode('utf-8'):
                  chunks.append('%%%02X' % byte)
      if start < end:
          chunks.append(prefix[start:end])
      return ''.join(chunks)
  505. def prepare_tag(self, tag):
  506. if not tag:
  507. raise EmitterError("tag must not be empty")
  508. if tag == '!':
  509. return tag
  510. handle = None
  511. suffix = tag
  512. prefixes = sorted(self.tag_prefixes.keys())
  513. for prefix in prefixes:
  514. if tag.startswith(prefix) \
  515. and (prefix == '!' or len(prefix) < len(tag)):
  516. handle = self.tag_prefixes[prefix]
  517. suffix = tag[len(prefix):]
  518. chunks = []
  519. start = end = 0
  520. while end < len(suffix):
  521. ch = suffix[end]
  522. if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
  523. or ch in '-;/?:@&=+$,_.~*\'()[]' \
  524. or (ch == '!' and handle != '!'):
  525. end += 1
  526. else:
  527. if start < end:
  528. chunks.append(suffix[start:end])
  529. start = end = end+1
  530. data = ch.encode('utf-8')
  531. for ch in data:
  532. chunks.append('%%%02X' % ch)
  533. if start < end:
  534. chunks.append(suffix[start:end])
  535. suffix_text = ''.join(chunks)
  536. if handle:
  537. return '%s%s' % (handle, suffix_text)
  538. else:
  539. return '!<%s>' % suffix_text
  def prepare_anchor(self, anchor):
      """Validate an anchor name and return it unchanged.

      An anchor must be non-empty and contain only ASCII letters,
      digits, '-' and '_'.  Raises EmitterError otherwise.
      """
      if not anchor:
          raise EmitterError("anchor must not be empty")
      allowed = ('0123456789'
              'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
              'abcdefghijklmnopqrstuvwxyz'
              '-_')
      # Report the first offending character, if there is one.
      offender = next((ch for ch in anchor if ch not in allowed), None)
      if offender is not None:
          raise EmitterError("invalid character %r in the anchor: %r"
                  % (offender, anchor))
      return anchor
  def analyze_scalar(self, scalar):
      """Inspect *scalar* and report which output styles may represent it.

      Returns a ScalarAnalysis holding the scalar, whether it is empty or
      contains line breaks, and one flag per style (flow plain, block
      plain, single quoted, double quoted, block).  Characters outside
      printable ASCII count as special, which forces double quotes,
      unless they are printable Unicode and self.allow_unicode is set.
      """
      # Empty scalar is a special case.
      if not scalar:
          return ScalarAnalysis(scalar=scalar, empty=True, multiline=False,
                  allow_flow_plain=False, allow_block_plain=True,
                  allow_single_quoted=True, allow_double_quoted=True,
                  allow_block=False)

      # Character classes used repeatedly below.
      break_chars = '\n\x85\u2028\u2029'
      blank_chars = '\0 \t\r\n\x85\u2028\u2029'

      # Indicators and special characters.
      block_indicators = False
      flow_indicators = False
      line_breaks = False
      special_characters = False

      # Important whitespace combinations.
      leading_space = False
      leading_break = False
      trailing_space = False
      trailing_break = False
      break_space = False
      space_break = False

      # Check document indicators.
      if scalar.startswith(('---', '...')):
          block_indicators = True
          flow_indicators = True

      # First character or preceded by a whitespace.
      preceded_by_whitespace = True

      # Last character or followed by a whitespace.
      followed_by_whitespace = (len(scalar) == 1 or
              scalar[1] in blank_chars)

      # The previous character is a space.
      previous_space = False

      # The previous character is a break.
      previous_break = False

      index = 0
      while index < len(scalar):
          ch = scalar[index]

          # Check for indicators.
          if index == 0:
              # Leading indicators are special characters.
              if ch in '#,[]{}&*!|>\'\"%@`':
                  flow_indicators = True
                  block_indicators = True
              if ch in '?:':
                  flow_indicators = True
                  if followed_by_whitespace:
                      block_indicators = True
              if ch == '-' and followed_by_whitespace:
                  flow_indicators = True
                  block_indicators = True
          else:
              # Some indicators cannot appear within a scalar as well.
              if ch in ',?[]{}':
                  flow_indicators = True
              if ch == ':':
                  flow_indicators = True
                  if followed_by_whitespace:
                      block_indicators = True
              if ch == '#' and preceded_by_whitespace:
                  flow_indicators = True
                  block_indicators = True

          # Check for line breaks, special, and unicode characters.
          if ch in break_chars:
              line_breaks = True
          if not (ch == '\n' or '\x20' <= ch <= '\x7E'):
              if (ch == '\x85' or '\xA0' <= ch <= '\uD7FF'
                      or '\uE000' <= ch <= '\uFFFD'
                      or '\U00010000' <= ch < '\U0010ffff') and ch != '\uFEFF':
                  # Printable Unicode: special only when Unicode output
                  # is not allowed.
                  if not self.allow_unicode:
                      special_characters = True
              else:
                  special_characters = True

          # Detect important whitespace combinations.
          if ch == ' ':
              if index == 0:
                  leading_space = True
              if index == len(scalar)-1:
                  trailing_space = True
              if previous_break:
                  break_space = True
              previous_space = True
              previous_break = False
          elif ch in break_chars:
              if index == 0:
                  leading_break = True
              if index == len(scalar)-1:
                  trailing_break = True
              if previous_space:
                  space_break = True
              previous_space = False
              previous_break = True
          else:
              previous_space = False
              previous_break = False

          # Prepare for the next character.  After the increment, index
          # points at the next character, so index+1 is its successor.
          index += 1
          preceded_by_whitespace = (ch in blank_chars)
          followed_by_whitespace = (index+1 >= len(scalar) or
                  scalar[index+1] in blank_chars)

      # Let's decide what styles are allowed.
      allow_flow_plain = True
      allow_block_plain = True
      allow_single_quoted = True
      allow_double_quoted = True
      allow_block = True

      # Leading and trailing whitespaces are bad for plain scalars.
      if (leading_space or leading_break
              or trailing_space or trailing_break):
          allow_flow_plain = allow_block_plain = False

      # We do not permit trailing spaces for block scalars.
      if trailing_space:
          allow_block = False

      # Spaces at the beginning of a new line are only acceptable for block
      # scalars.
      if break_space:
          allow_flow_plain = allow_block_plain = allow_single_quoted = False

      # Spaces followed by breaks, as well as special character are only
      # allowed for double quoted scalars.
      if space_break or special_characters:
          allow_flow_plain = allow_block_plain =  \
          allow_single_quoted = allow_block = False

      # Although the plain scalar writer supports breaks, we never emit
      # multiline plain scalars.
      if line_breaks:
          allow_flow_plain = allow_block_plain = False

      # Flow indicators are forbidden for flow plain scalars.
      if flow_indicators:
          allow_flow_plain = False

      # Block indicators are forbidden for block plain scalars.
      if block_indicators:
          allow_block_plain = False

      return ScalarAnalysis(scalar=scalar,
              empty=False, multiline=line_breaks,
              allow_flow_plain=allow_flow_plain,
              allow_block_plain=allow_block_plain,
              allow_single_quoted=allow_single_quoted,
              allow_double_quoted=allow_double_quoted,
              allow_block=allow_block)
  686. # Writers.
  687. def flush_stream(self):
  688. if hasattr(self.stream, 'flush'):
  689. self.stream.flush()
  690. def write_stream_start(self):
  691. # Write BOM if needed.
  692. if self.encoding and self.encoding.startswith('utf-16'):
  693. self.stream.write('\uFEFF'.encode(self.encoding))
  694. def write_stream_end(self):
  695. self.flush_stream()
  696. def write_indicator(self, indicator, need_whitespace,
  697. whitespace=False, indention=False):
  698. if self.whitespace or not need_whitespace:
  699. data = indicator
  700. else:
  701. data = ' '+indicator
  702. self.whitespace = whitespace
  703. self.indention = self.indention and indention
  704. self.column += len(data)
  705. self.open_ended = False
  706. if self.encoding:
  707. data = data.encode(self.encoding)
  708. self.stream.write(data)
  709. def write_indent(self):
  710. indent = self.indent or 0
  711. if not self.indention or self.column > indent \
  712. or (self.column == indent and not self.whitespace):
  713. self.write_line_break()
  714. if self.column < indent:
  715. self.whitespace = True
  716. data = ' '*(indent-self.column)
  717. self.column = indent
  718. if self.encoding:
  719. data = data.encode(self.encoding)
  720. self.stream.write(data)
  721. def write_line_break(self, data=None):
  722. if data is None:
  723. data = self.best_line_break
  724. self.whitespace = True
  725. self.indention = True
  726. self.line += 1
  727. self.column = 0
  728. if self.encoding:
  729. data = data.encode(self.encoding)
  730. self.stream.write(data)
  731. def write_version_directive(self, version_text):
  732. data = '%%YAML %s' % version_text
  733. if self.encoding:
  734. data = data.encode(self.encoding)
  735. self.stream.write(data)
  736. self.write_line_break()
  737. def write_tag_directive(self, handle_text, prefix_text):
  738. data = '%%TAG %s %s' % (handle_text, prefix_text)
  739. if self.encoding:
  740. data = data.encode(self.encoding)
  741. self.stream.write(data)
  742. self.write_line_break()
  743. # Scalar streams.
  744. def write_single_quoted(self, text, split=True):
  745. self.write_indicator('\'', True)
  746. spaces = False
  747. breaks = False
  748. start = end = 0
  749. while end <= len(text):
  750. ch = None
  751. if end < len(text):
  752. ch = text[end]
  753. if spaces:
  754. if ch is None or ch != ' ':
  755. if start+1 == end and self.column > self.best_width and split \
  756. and start != 0 and end != len(text):
  757. self.write_indent()
  758. else:
  759. data = text[start:end]
  760. self.column += len(data)
  761. if self.encoding:
  762. data = data.encode(self.encoding)
  763. self.stream.write(data)
  764. start = end
  765. elif breaks:
  766. if ch is None or ch not in '\n\x85\u2028\u2029':
  767. if text[start] == '\n':
  768. self.write_line_break()
  769. for br in text[start:end]:
  770. if br == '\n':
  771. self.write_line_break()
  772. else:
  773. self.write_line_break(br)
  774. self.write_indent()
  775. start = end
  776. else:
  777. if ch is None or ch in ' \n\x85\u2028\u2029' or ch == '\'':
  778. if start < end:
  779. data = text[start:end]
  780. self.column += len(data)
  781. if self.encoding:
  782. data = data.encode(self.encoding)
  783. self.stream.write(data)
  784. start = end
  785. if ch == '\'':
  786. data = '\'\''
  787. self.column += 2
  788. if self.encoding:
  789. data = data.encode(self.encoding)
  790. self.stream.write(data)
  791. start = end + 1
  792. if ch is not None:
  793. spaces = (ch == ' ')
  794. breaks = (ch in '\n\x85\u2028\u2029')
  795. end += 1
  796. self.write_indicator('\'', False)
  # Maps a character to the letter written after a backslash when that
  # character is escaped inside a double-quoted scalar.
  ESCAPE_REPLACEMENTS = {
      '\x00': '0',
      '\a': 'a',
      '\b': 'b',
      '\t': 't',
      '\n': 'n',
      '\v': 'v',
      '\f': 'f',
      '\r': 'r',
      '\x1b': 'e',
      '"': '"',
      '\\': '\\',
      '\x85': 'N',
      '\xa0': '_',
      '\u2028': 'L',
      '\u2029': 'P',
  }
  814. def write_double_quoted(self, text, split=True):
  815. self.write_indicator('"', True)
  816. start = end = 0
  817. while end <= len(text):
  818. ch = None
  819. if end < len(text):
  820. ch = text[end]
  821. if ch is None or ch in '"\\\x85\u2028\u2029\uFEFF' \
  822. or not ('\x20' <= ch <= '\x7E'
  823. or (self.allow_unicode
  824. and ('\xA0' <= ch <= '\uD7FF'
  825. or '\uE000' <= ch <= '\uFFFD'))):
  826. if start < end:
  827. data = text[start:end]
  828. self.column += len(data)
  829. if self.encoding:
  830. data = data.encode(self.encoding)
  831. self.stream.write(data)
  832. start = end
  833. if ch is not None:
  834. if ch in self.ESCAPE_REPLACEMENTS:
  835. data = '\\'+self.ESCAPE_REPLACEMENTS[ch]
  836. elif ch <= '\xFF':
  837. data = '\\x%02X' % ord(ch)
  838. elif ch <= '\uFFFF':
  839. data = '\\u%04X' % ord(ch)
  840. else:
  841. data = '\\U%08X' % ord(ch)
  842. self.column += len(data)
  843. if self.encoding:
  844. data = data.encode(self.encoding)
  845. self.stream.write(data)
  846. start = end+1
  847. if 0 < end < len(text)-1 and (ch == ' ' or start >= end) \
  848. and self.column+(end-start) > self.best_width and split:
  849. data = text[start:end]+'\\'
  850. if start < end:
  851. start = end
  852. self.column += len(data)
  853. if self.encoding:
  854. data = data.encode(self.encoding)
  855. self.stream.write(data)
  856. self.write_indent()
  857. self.whitespace = False
  858. self.indention = False
  859. if text[start] == ' ':
  860. data = '\\'
  861. self.column += len(data)
  862. if self.encoding:
  863. data = data.encode(self.encoding)
  864. self.stream.write(data)
  865. end += 1
  866. self.write_indicator('"', False)
  867. def determine_block_hints(self, text):
  868. hints = ''
  869. if text:
  870. if text[0] in ' \n\x85\u2028\u2029':
  871. hints += str(self.best_indent)
  872. if text[-1] not in '\n\x85\u2028\u2029':
  873. hints += '-'
  874. elif len(text) == 1 or text[-2] in '\n\x85\u2028\u2029':
  875. hints += '+'
  876. return hints
  877. def write_folded(self, text):
  878. hints = self.determine_block_hints(text)
  879. self.write_indicator('>'+hints, True)
  880. if hints[-1:] == '+':
  881. self.open_ended = True
  882. self.write_line_break()
  883. leading_space = True
  884. spaces = False
  885. breaks = True
  886. start = end = 0
  887. while end <= len(text):
  888. ch = None
  889. if end < len(text):
  890. ch = text[end]
  891. if breaks:
  892. if ch is None or ch not in '\n\x85\u2028\u2029':
  893. if not leading_space and ch is not None and ch != ' ' \
  894. and text[start] == '\n':
  895. self.write_line_break()
  896. leading_space = (ch == ' ')
  897. for br in text[start:end]:
  898. if br == '\n':
  899. self.write_line_break()
  900. else:
  901. self.write_line_break(br)
  902. if ch is not None:
  903. self.write_indent()
  904. start = end
  905. elif spaces:
  906. if ch != ' ':
  907. if start+1 == end and self.column > self.best_width:
  908. self.write_indent()
  909. else:
  910. data = text[start:end]
  911. self.column += len(data)
  912. if self.encoding:
  913. data = data.encode(self.encoding)
  914. self.stream.write(data)
  915. start = end
  916. else:
  917. if ch is None or ch in ' \n\x85\u2028\u2029':
  918. data = text[start:end]
  919. self.column += len(data)
  920. if self.encoding:
  921. data = data.encode(self.encoding)
  922. self.stream.write(data)
  923. if ch is None:
  924. self.write_line_break()
  925. start = end
  926. if ch is not None:
  927. breaks = (ch in '\n\x85\u2028\u2029')
  928. spaces = (ch == ' ')
  929. end += 1
  930. def write_literal(self, text):
  931. hints = self.determine_block_hints(text)
  932. self.write_indicator('|'+hints, True)
  933. if hints[-1:] == '+':
  934. self.open_ended = True
  935. self.write_line_break()
  936. breaks = True
  937. start = end = 0
  938. while end <= len(text):
  939. ch = None
  940. if end < len(text):
  941. ch = text[end]
  942. if breaks:
  943. if ch is None or ch not in '\n\x85\u2028\u2029':
  944. for br in text[start:end]:
  945. if br == '\n':
  946. self.write_line_break()
  947. else:
  948. self.write_line_break(br)
  949. if ch is not None:
  950. self.write_indent()
  951. start = end
  952. else:
  953. if ch is None or ch in '\n\x85\u2028\u2029':
  954. data = text[start:end]
  955. if self.encoding:
  956. data = data.encode(self.encoding)
  957. self.stream.write(data)
  958. if ch is None:
  959. self.write_line_break()
  960. start = end
  961. if ch is not None:
  962. breaks = (ch in '\n\x85\u2028\u2029')
  963. end += 1
  964. def write_plain(self, text, split=True):
  965. if self.root_context:
  966. self.open_ended = True
  967. if not text:
  968. return
  969. if not self.whitespace:
  970. data = ' '
  971. self.column += len(data)
  972. if self.encoding:
  973. data = data.encode(self.encoding)
  974. self.stream.write(data)
  975. self.whitespace = False
  976. self.indention = False
  977. spaces = False
  978. breaks = False
  979. start = end = 0
  980. while end <= len(text):
  981. ch = None
  982. if end < len(text):
  983. ch = text[end]
  984. if spaces:
  985. if ch != ' ':
  986. if start+1 == end and self.column > self.best_width and split:
  987. self.write_indent()
  988. self.whitespace = False
  989. self.indention = False
  990. else:
  991. data = text[start:end]
  992. self.column += len(data)
  993. if self.encoding:
  994. data = data.encode(self.encoding)
  995. self.stream.write(data)
  996. start = end
  997. elif breaks:
  998. if ch not in '\n\x85\u2028\u2029':
  999. if text[start] == '\n':
  1000. self.write_line_break()
  1001. for br in text[start:end]:
  1002. if br == '\n':
  1003. self.write_line_break()
  1004. else:
  1005. self.write_line_break(br)
  1006. self.write_indent()
  1007. self.whitespace = False
  1008. self.indention = False
  1009. start = end
  1010. else:
  1011. if ch is None or ch in ' \n\x85\u2028\u2029':
  1012. data = text[start:end]
  1013. self.column += len(data)
  1014. if self.encoding:
  1015. data = data.encode(self.encoding)
  1016. self.stream.write(data)
  1017. start = end
  1018. if ch is not None:
  1019. spaces = (ch == ' ')
  1020. breaks = (ch in '\n\x85\u2028\u2029')
  1021. end += 1