# coding: utf-8

from __future__ import print_function, absolute_import, division, unicode_literals

# Scanner produces tokens of the following types:
# STREAM-START
# STREAM-END
# DIRECTIVE(name, value)
# DOCUMENT-START
# DOCUMENT-END
# BLOCK-SEQUENCE-START
# BLOCK-MAPPING-START
# BLOCK-END
# FLOW-SEQUENCE-START
# FLOW-MAPPING-START
# FLOW-SEQUENCE-END
# FLOW-MAPPING-END
# BLOCK-ENTRY
# FLOW-ENTRY
# KEY
# VALUE
# ALIAS(value)
# ANCHOR(value)
# TAG(value)
# SCALAR(value, plain, style)
#
# RoundTripScanner
# COMMENT(value)
#
# Read comments in the Scanner code for more details.
#
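
# Illustrative sketch (editor's addition, not part of the original module):
# based on the token types listed above, a small document such as
#
#     key: [a, b]
#
# would roughly be scanned as
#
#     STREAM-START, BLOCK-MAPPING-START, KEY, SCALAR(key), VALUE,
#     FLOW-SEQUENCE-START, SCALAR(a), FLOW-ENTRY, SCALAR(b),
#     FLOW-SEQUENCE-END, BLOCK-END, STREAM-END
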
from ruamel.yaml.error import MarkedYAMLError
from ruamel.yaml.tokens import *  # NOQA
from ruamel.yaml.compat import utf8, unichr, PY3, check_anchorname_char, nprint  # NOQA

if False:  # MYPY
    from typing import Any, Dict, Optional, List, Union, Text  # NOQA
    from ruamel.yaml.compat import VersionType  # NOQA

__all__ = ['Scanner', 'RoundTripScanner', 'ScannerError']


_THE_END = '\n\0\r\x85\u2028\u2029'
_THE_END_SPACE_TAB = ' \n\0\t\r\x85\u2028\u2029'
_SPACE_TAB = ' \t'


class ScannerError(MarkedYAMLError):
    pass


class SimpleKey(object):
    # See below simple keys treatment.

    def __init__(self, token_number, required, index, line, column, mark):
        # type: (Any, Any, int, int, int, Any) -> None
        self.token_number = token_number
        self.required = required
        self.index = index
        self.line = line
        self.column = column
        self.mark = mark


class Scanner(object):
    def __init__(self, loader=None):
        # type: (Any) -> None
        """Initialize the scanner."""
        # It is assumed that Scanner and Reader will have a common descendant.
        # Reader does the dirty work of checking for BOM and converting the
        # input data to Unicode. It also adds NUL to the end.
        #
        # Reader supports the following methods
        #   self.peek(i=0)     # peek the next i-th character
        #   self.prefix(l=1)   # peek the next l characters
        #   self.forward(l=1)  # read the next l characters and move the pointer
        self.loader = loader
        if self.loader is not None and getattr(self.loader, '_scanner', None) is None:
            self.loader._scanner = self
        self.reset_scanner()
        self.first_time = False
        self.yaml_version = None  # type: Any

    @property
    def flow_level(self):
        # type: () -> int
        return len(self.flow_context)

    def reset_scanner(self):
        # type: () -> None
        # Have we reached the end of the stream?
        self.done = False

        # flow_context is an expanding/shrinking list consisting of '{' and '['
        # for each unclosed flow context. If the list is empty, we are in block context.
        self.flow_context = []  # type: List[Text]

        # List of processed tokens that are not yet emitted.
        self.tokens = []  # type: List[Any]

        # Add the STREAM-START token.
        self.fetch_stream_start()

        # Number of tokens that were emitted through the `get_token` method.
        self.tokens_taken = 0

        # The current indentation level.
        self.indent = -1

        # Past indentation levels.
        self.indents = []  # type: List[int]

        # Variables related to simple keys treatment.

        # A simple key is a key that is not denoted by the '?' indicator.
        # Example of simple keys:
        #   ---
        #   block simple key: value
        #   ? not a simple key:
        #   : { flow simple key: value }
        # We emit the KEY token before all keys, so when we find a potential
        # simple key, we try to locate the corresponding ':' indicator.
        # Simple keys should be limited to a single line and 1024 characters.

        # Can a simple key start at the current position? A simple key may
        # start:
        # - at the beginning of the line, not counting indentation spaces
        #       (in block context),
        # - after '{', '[', ',' (in the flow context),
        # - after '?', ':', '-' (in the block context).
        # In the block context, this flag also signifies if a block collection
        # may start at the current position.
        self.allow_simple_key = True

        # Keep track of possible simple keys. This is a dictionary. The key
        # is `flow_level`; there can be no more than one possible simple key
        # for each level. The value is a SimpleKey record:
        #   (token_number, required, index, line, column, mark)
        # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
        # '[', or '{' tokens.
        self.possible_simple_keys = {}  # type: Dict[Any, Any]

    @property
    def reader(self):
        # type: () -> Any
        try:
            return self._scanner_reader  # type: ignore
        except AttributeError:
            if hasattr(self.loader, 'typ'):
                self._scanner_reader = self.loader.reader
            else:
                self._scanner_reader = self.loader._reader
            return self._scanner_reader

    @property
    def scanner_processing_version(self):  # prefix until un-composited
        # type: () -> Any
        if hasattr(self.loader, 'typ'):
            return self.loader.resolver.processing_version
        return self.loader.processing_version

    # Public methods.

    def check_token(self, *choices):
        # type: (Any) -> bool
        # Check if the next token is one of the given types.
        while self.need_more_tokens():
            self.fetch_more_tokens()
        if bool(self.tokens):
            if not choices:
                return True
            for choice in choices:
                if isinstance(self.tokens[0], choice):
                    return True
        return False

    def peek_token(self):
        # type: () -> Any
        # Return the next token, but do not delete it from the queue.
        while self.need_more_tokens():
            self.fetch_more_tokens()
        if bool(self.tokens):
            return self.tokens[0]

    def get_token(self):
        # type: () -> Any
        # Return the next token.
        while self.need_more_tokens():
            self.fetch_more_tokens()
        if bool(self.tokens):
            self.tokens_taken += 1
            return self.tokens.pop(0)
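
    # Illustrative sketch (editor's addition, not part of the original source):
    # the parser drives the scanner through the three public methods above.
    # Roughly, assuming a composed loader whose reader has been fed a YAML string:
    #
    #     scanner = Scanner(loader)
    #     while not scanner.check_token(StreamEndToken):
    #         token = scanner.get_token()   # pops the next token off scanner.tokens
    #     # peek_token() would return the same token without consuming it
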
    # Private methods.

    def need_more_tokens(self):
        # type: () -> bool
        if self.done:
            return False
        if not self.tokens:
            return True
        # The current token may be a potential simple key, so we
        # need to look further.
        self.stale_possible_simple_keys()
        if self.next_possible_simple_key() == self.tokens_taken:
            return True
        return False

    def fetch_comment(self, comment):
        # type: (Any) -> None
        raise NotImplementedError

    def fetch_more_tokens(self):
        # type: () -> Any
        # Eat whitespaces and comments until we reach the next token.
        comment = self.scan_to_next_token()
        if comment is not None:  # never happens for base scanner
            return self.fetch_comment(comment)

        # Remove obsolete possible simple keys.
        self.stale_possible_simple_keys()

        # Compare the current indentation and column. It may add some tokens
        # and decrease the current indentation level.
        self.unwind_indent(self.reader.column)

        # Peek the next character.
        ch = self.reader.peek()

        # Is it the end of stream?
        if ch == '\0':
            return self.fetch_stream_end()

        # Is it a directive?
        if ch == '%' and self.check_directive():
            return self.fetch_directive()

        # Is it the document start?
        if ch == '-' and self.check_document_start():
            return self.fetch_document_start()

        # Is it the document end?
        if ch == '.' and self.check_document_end():
            return self.fetch_document_end()

        # TODO: support for BOM within a stream.
        # if ch == u'\uFEFF':
        #     return self.fetch_bom()     <-- issue BOMToken

        # Note: the order of the following checks is NOT significant.

        # Is it the flow sequence start indicator?
        if ch == '[':
            return self.fetch_flow_sequence_start()

        # Is it the flow mapping start indicator?
        if ch == '{':
            return self.fetch_flow_mapping_start()

        # Is it the flow sequence end indicator?
        if ch == ']':
            return self.fetch_flow_sequence_end()

        # Is it the flow mapping end indicator?
        if ch == '}':
            return self.fetch_flow_mapping_end()

        # Is it the flow entry indicator?
        if ch == ',':
            return self.fetch_flow_entry()

        # Is it the block entry indicator?
        if ch == '-' and self.check_block_entry():
            return self.fetch_block_entry()

        # Is it the key indicator?
        if ch == '?' and self.check_key():
            return self.fetch_key()

        # Is it the value indicator?
        if ch == ':' and self.check_value():
            return self.fetch_value()

        # Is it an alias?
        if ch == '*':
            return self.fetch_alias()

        # Is it an anchor?
        if ch == '&':
            return self.fetch_anchor()

        # Is it a tag?
        if ch == '!':
            return self.fetch_tag()

        # Is it a literal scalar?
        if ch == '|' and not self.flow_level:
            return self.fetch_literal()

        # Is it a folded scalar?
        if ch == '>' and not self.flow_level:
            return self.fetch_folded()

        # Is it a single-quoted scalar?
        if ch == "'":
            return self.fetch_single()

        # Is it a double-quoted scalar?
        if ch == '"':
            return self.fetch_double()

        # It must be a plain scalar then.
        if self.check_plain():
            return self.fetch_plain()

        # No? It's an error. Let's produce a nice error message.
        raise ScannerError(
            'while scanning for the next token',
            None,
            'found character %r that cannot start any token' % utf8(ch),
            self.reader.get_mark(),
        )

    # Simple keys treatment.

    def next_possible_simple_key(self):
        # type: () -> Any
        # Return the number of the nearest possible simple key. Actually we
        # don't need to loop through the whole dictionary. We may replace it
        # with the following code:
        #   if not self.possible_simple_keys:
        #       return None
        #   return self.possible_simple_keys[
        #       min(self.possible_simple_keys.keys())].token_number
        min_token_number = None
        for level in self.possible_simple_keys:
            key = self.possible_simple_keys[level]
            if min_token_number is None or key.token_number < min_token_number:
                min_token_number = key.token_number
        return min_token_number

    def stale_possible_simple_keys(self):
        # type: () -> None
        # Remove entries that are no longer possible simple keys. According to
        # the YAML specification, simple keys
        # - should be limited to a single line,
        # - should be no longer than 1024 characters.
        # Disabling this procedure will allow simple keys of any length and
        # height (may cause problems if indentation is broken though).
        for level in list(self.possible_simple_keys):
            key = self.possible_simple_keys[level]
            if key.line != self.reader.line or self.reader.index - key.index > 1024:
                if key.required:
                    raise ScannerError(
                        'while scanning a simple key',
                        key.mark,
                        "could not find expected ':'",
                        self.reader.get_mark(),
                    )
                del self.possible_simple_keys[level]

    def save_possible_simple_key(self):
        # type: () -> None
        # The next token may start a simple key. We check if it's possible
        # and save its position. This function is called for
        # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.

        # Check if a simple key is required at the current position.
        required = not self.flow_level and self.indent == self.reader.column

        # The next token might be a simple key. Let's save its number and
        # position.
        if self.allow_simple_key:
            self.remove_possible_simple_key()
            token_number = self.tokens_taken + len(self.tokens)
            key = SimpleKey(
                token_number,
                required,
                self.reader.index,
                self.reader.line,
                self.reader.column,
                self.reader.get_mark(),
            )
            self.possible_simple_keys[self.flow_level] = key

    def remove_possible_simple_key(self):
        # type: () -> None
        # Remove the saved possible key position at the current flow level.
        if self.flow_level in self.possible_simple_keys:
            key = self.possible_simple_keys[self.flow_level]
            if key.required:
                raise ScannerError(
                    'while scanning a simple key',
                    key.mark,
                    "could not find expected ':'",
                    self.reader.get_mark(),
                )
            del self.possible_simple_keys[self.flow_level]

    # Indentation functions.

    def unwind_indent(self, column):
        # type: (Any) -> None
        # In flow context, tokens should respect indentation.
        # Actually the condition should be `self.indent >= column` according to
        # the spec. But this condition will prohibit intuitively correct
        # constructions such as
        #   key : {
        #   }
        # ####
        # if self.flow_level and self.indent > column:
        #     raise ScannerError(None, None,
        #             "invalid indentation or unclosed '[' or '{'",
        #             self.reader.get_mark())

        # In the flow context, indentation is ignored. We make the scanner less
        # restrictive than the specification requires.
        if bool(self.flow_level):
            return

        # In block context, we may need to issue the BLOCK-END tokens.
        while self.indent > column:
            mark = self.reader.get_mark()
            self.indent = self.indents.pop()
            self.tokens.append(BlockEndToken(mark, mark))

    def add_indent(self, column):
        # type: (int) -> bool
        # Check if we need to increase indentation.
        if self.indent < column:
            self.indents.append(self.indent)
            self.indent = column
            return True
        return False
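
    # Illustrative sketch (editor's addition, not part of the original source):
    # for the block document
    #
    #     outer:
    #       inner: 1
    #
    # add_indent() pushes -1 and then 0 onto self.indents (self.indent goes
    # -1 -> 0 -> 2), and unwind_indent(-1) at the end of the stream pops them
    # back, emitting one BLOCK-END token per popped level.
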
    # Fetchers.

    def fetch_stream_start(self):
        # type: () -> None
        # We always add STREAM-START as the first token and STREAM-END as the
        # last token.
        # Read the token.
        mark = self.reader.get_mark()
        # Add STREAM-START.
        self.tokens.append(StreamStartToken(mark, mark, encoding=self.reader.encoding))

    def fetch_stream_end(self):
        # type: () -> None
        # Set the current indentation to -1.
        self.unwind_indent(-1)
        # Reset simple keys.
        self.remove_possible_simple_key()
        self.allow_simple_key = False
        self.possible_simple_keys = {}
        # Read the token.
        mark = self.reader.get_mark()
        # Add STREAM-END.
        self.tokens.append(StreamEndToken(mark, mark))
        # The stream is finished.
        self.done = True

    def fetch_directive(self):
        # type: () -> None
        # Set the current indentation to -1.
        self.unwind_indent(-1)

        # Reset simple keys.
        self.remove_possible_simple_key()
        self.allow_simple_key = False

        # Scan and add DIRECTIVE.
        self.tokens.append(self.scan_directive())

    def fetch_document_start(self):
        # type: () -> None
        self.fetch_document_indicator(DocumentStartToken)

    def fetch_document_end(self):
        # type: () -> None
        self.fetch_document_indicator(DocumentEndToken)

    def fetch_document_indicator(self, TokenClass):
        # type: (Any) -> None
        # Set the current indentation to -1.
        self.unwind_indent(-1)

        # Reset simple keys. Note that there could not be a block collection
        # after '---'.
        self.remove_possible_simple_key()
        self.allow_simple_key = False

        # Add DOCUMENT-START or DOCUMENT-END.
        start_mark = self.reader.get_mark()
        self.reader.forward(3)
        end_mark = self.reader.get_mark()
        self.tokens.append(TokenClass(start_mark, end_mark))

    def fetch_flow_sequence_start(self):
        # type: () -> None
        self.fetch_flow_collection_start(FlowSequenceStartToken, to_push='[')

    def fetch_flow_mapping_start(self):
        # type: () -> None
        self.fetch_flow_collection_start(FlowMappingStartToken, to_push='{')

    def fetch_flow_collection_start(self, TokenClass, to_push):
        # type: (Any, Text) -> None
        # '[' and '{' may start a simple key.
        self.save_possible_simple_key()
        # Increase the flow level.
        self.flow_context.append(to_push)
        # Simple keys are allowed after '[' and '{'.
        self.allow_simple_key = True
        # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
        start_mark = self.reader.get_mark()
        self.reader.forward()
        end_mark = self.reader.get_mark()
        self.tokens.append(TokenClass(start_mark, end_mark))

    def fetch_flow_sequence_end(self):
        # type: () -> None
        self.fetch_flow_collection_end(FlowSequenceEndToken)

    def fetch_flow_mapping_end(self):
        # type: () -> None
        self.fetch_flow_collection_end(FlowMappingEndToken)

    def fetch_flow_collection_end(self, TokenClass):
        # type: (Any) -> None
        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()
        # Decrease the flow level.
        try:
            popped = self.flow_context.pop()  # NOQA
        except IndexError:
            # We must not be in a list or object.
            # Defer error handling to the parser.
            pass
        # No simple keys after ']' or '}'.
        self.allow_simple_key = False
        # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
        start_mark = self.reader.get_mark()
        self.reader.forward()
        end_mark = self.reader.get_mark()
        self.tokens.append(TokenClass(start_mark, end_mark))

    def fetch_flow_entry(self):
        # type: () -> None
        # Simple keys are allowed after ','.
        self.allow_simple_key = True
        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()
        # Add FLOW-ENTRY.
        start_mark = self.reader.get_mark()
        self.reader.forward()
        end_mark = self.reader.get_mark()
        self.tokens.append(FlowEntryToken(start_mark, end_mark))

    def fetch_block_entry(self):
        # type: () -> None
        # Block context needs additional checks.
        if not self.flow_level:
            # Are we allowed to start a new entry?
            if not self.allow_simple_key:
                raise ScannerError(
                    None, None, 'sequence entries are not allowed here', self.reader.get_mark()
                )
            # We may need to add BLOCK-SEQUENCE-START.
            if self.add_indent(self.reader.column):
                mark = self.reader.get_mark()
                self.tokens.append(BlockSequenceStartToken(mark, mark))
        # It's an error for the block entry to occur in the flow context,
        # but we let the parser detect this.
        else:
            pass
        # Simple keys are allowed after '-'.
        self.allow_simple_key = True
        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Add BLOCK-ENTRY.
        start_mark = self.reader.get_mark()
        self.reader.forward()
        end_mark = self.reader.get_mark()
        self.tokens.append(BlockEntryToken(start_mark, end_mark))

    def fetch_key(self):
        # type: () -> None
        # Block context needs additional checks.
        if not self.flow_level:
            # Are we allowed to start a key (not necessarily a simple one)?
            if not self.allow_simple_key:
                raise ScannerError(
                    None, None, 'mapping keys are not allowed here', self.reader.get_mark()
                )

            # We may need to add BLOCK-MAPPING-START.
            if self.add_indent(self.reader.column):
                mark = self.reader.get_mark()
                self.tokens.append(BlockMappingStartToken(mark, mark))

        # Simple keys are allowed after '?' in the block context.
        self.allow_simple_key = not self.flow_level

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Add KEY.
        start_mark = self.reader.get_mark()
        self.reader.forward()
        end_mark = self.reader.get_mark()
        self.tokens.append(KeyToken(start_mark, end_mark))

    def fetch_value(self):
        # type: () -> None
        # Do we determine a simple key?
        if self.flow_level in self.possible_simple_keys:
            # Add KEY.
            key = self.possible_simple_keys[self.flow_level]
            del self.possible_simple_keys[self.flow_level]
            self.tokens.insert(
                key.token_number - self.tokens_taken, KeyToken(key.mark, key.mark)
            )

            # If this key starts a new block mapping, we need to add
            # BLOCK-MAPPING-START.
            if not self.flow_level:
                if self.add_indent(key.column):
                    self.tokens.insert(
                        key.token_number - self.tokens_taken,
                        BlockMappingStartToken(key.mark, key.mark),
                    )

            # There cannot be two simple keys one after another.
            self.allow_simple_key = False

        # It must be a part of a complex key.
        else:
            # Block context needs additional checks.
            # (Do we really need them? They will be caught by the parser
            # anyway.)
            if not self.flow_level:
                # We are allowed to start a complex value if and only if
                # we can start a simple key.
                if not self.allow_simple_key:
                    raise ScannerError(
                        None,
                        None,
                        'mapping values are not allowed here',
                        self.reader.get_mark(),
                    )

            # If this value starts a new block mapping, we need to add
            # BLOCK-MAPPING-START. It will be detected as an error later by
            # the parser.
            if not self.flow_level:
                if self.add_indent(self.reader.column):
                    mark = self.reader.get_mark()
                    self.tokens.append(BlockMappingStartToken(mark, mark))

            # Simple keys are allowed after ':' in the block context.
            self.allow_simple_key = not self.flow_level

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Add VALUE.
        start_mark = self.reader.get_mark()
        self.reader.forward()
        end_mark = self.reader.get_mark()
        self.tokens.append(ValueToken(start_mark, end_mark))

    def fetch_alias(self):
        # type: () -> None
        # ALIAS could be a simple key.
        self.save_possible_simple_key()
        # No simple keys after ALIAS.
        self.allow_simple_key = False
        # Scan and add ALIAS.
        self.tokens.append(self.scan_anchor(AliasToken))

    def fetch_anchor(self):
        # type: () -> None
        # ANCHOR could start a simple key.
        self.save_possible_simple_key()
        # No simple keys after ANCHOR.
        self.allow_simple_key = False
        # Scan and add ANCHOR.
        self.tokens.append(self.scan_anchor(AnchorToken))

    def fetch_tag(self):
        # type: () -> None
        # TAG could start a simple key.
        self.save_possible_simple_key()
        # No simple keys after TAG.
        self.allow_simple_key = False
        # Scan and add TAG.
        self.tokens.append(self.scan_tag())

    def fetch_literal(self):
        # type: () -> None
        self.fetch_block_scalar(style='|')

    def fetch_folded(self):
        # type: () -> None
        self.fetch_block_scalar(style='>')

    def fetch_block_scalar(self, style):
        # type: (Any) -> None
        # A simple key may follow a block scalar.
        self.allow_simple_key = True
        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()
        # Scan and add SCALAR.
        self.tokens.append(self.scan_block_scalar(style))

    def fetch_single(self):
        # type: () -> None
        self.fetch_flow_scalar(style="'")

    def fetch_double(self):
        # type: () -> None
        self.fetch_flow_scalar(style='"')

    def fetch_flow_scalar(self, style):
        # type: (Any) -> None
        # A flow scalar could be a simple key.
        self.save_possible_simple_key()
        # No simple keys after flow scalars.
        self.allow_simple_key = False
        # Scan and add SCALAR.
        self.tokens.append(self.scan_flow_scalar(style))

    def fetch_plain(self):
        # type: () -> None
        # A plain scalar could be a simple key.
        self.save_possible_simple_key()
        # No simple keys after plain scalars. But note that `scan_plain` will
        # change this flag if the scan is finished at the beginning of the
        # line.
        self.allow_simple_key = False
        # Scan and add SCALAR. May change `allow_simple_key`.
        self.tokens.append(self.scan_plain())

    # Checkers.

    def check_directive(self):
        # type: () -> Any
        # DIRECTIVE: ^ '%' ...
        # The '%' indicator is already checked.
        if self.reader.column == 0:
            return True
        return None

    def check_document_start(self):
        # type: () -> Any
        # DOCUMENT-START: ^ '---' (' '|'\n')
        if self.reader.column == 0:
            if self.reader.prefix(3) == '---' and self.reader.peek(3) in _THE_END_SPACE_TAB:
                return True
        return None

    def check_document_end(self):
        # type: () -> Any
        # DOCUMENT-END: ^ '...' (' '|'\n')
        if self.reader.column == 0:
            if self.reader.prefix(3) == '...' and self.reader.peek(3) in _THE_END_SPACE_TAB:
                return True
        return None

    def check_block_entry(self):
        # type: () -> Any
        # BLOCK-ENTRY: '-' (' '|'\n')
        return self.reader.peek(1) in _THE_END_SPACE_TAB

    def check_key(self):
        # type: () -> Any
        # KEY(flow context): '?'
        if bool(self.flow_level):
            return True
        # KEY(block context): '?' (' '|'\n')
        return self.reader.peek(1) in _THE_END_SPACE_TAB

    def check_value(self):
        # type: () -> Any
        # VALUE(flow context): ':'
        if self.scanner_processing_version == (1, 1):
            if bool(self.flow_level):
                return True
        else:
            if bool(self.flow_level):
                if self.flow_context[-1] == '[':
                    if self.reader.peek(1) not in _THE_END_SPACE_TAB:
                        return False
                elif self.tokens and isinstance(self.tokens[-1], ValueToken):
                    # mapping flow context scanning a value token
                    if self.reader.peek(1) not in _THE_END_SPACE_TAB:
                        return False
                return True
        # VALUE(block context): ':' (' '|'\n')
        return self.reader.peek(1) in _THE_END_SPACE_TAB
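
    # Illustrative note (editor's addition, not part of the original source):
    # with the YAML 1.2 branch above, the ':' in "[a:1]" is not treated as a
    # VALUE indicator because the following character is not a space, so
    # "a:1" stays one plain scalar, whereas "[a: 1]" produces a single-pair
    # mapping. Under the 1.1 branch any ':' inside a flow collection checks
    # as a VALUE indicator.
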
    def check_plain(self):
        # type: () -> Any
        # A plain scalar may start with any non-space character except:
        #   '-', '?', ':', ',', '[', ']', '{', '}',
        #   '#', '&', '*', '!', '|', '>', '\'', '\"',
        #   '%', '@', '`'.
        #
        # It may also start with
        #   '-', '?', ':'
        # if it is followed by a non-space character.
        #
        # Note that we limit the last rule to the block context (except the
        # '-' character) because we want the flow context to be space
        # independent.
        srp = self.reader.peek
        ch = srp()
        if self.scanner_processing_version == (1, 1):
            return ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'"%@`' or (
                srp(1) not in _THE_END_SPACE_TAB
                and (ch == '-' or (not self.flow_level and ch in '?:'))
            )
        # YAML 1.2
        if ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'"%@`':
            # ###################                ^ ???
            return True
        ch1 = srp(1)
        if ch == '-' and ch1 not in _THE_END_SPACE_TAB:
            return True
        if ch == ':' and bool(self.flow_level) and ch1 not in _SPACE_TAB:
            return True

        return srp(1) not in _THE_END_SPACE_TAB and (
            ch == '-' or (not self.flow_level and ch in '?:')
        )

    # Scanners.

    def scan_to_next_token(self):
        # type: () -> Any
        # We ignore spaces, line breaks and comments.
        # If we find a line break in the block context, we set the flag
        # `allow_simple_key` on.
        # The byte order mark is stripped if it's the first character in the
        # stream. We do not yet support BOM inside the stream as the
        # specification requires. Any such mark will be considered as a part
        # of the document.
        #
        # TODO: We need to make tab handling rules more sane. A good rule is
        #   Tabs cannot precede tokens
        #   BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
        #   KEY(block), VALUE(block), BLOCK-ENTRY
        # So the checking code is
        #   if <TAB>:
        #       self.allow_simple_keys = False
        # We also need to add the check for `allow_simple_keys == True` to
        # `unwind_indent` before issuing BLOCK-END.
        # Scanners for block, flow, and plain scalars need to be modified.
        srp = self.reader.peek
        srf = self.reader.forward
        if self.reader.index == 0 and srp() == '\uFEFF':
            srf()
        found = False
        _the_end = _THE_END
        while not found:
            while srp() == ' ':
                srf()
            if srp() == '#':
                while srp() not in _the_end:
                    srf()
            if self.scan_line_break():
                if not self.flow_level:
                    self.allow_simple_key = True
            else:
                found = True
        return None

    def scan_directive(self):
        # type: () -> Any
        # See the specification for details.
        srp = self.reader.peek
        srf = self.reader.forward
        start_mark = self.reader.get_mark()
        srf()
        name = self.scan_directive_name(start_mark)
        value = None
        if name == 'YAML':
            value = self.scan_yaml_directive_value(start_mark)
            end_mark = self.reader.get_mark()
        elif name == 'TAG':
            value = self.scan_tag_directive_value(start_mark)
            end_mark = self.reader.get_mark()
        else:
            end_mark = self.reader.get_mark()
            while srp() not in _THE_END:
                srf()
        self.scan_directive_ignored_line(start_mark)
        return DirectiveToken(name, value, start_mark, end_mark)

    def scan_directive_name(self, start_mark):
        # type: (Any) -> Any
        # See the specification for details.
        length = 0
        srp = self.reader.peek
        ch = srp(length)
        while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' or ch in '-_:.':
            length += 1
            ch = srp(length)
        if not length:
            raise ScannerError(
                'while scanning a directive',
                start_mark,
                'expected alphabetic or numeric character, but found %r' % utf8(ch),
                self.reader.get_mark(),
            )
        value = self.reader.prefix(length)
        self.reader.forward(length)
        ch = srp()
        if ch not in '\0 \r\n\x85\u2028\u2029':
            raise ScannerError(
                'while scanning a directive',
                start_mark,
                'expected alphabetic or numeric character, but found %r' % utf8(ch),
                self.reader.get_mark(),
            )
        return value

    def scan_yaml_directive_value(self, start_mark):
        # type: (Any) -> Any
        # See the specification for details.
        srp = self.reader.peek
        srf = self.reader.forward
        while srp() == ' ':
            srf()
        major = self.scan_yaml_directive_number(start_mark)
        if srp() != '.':
            raise ScannerError(
                'while scanning a directive',
                start_mark,
                "expected a digit or '.', but found %r" % utf8(srp()),
                self.reader.get_mark(),
            )
        srf()
        minor = self.scan_yaml_directive_number(start_mark)
        if srp() not in '\0 \r\n\x85\u2028\u2029':
            raise ScannerError(
                'while scanning a directive',
                start_mark,
                "expected a digit or ' ', but found %r" % utf8(srp()),
                self.reader.get_mark(),
            )
        self.yaml_version = (major, minor)
        return self.yaml_version

    def scan_yaml_directive_number(self, start_mark):
        # type: (Any) -> Any
        # See the specification for details.
        srp = self.reader.peek
        srf = self.reader.forward
        ch = srp()
        if not ('0' <= ch <= '9'):
            raise ScannerError(
                'while scanning a directive',
                start_mark,
                'expected a digit, but found %r' % utf8(ch),
                self.reader.get_mark(),
            )
        length = 0
        while '0' <= srp(length) <= '9':
            length += 1
        value = int(self.reader.prefix(length))
        srf(length)
        return value

    def scan_tag_directive_value(self, start_mark):
        # type: (Any) -> Any
        # See the specification for details.
        srp = self.reader.peek
        srf = self.reader.forward
        while srp() == ' ':
            srf()
        handle = self.scan_tag_directive_handle(start_mark)
        while srp() == ' ':
            srf()
        prefix = self.scan_tag_directive_prefix(start_mark)
        return (handle, prefix)

    def scan_tag_directive_handle(self, start_mark):
        # type: (Any) -> Any
        # See the specification for details.
        value = self.scan_tag_handle('directive', start_mark)
        ch = self.reader.peek()
        if ch != ' ':
            raise ScannerError(
                'while scanning a directive',
                start_mark,
                "expected ' ', but found %r" % utf8(ch),
                self.reader.get_mark(),
            )
        return value

    def scan_tag_directive_prefix(self, start_mark):
        # type: (Any) -> Any
        # See the specification for details.
        value = self.scan_tag_uri('directive', start_mark)
        ch = self.reader.peek()
        if ch not in '\0 \r\n\x85\u2028\u2029':
            raise ScannerError(
                'while scanning a directive',
                start_mark,
                "expected ' ', but found %r" % utf8(ch),
                self.reader.get_mark(),
            )
        return value

    def scan_directive_ignored_line(self, start_mark):
        # type: (Any) -> None
        # See the specification for details.
        srp = self.reader.peek
        srf = self.reader.forward
        while srp() == ' ':
            srf()
        if srp() == '#':
            while srp() not in _THE_END:
                srf()
        ch = srp()
        if ch not in _THE_END:
            raise ScannerError(
                'while scanning a directive',
                start_mark,
                'expected a comment or a line break, but found %r' % utf8(ch),
                self.reader.get_mark(),
            )
        self.scan_line_break()

    def scan_anchor(self, TokenClass):
        # type: (Any) -> Any
        # The specification does not restrict characters for anchors and
        # aliases. This may lead to problems, for instance, the document:
        #   [ *alias, value ]
        # can be interpreted in two ways, as
        #   [ "value" ]
        # and
        #   [ *alias , "value" ]
        # Therefore we restrict aliases to numbers and ASCII letters.
        srp = self.reader.peek
        start_mark = self.reader.get_mark()
        indicator = srp()
        if indicator == '*':
            name = 'alias'
        else:
            name = 'anchor'
        self.reader.forward()
        length = 0
        ch = srp(length)
        # while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
        #         or ch in u'-_':
        while check_anchorname_char(ch):
            length += 1
            ch = srp(length)
        if not length:
            raise ScannerError(
                'while scanning an %s' % (name,),
                start_mark,
                'expected alphabetic or numeric character, but found %r' % utf8(ch),
                self.reader.get_mark(),
            )
        value = self.reader.prefix(length)
        self.reader.forward(length)
        # ch1 = ch
        # ch = srp()   # no need to peek, ch is already set
        # assert ch1 == ch
        if ch not in '\0 \t\r\n\x85\u2028\u2029?:,[]{}%@`':
            raise ScannerError(
                'while scanning an %s' % (name,),
                start_mark,
                'expected alphabetic or numeric character, but found %r' % utf8(ch),
                self.reader.get_mark(),
            )
        end_mark = self.reader.get_mark()
        return TokenClass(value, start_mark, end_mark)

    def scan_tag(self):
        # type: () -> Any
        # See the specification for details.
        srp = self.reader.peek
        start_mark = self.reader.get_mark()
        ch = srp(1)
        if ch == '<':
            handle = None
            self.reader.forward(2)
            suffix = self.scan_tag_uri('tag', start_mark)
            if srp() != '>':
                raise ScannerError(
                    'while parsing a tag',
                    start_mark,
                    "expected '>', but found %r" % utf8(srp()),
                    self.reader.get_mark(),
                )
            self.reader.forward()
        elif ch in _THE_END_SPACE_TAB:
            handle = None
            suffix = '!'
            self.reader.forward()
        else:
            length = 1
            use_handle = False
            while ch not in '\0 \r\n\x85\u2028\u2029':
                if ch == '!':
                    use_handle = True
                    break
                length += 1
                ch = srp(length)
            handle = '!'
            if use_handle:
                handle = self.scan_tag_handle('tag', start_mark)
            else:
                handle = '!'
                self.reader.forward()
            suffix = self.scan_tag_uri('tag', start_mark)
        ch = srp()
        if ch not in '\0 \r\n\x85\u2028\u2029':
            raise ScannerError(
                'while scanning a tag',
                start_mark,
                "expected ' ', but found %r" % utf8(ch),
                self.reader.get_mark(),
            )
        value = (handle, suffix)
        end_mark = self.reader.get_mark()
        return TagToken(value, start_mark, end_mark)

    def scan_block_scalar(self, style, rt=False):
        # type: (Any, Optional[bool]) -> Any
        # See the specification for details.
        srp = self.reader.peek
        if style == '>':
            folded = True
        else:
            folded = False
        chunks = []  # type: List[Any]
        start_mark = self.reader.get_mark()
        # Scan the header.
        self.reader.forward()
        chomping, increment = self.scan_block_scalar_indicators(start_mark)
        # block scalar comment e.g. : |+  # comment text
        block_scalar_comment = self.scan_block_scalar_ignored_line(start_mark)

        # Determine the indentation level and go to the first non-empty line.
        min_indent = self.indent + 1
        if increment is None:
            # no increment and top level, min_indent could be 0
            if min_indent < 1 and (
                style not in '|>'
                or (self.scanner_processing_version == (1, 1))
                and getattr(
                    self.loader, 'top_level_block_style_scalar_no_indent_error_1_1', False
                )
            ):
                min_indent = 1
            breaks, max_indent, end_mark = self.scan_block_scalar_indentation()
            indent = max(min_indent, max_indent)
        else:
            if min_indent < 1:
                min_indent = 1
            indent = min_indent + increment - 1
            breaks, end_mark = self.scan_block_scalar_breaks(indent)
        line_break = ""

        # Scan the inner part of the block scalar.
        while self.reader.column == indent and srp() != '\0':
            chunks.extend(breaks)
            leading_non_space = srp() not in ' \t'
            length = 0
            while srp(length) not in _THE_END:
                length += 1
            chunks.append(self.reader.prefix(length))
            self.reader.forward(length)
            line_break = self.scan_line_break()
            breaks, end_mark = self.scan_block_scalar_breaks(indent)
            if style in '|>' and min_indent == 0:
                # at the beginning of a line, if in block style see if
                # end of document/start_new_document
                if self.check_document_start() or self.check_document_end():
                    break
            if self.reader.column == indent and srp() != '\0':
                # Unfortunately, folding rules are ambiguous.
                #
                # This is the folding according to the specification:
                if rt and folded and line_break == '\n':
                    chunks.append('\a')
                if folded and line_break == '\n' and leading_non_space and srp() not in ' \t':
                    if not breaks:
                        chunks.append(' ')
                else:
                    chunks.append(line_break)

                # This is Clark Evans's interpretation (also in the spec
                # examples):
                #
                # if folded and line_break == u'\n':
                #     if not breaks:
                #         if srp() not in ' \t':
                #             chunks.append(u' ')
                #         else:
                #             chunks.append(line_break)
                # else:
                #     chunks.append(line_break)
            else:
                break

        # Process trailing line breaks. The 'chomping' setting determines
        # whether they are included in the value.
        trailing = []  # type: List[Any]
        if chomping in [None, True]:
            chunks.append(line_break)
        if chomping is True:
            chunks.extend(breaks)
        elif chomping in [None, False]:
            trailing.extend(breaks)

        # We are done.
        token = ScalarToken("".join(chunks), False, start_mark, end_mark, style)
        if block_scalar_comment is not None:
            token.add_pre_comments([block_scalar_comment])
        if len(trailing) > 0:
            # nprint('trailing 1', trailing)  # XXXXX
            # Eat whitespaces and comments until we reach the next token.
            comment = self.scan_to_next_token()
            while comment:
                trailing.append(' ' * comment[1].column + comment[0])
                comment = self.scan_to_next_token()

            # Keep track of the trailing whitespace and following comments
            # as a comment token, if it isn't all included in the actual value.
            comment_end_mark = self.reader.get_mark()
            comment = CommentToken("".join(trailing), end_mark, comment_end_mark)
            token.add_post_comment(comment)
        return token
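
    # Illustrative sketch (editor's addition, not part of the original source)
    # of the folding handled above: for the folded scalar
    #
    #     >
    #       one
    #       two
    #
    # the single line break between "one" and "two" is replaced by a space,
    # giving the value "one two\n", while the literal style '|' would keep it
    # as "one\ntwo\n". The chomping indicator decides whether the trailing
    # breaks collected in `breaks` go into the value ('+') or are kept aside
    # in `trailing` ('-' or no indicator).
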
    def scan_block_scalar_indicators(self, start_mark):
        # type: (Any) -> Any
        # See the specification for details.
        srp = self.reader.peek
        chomping = None
        increment = None
        ch = srp()
        if ch in '+-':
            if ch == '+':
                chomping = True
            else:
                chomping = False
            self.reader.forward()
            ch = srp()
            if ch in '0123456789':
                increment = int(ch)
                if increment == 0:
                    raise ScannerError(
                        'while scanning a block scalar',
                        start_mark,
                        'expected indentation indicator in the range 1-9, but found 0',
                        self.reader.get_mark(),
                    )
                self.reader.forward()
        elif ch in '0123456789':
            increment = int(ch)
            if increment == 0:
                raise ScannerError(
                    'while scanning a block scalar',
                    start_mark,
                    'expected indentation indicator in the range 1-9, but found 0',
                    self.reader.get_mark(),
                )
            self.reader.forward()
            ch = srp()
            if ch in '+-':
                if ch == '+':
                    chomping = True
                else:
                    chomping = False
                self.reader.forward()
        ch = srp()
        if ch not in '\0 \r\n\x85\u2028\u2029':
            raise ScannerError(
                'while scanning a block scalar',
                start_mark,
                'expected chomping or indentation indicators, but found %r' % utf8(ch),
                self.reader.get_mark(),
            )
        return chomping, increment
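
    # Illustrative sketch (editor's addition, not part of the original source)
    # of the header parsing above: "|2-" scans to increment=2, chomping=False
    # (strip trailing newlines); ">+" scans to increment=None, chomping=True
    # (keep them); a bare "|" leaves both as None, so the indentation is
    # auto-detected and a single trailing newline is clipped.
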
    def scan_block_scalar_ignored_line(self, start_mark):
        # type: (Any) -> Any
        # See the specification for details.
        srp = self.reader.peek
        srf = self.reader.forward
        prefix = ''
        comment = None
        while srp() == ' ':
            prefix += srp()
            srf()
        if srp() == '#':
            comment = prefix
            while srp() not in _THE_END:
                comment += srp()
                srf()
        ch = srp()
        if ch not in _THE_END:
            raise ScannerError(
                'while scanning a block scalar',
                start_mark,
                'expected a comment or a line break, but found %r' % utf8(ch),
                self.reader.get_mark(),
            )
        self.scan_line_break()
        return comment

    def scan_block_scalar_indentation(self):
        # type: () -> Any
        # See the specification for details.
        srp = self.reader.peek
        srf = self.reader.forward
        chunks = []
        max_indent = 0
        end_mark = self.reader.get_mark()
        while srp() in ' \r\n\x85\u2028\u2029':
            if srp() != ' ':
                chunks.append(self.scan_line_break())
                end_mark = self.reader.get_mark()
            else:
                srf()
                if self.reader.column > max_indent:
                    max_indent = self.reader.column
        return chunks, max_indent, end_mark

    def scan_block_scalar_breaks(self, indent):
        # type: (int) -> Any
        # See the specification for details.
        chunks = []
        srp = self.reader.peek
        srf = self.reader.forward
        end_mark = self.reader.get_mark()
        while self.reader.column < indent and srp() == ' ':
            srf()
        while srp() in '\r\n\x85\u2028\u2029':
            chunks.append(self.scan_line_break())
            end_mark = self.reader.get_mark()
            while self.reader.column < indent and srp() == ' ':
                srf()
        return chunks, end_mark

    def scan_flow_scalar(self, style):
        # type: (Any) -> Any
        # See the specification for details.
        # Note that we loosen the indentation rules for quoted scalars. Quoted
        # scalars don't need to adhere to indentation because " and ' clearly
        # mark the beginning and the end of them. Therefore we are less
        # restrictive than the specification requires. We only need to check
        # that document separators are not included in scalars.
        if style == '"':
            double = True
        else:
            double = False
        srp = self.reader.peek
        chunks = []  # type: List[Any]
        start_mark = self.reader.get_mark()
        quote = srp()
        self.reader.forward()
        chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
        while srp() != quote:
            chunks.extend(self.scan_flow_scalar_spaces(double, start_mark))
            chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
        self.reader.forward()
        end_mark = self.reader.get_mark()
        return ScalarToken("".join(chunks), False, start_mark, end_mark, style)

    ESCAPE_REPLACEMENTS = {
        '0': '\0',
        'a': '\x07',
        'b': '\x08',
        't': '\x09',
        '\t': '\x09',
        'n': '\x0A',
        'v': '\x0B',
        'f': '\x0C',
        'r': '\x0D',
        'e': '\x1B',
        ' ': '\x20',
        '"': '"',
        '/': '/',  # as per http://www.json.org/
        '\\': '\\',
        'N': '\x85',
        '_': '\xA0',
        'L': '\u2028',
        'P': '\u2029',
    }

    ESCAPE_CODES = {'x': 2, 'u': 4, 'U': 8}
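
    # Illustrative sketch (editor's addition, not part of the original source):
    # in a double-quoted scalar, "\t" maps through ESCAPE_REPLACEMENTS to a
    # tab, while "\x41", "\u263A" and "\U0001F600" go through ESCAPE_CODES,
    # which gives the number of hex digits to read (2, 4 or 8) before the
    # code point is converted with unichr().
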
  1249. def scan_flow_scalar_non_spaces(self, double, start_mark):
  1250. # type: (Any, Any) -> Any
  1251. # See the specification for details.
  1252. chunks = [] # type: List[Any]
  1253. srp = self.reader.peek
  1254. srf = self.reader.forward
  1255. while True:
  1256. length = 0
  1257. while srp(length) not in ' \n\'"\\\0\t\r\x85\u2028\u2029':
  1258. length += 1
  1259. if length != 0:
  1260. chunks.append(self.reader.prefix(length))
  1261. srf(length)
  1262. ch = srp()
  1263. if not double and ch == "'" and srp(1) == "'":
  1264. chunks.append("'")
  1265. srf(2)
  1266. elif (double and ch == "'") or (not double and ch in '"\\'):
  1267. chunks.append(ch)
  1268. srf()
  1269. elif double and ch == '\\':
  1270. srf()
  1271. ch = srp()
  1272. if ch in self.ESCAPE_REPLACEMENTS:
  1273. chunks.append(self.ESCAPE_REPLACEMENTS[ch])
  1274. srf()
  1275. elif ch in self.ESCAPE_CODES:
  1276. length = self.ESCAPE_CODES[ch]
  1277. srf()
  1278. for k in range(length):
  1279. if srp(k) not in '0123456789ABCDEFabcdef':
  1280. raise ScannerError(
  1281. 'while scanning a double-quoted scalar',
  1282. start_mark,
  1283. 'expected escape sequence of %d hexdecimal '
  1284. 'numbers, but found %r' % (length, utf8(srp(k))),
  1285. self.reader.get_mark(),
  1286. )
  1287. code = int(self.reader.prefix(length), 16)
  1288. chunks.append(unichr(code))
  1289. srf(length)
  1290. elif ch in '\n\r\x85\u2028\u2029':
  1291. self.scan_line_break()
  1292. chunks.extend(self.scan_flow_scalar_breaks(double, start_mark))
  1293. else:
  1294. raise ScannerError(
  1295. 'while scanning a double-quoted scalar',
  1296. start_mark,
  1297. 'found unknown escape character %r' % utf8(ch),
  1298. self.reader.get_mark(),
  1299. )
  1300. else:
  1301. return chunks
  1302. def scan_flow_scalar_spaces(self, double, start_mark):
  1303. # type: (Any, Any) -> Any
  1304. # See the specification for details.
  1305. srp = self.reader.peek
  1306. chunks = []
  1307. length = 0
  1308. while srp(length) in ' \t':
  1309. length += 1
  1310. whitespaces = self.reader.prefix(length)
  1311. self.reader.forward(length)
  1312. ch = srp()
  1313. if ch == '\0':
  1314. raise ScannerError(
  1315. 'while scanning a quoted scalar',
  1316. start_mark,
  1317. 'found unexpected end of stream',
  1318. self.reader.get_mark(),
  1319. )
  1320. elif ch in '\r\n\x85\u2028\u2029':
  1321. line_break = self.scan_line_break()
  1322. breaks = self.scan_flow_scalar_breaks(double, start_mark)
  1323. if line_break != '\n':
  1324. chunks.append(line_break)
  1325. elif not breaks:
  1326. chunks.append(' ')
  1327. chunks.extend(breaks)
  1328. else:
  1329. chunks.append(whitespaces)
  1330. return chunks

    def scan_flow_scalar_breaks(self, double, start_mark):
        # type: (Any, Any) -> Any
        # See the specification for details.
        chunks = []  # type: List[Any]
        srp = self.reader.peek
        srf = self.reader.forward
        while True:
            # Instead of checking indentation, we check for document
            # separators.
            prefix = self.reader.prefix(3)
            if (prefix == '---' or prefix == '...') and srp(3) in _THE_END_SPACE_TAB:
                raise ScannerError(
                    'while scanning a quoted scalar',
                    start_mark,
                    'found unexpected document separator',
                    self.reader.get_mark(),
                )
            while srp() in ' \t':
                srf()
            if srp() in '\r\n\x85\u2028\u2029':
                chunks.append(self.scan_line_break())
            else:
                return chunks
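
    # Note (illustrative): because the document-separator check above runs at the
    # start of each continuation line, a quoted scalar interrupted by '---' or
    # '...' raises a ScannerError rather than silently swallowing the marker,
    # e.g. loading
    #
    #     k: "a
    #     --- b"
    #
    # fails with "found unexpected document separator".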

    def scan_plain(self):
        # type: () -> Any
        # See the specification for details.
        # We add an additional restriction for the flow context:
        #   plain scalars in the flow context cannot contain ',', ': ' and '?'.
        # We also keep track of the `allow_simple_key` flag here.
        # Indentation rules are loosened for the flow context.
        srp = self.reader.peek
        srf = self.reader.forward
        chunks = []  # type: List[Any]
        start_mark = self.reader.get_mark()
        end_mark = start_mark
        indent = self.indent + 1
        # We allow zero indentation for scalars, but then we need to check for
        # document separators at the beginning of the line.
        # if indent == 0:
        #     indent = 1
        spaces = []  # type: List[Any]
        while True:
            length = 0
            if srp() == '#':
                break
            while True:
                ch = srp(length)
                if ch == ':' and srp(length + 1) not in _THE_END_SPACE_TAB:
                    pass
                elif ch == '?' and self.scanner_processing_version != (1, 1):
                    pass
                elif (
                    ch in _THE_END_SPACE_TAB
                    or (
                        not self.flow_level
                        and ch == ':'
                        and srp(length + 1) in _THE_END_SPACE_TAB
                    )
                    or (self.flow_level and ch in ',:?[]{}')
                ):
                    break
                length += 1
            # It's not clear what we should do with ':' in the flow context.
            if (
                self.flow_level
                and ch == ':'
                and srp(length + 1) not in '\0 \t\r\n\x85\u2028\u2029,[]{}'
            ):
                srf(length)
                raise ScannerError(
                    'while scanning a plain scalar',
                    start_mark,
                    "found unexpected ':'",
                    self.reader.get_mark(),
                    'Please check '
                    'http://pyyaml.org/wiki/YAMLColonInFlowContext '
                    'for details.',
                )
            if length == 0:
                break
            self.allow_simple_key = False
            chunks.extend(spaces)
            chunks.append(self.reader.prefix(length))
            srf(length)
            end_mark = self.reader.get_mark()
            spaces = self.scan_plain_spaces(indent, start_mark)
            if (
                not spaces
                or srp() == '#'
                or (not self.flow_level and self.reader.column < indent)
            ):
                break
        token = ScalarToken("".join(chunks), True, start_mark, end_mark)
        if spaces and spaces[0] == '\n':
            # Create a comment token to preserve the trailing line breaks.
            comment = CommentToken("".join(spaces) + '\n', start_mark, end_mark)
            token.add_post_comment(comment)
        return token
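
    # Plain-scalar sketch (illustrative only, ``YAML`` as above): a plain scalar
    # ends at ': ' (colon followed by space), at ' #' (comment), and, inside flow
    # collections, also at ',', '[', ']', '{' and '}', e.g.
    #
    #     YAML(typ='safe').load('k: a b #c')  == {'k': 'a b'}
    #     YAML(typ='safe').load('[a b, c]')   == ['a b', 'c']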

    def scan_plain_spaces(self, indent, start_mark):
        # type: (Any, Any) -> Any
        # See the specification for details.
        # The specification is really confusing about tabs in plain scalars.
        # We just forbid them completely. Do not use tabs in YAML!
        srp = self.reader.peek
        srf = self.reader.forward
        chunks = []
        length = 0
        while srp(length) in ' ':
            length += 1
        whitespaces = self.reader.prefix(length)
        self.reader.forward(length)
        ch = srp()
        if ch in '\r\n\x85\u2028\u2029':
            line_break = self.scan_line_break()
            self.allow_simple_key = True
            prefix = self.reader.prefix(3)
            if (prefix == '---' or prefix == '...') and srp(3) in _THE_END_SPACE_TAB:
                return
            breaks = []
            while srp() in ' \r\n\x85\u2028\u2029':
                if srp() == ' ':
                    srf()
                else:
                    breaks.append(self.scan_line_break())
                    prefix = self.reader.prefix(3)
                    if (prefix == '---' or prefix == '...') and srp(3) in _THE_END_SPACE_TAB:
                        return
            if line_break != '\n':
                chunks.append(line_break)
            elif not breaks:
                chunks.append(' ')
            chunks.extend(breaks)
        elif whitespaces:
            chunks.append(whitespaces)
        return chunks
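
    # Multi-line plain scalars (illustrative, ``YAML`` as above): the folding
    # performed here means a single line break between continuation lines of a
    # plain scalar becomes one space, e.g.
    #
    #     YAML(typ='safe').load('k: a\n  b')  == {'k': 'a b'}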

    def scan_tag_handle(self, name, start_mark):
        # type: (Any, Any) -> Any
        # See the specification for details.
        # For some strange reason, the specification does not allow '_' in
        # tag handles. I have allowed it anyway.
        srp = self.reader.peek
        ch = srp()
        if ch != '!':
            raise ScannerError(
                'while scanning a %s' % (name,),
                start_mark,
                "expected '!', but found %r" % utf8(ch),
                self.reader.get_mark(),
            )
        length = 1
        ch = srp(length)
        if ch != ' ':
            while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' or ch in '-_':
                length += 1
                ch = srp(length)
            if ch != '!':
                self.reader.forward(length)
                raise ScannerError(
                    'while scanning a %s' % (name,),
                    start_mark,
                    "expected '!', but found %r" % utf8(ch),
                    self.reader.get_mark(),
                )
            length += 1
        value = self.reader.prefix(length)
        self.reader.forward(length)
        return value
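
    # Tag-handle sketch (illustrative): this returns the primary handle '!', the
    # secondary handle '!!', or a named handle such as '!e!'. For example, for
    # the directive line
    #
    #     %TAG !e! tag:example.com,2012:
    #
    # the handle scanned here is '!e!'; suffixes like '!e!foo' are later
    # resolved against the declared prefix.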

    def scan_tag_uri(self, name, start_mark):
        # type: (Any, Any) -> Any
        # See the specification for details.
        # Note: we do not check if URI is well-formed.
        srp = self.reader.peek
        chunks = []
        length = 0
        ch = srp(length)
        while (
            '0' <= ch <= '9'
            or 'A' <= ch <= 'Z'
            or 'a' <= ch <= 'z'
            or ch in "-;/?:@&=+$,_.!~*'()[]%"
            or ((self.scanner_processing_version > (1, 1)) and ch == '#')
        ):
            if ch == '%':
                chunks.append(self.reader.prefix(length))
                self.reader.forward(length)
                length = 0
                chunks.append(self.scan_uri_escapes(name, start_mark))
            else:
                length += 1
            ch = srp(length)
        if length != 0:
            chunks.append(self.reader.prefix(length))
            self.reader.forward(length)
            length = 0
        if not chunks:
            raise ScannerError(
                'while parsing a %s' % (name,),
                start_mark,
                'expected URI, but found %r' % utf8(ch),
                self.reader.get_mark(),
            )
        return "".join(chunks)
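
    # Tag-URI sketch (illustrative): the characters of a tag (or tag suffix) are
    # collected verbatim, %-escapes are expanded via scan_uri_escapes(), and '#'
    # is only accepted for YAML versions newer than 1.1. For example, the
    # verbatim tag in
    #
    #     !<tag:yaml.org,2002:str> text
    #
    # is scanned here as 'tag:yaml.org,2002:str'.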

    def scan_uri_escapes(self, name, start_mark):
        # type: (Any, Any) -> Any
        # See the specification for details.
        srp = self.reader.peek
        srf = self.reader.forward
        code_bytes = []  # type: List[Any]
        mark = self.reader.get_mark()
        while srp() == '%':
            srf()
            for k in range(2):
                if srp(k) not in '0123456789ABCDEFabcdef':
                    raise ScannerError(
                        'while scanning a %s' % (name,),
                        start_mark,
                        'expected URI escape sequence of 2 hexadecimal numbers,'
                        ' but found %r' % utf8(srp(k)),
                        self.reader.get_mark(),
                    )
            if PY3:
                code_bytes.append(int(self.reader.prefix(2), 16))
            else:
                code_bytes.append(chr(int(self.reader.prefix(2), 16)))
            srf(2)
        try:
            if PY3:
                value = bytes(code_bytes).decode('utf-8')
            else:
                value = unicode(b"".join(code_bytes), 'utf-8')
        except UnicodeDecodeError as exc:
            raise ScannerError('while scanning a %s' % (name,), start_mark, str(exc), mark)
        return value
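
    # URI-escape sketch (illustrative): consecutive %XX groups are decoded as a
    # single UTF-8 byte sequence, so '%21' yields '!' and '%E2%82%AC' yields the
    # euro sign (U+20AC); an invalid byte sequence raises a ScannerError.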

    def scan_line_break(self):
        # type: () -> Any
        # Transforms:
        #   '\r\n'      :   '\n'
        #   '\r'        :   '\n'
        #   '\n'        :   '\n'
        #   '\x85'      :   '\n'
        #   '\u2028'    :   '\u2028'
        #   '\u2029'    :   '\u2029'
        #   default     :   ''
        ch = self.reader.peek()
        if ch in '\r\n\x85':
            if self.reader.prefix(2) == '\r\n':
                self.reader.forward(2)
            else:
                self.reader.forward()
            return '\n'
        elif ch in '\u2028\u2029':
            self.reader.forward()
            return ch
        return ""


class RoundTripScanner(Scanner):
    def check_token(self, *choices):
        # type: (Any) -> bool
        # Check if the next token is one of the given types.
        while self.need_more_tokens():
            self.fetch_more_tokens()
        self._gather_comments()
        if bool(self.tokens):
            if not choices:
                return True
            for choice in choices:
                if isinstance(self.tokens[0], choice):
                    return True
        return False

    def peek_token(self):
        # type: () -> Any
        # Return the next token, but do not delete it from the queue.
        while self.need_more_tokens():
            self.fetch_more_tokens()
        self._gather_comments()
        if bool(self.tokens):
            return self.tokens[0]
        return None

    def _gather_comments(self):
        # type: () -> Any
        """combine multiple comment lines"""
        comments = []  # type: List[Any]
        if not self.tokens:
            return comments
        if isinstance(self.tokens[0], CommentToken):
            comment = self.tokens.pop(0)
            self.tokens_taken += 1
            comments.append(comment)
        while self.need_more_tokens():
            self.fetch_more_tokens()
            if not self.tokens:
                return comments
            if isinstance(self.tokens[0], CommentToken):
                self.tokens_taken += 1
                comment = self.tokens.pop(0)
                # nprint('dropping2', comment)
                comments.append(comment)
        if len(comments) >= 1:
            self.tokens[0].add_pre_comments(comments)
        # pull in post comment on e.g. ':'
        if not self.done and len(self.tokens) < 2:
            self.fetch_more_tokens()
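
    # Comment gathering (descriptive note): _gather_comments() pops any leading
    # CommentTokens off the token queue and attaches them via add_pre_comments()
    # to the first real token, which is how full-line comments preceding e.g. a
    # mapping key are kept for round-tripping.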

    def get_token(self):
        # type: () -> Any
        # Return the next token.
        while self.need_more_tokens():
            self.fetch_more_tokens()
        self._gather_comments()
        if bool(self.tokens):
            # nprint('tk', self.tokens)
            # Only add a post comment to single-line tokens (scalar, value and
            # flow-end tokens); otherwise hidden stream tokens could pick the
            # comment up (left alone, such comments become pre-comments for the
            # next map/seq).
            if (
                len(self.tokens) > 1
                and isinstance(
                    self.tokens[0],
                    (ScalarToken, ValueToken, FlowSequenceEndToken, FlowMappingEndToken),
                )
                and isinstance(self.tokens[1], CommentToken)
                and self.tokens[0].end_mark.line == self.tokens[1].start_mark.line
            ):
                self.tokens_taken += 1
                c = self.tokens.pop(1)
                self.fetch_more_tokens()
                while len(self.tokens) > 1 and isinstance(self.tokens[1], CommentToken):
                    self.tokens_taken += 1
                    c1 = self.tokens.pop(1)
                    c.value = c.value + (' ' * c1.start_mark.column) + c1.value
                    self.fetch_more_tokens()
                self.tokens[0].add_post_comment(c)
            elif (
                len(self.tokens) > 1
                and isinstance(self.tokens[0], ScalarToken)
                and isinstance(self.tokens[1], CommentToken)
                and self.tokens[0].end_mark.line != self.tokens[1].start_mark.line
            ):
                self.tokens_taken += 1
                c = self.tokens.pop(1)
                c.value = (
                    '\n' * (c.start_mark.line - self.tokens[0].end_mark.line)
                    + (' ' * c.start_mark.column)
                    + c.value
                )
                self.tokens[0].add_post_comment(c)
                self.fetch_more_tokens()
                while len(self.tokens) > 1 and isinstance(self.tokens[1], CommentToken):
                    self.tokens_taken += 1
                    c1 = self.tokens.pop(1)
                    c.value = c.value + (' ' * c1.start_mark.column) + c1.value
                    self.fetch_more_tokens()
            self.tokens_taken += 1
            return self.tokens.pop(0)
        return None
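
    # Post-comment handling (descriptive note, a sketch of the intended
    # behaviour rather than an exhaustive description): a comment that starts on
    # the same line as a scalar/value/flow-end token is attached as a post
    # comment of that token, with newline/indentation padding prepended when the
    # comment starts on a later line. Roughly, round-tripping
    #
    #     k: v  # trailing
    #
    # keeps '# trailing' attached to the value of 'k'.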

    def fetch_comment(self, comment):
        # type: (Any) -> None
        value, start_mark, end_mark = comment
        while value and value[-1] == ' ':
            # empty line within indented key context
            # no need to update end-mark, that is not used
            value = value[:-1]
        self.tokens.append(CommentToken(value, start_mark, end_mark))

    # scanner

    def scan_to_next_token(self):
        # type: () -> Any
        # We ignore spaces, line breaks and comments.
        # If we find a line break in the block context, we set the flag
        # `allow_simple_key` on.
        # The byte order mark is stripped if it's the first character in the
        # stream. We do not yet support BOM inside the stream as the
        # specification requires. Any such mark will be considered as a part
        # of the document.
        #
        # TODO: We need to make tab handling rules more sane. A good rule is:
        #   Tabs cannot precede tokens
        #   BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
        #   KEY(block), VALUE(block), BLOCK-ENTRY
        # So the checking code is
        #   if <TAB>:
        #       self.allow_simple_keys = False
        # We also need to add the check for `allow_simple_keys == True` to
        # `unwind_indent` before issuing BLOCK-END.
        # Scanners for block, flow, and plain scalars need to be modified.
        srp = self.reader.peek
        srf = self.reader.forward
        if self.reader.index == 0 and srp() == '\uFEFF':
            srf()
        found = False
        while not found:
            while srp() == ' ':
                srf()
            ch = srp()
            if ch == '#':
                start_mark = self.reader.get_mark()
                comment = ch
                srf()
                while ch not in _THE_END:
                    ch = srp()
                    if ch == '\0':  # don't gobble the end-of-stream character
                        # but add an explicit newline as "YAML processors should
                        # terminate the stream with an explicit line break"
                        # https://yaml.org/spec/1.2/spec.html#id2780069
                        comment += '\n'
                        break
                    comment += ch
                    srf()
                # gather any blank lines following the comment too
                ch = self.scan_line_break()
                while len(ch) > 0:
                    comment += ch
                    ch = self.scan_line_break()
                end_mark = self.reader.get_mark()
                if not self.flow_level:
                    self.allow_simple_key = True
                return comment, start_mark, end_mark
            if bool(self.scan_line_break()):
                start_mark = self.reader.get_mark()
                if not self.flow_level:
                    self.allow_simple_key = True
                ch = srp()
                if ch == '\n':  # empty toplevel lines
                    start_mark = self.reader.get_mark()
                    comment = ""
                    while ch:
                        ch = self.scan_line_break(empty_line=True)
                        comment += ch
                    if srp() == '#':
                        # empty line followed by indented real comment
                        comment = comment.rsplit('\n', 1)[0] + '\n'
                    end_mark = self.reader.get_mark()
                    return comment, start_mark, end_mark
            else:
                found = True
        return None
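
    # Unlike the base Scanner, this scan_to_next_token() returns a
    # (comment, start_mark, end_mark) tuple when it encounters a comment or
    # blank top-level lines, so that the caller can hand it to fetch_comment()
    # above and emit a CommentToken instead of discarding the text.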

    def scan_line_break(self, empty_line=False):
        # type: (bool) -> Text
        # Transforms:
        #   '\r\n'      :   '\n'
        #   '\r'        :   '\n'
        #   '\n'        :   '\n'
        #   '\x85'      :   '\n'
        #   '\u2028'    :   '\u2028'
        #   '\u2029'    :   '\u2029'
        #   default     :   ''
        ch = self.reader.peek()  # type: Text
        if ch in '\r\n\x85':
            if self.reader.prefix(2) == '\r\n':
                self.reader.forward(2)
            else:
                self.reader.forward()
            return '\n'
        elif ch in '\u2028\u2029':
            self.reader.forward()
            return ch
        elif empty_line and ch in '\t ':
            self.reader.forward()
            return ch
        return ""

    def scan_block_scalar(self, style, rt=True):
        # type: (Any, Optional[bool]) -> Any
        return Scanner.scan_block_scalar(self, style, rt=rt)


# try:
#     import psyco
#     psyco.bind(Scanner)
# except ImportError:
#     pass