scannerc.go 86 KB


  1. //
  2. // Copyright (c) 2011-2019 Canonical Ltd
  3. // Copyright (c) 2006-2010 Kirill Simonov
  4. //
  5. // Permission is hereby granted, free of charge, to any person obtaining a copy of
  6. // this software and associated documentation files (the "Software"), to deal in
  7. // the Software without restriction, including without limitation the rights to
  8. // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  9. // of the Software, and to permit persons to whom the Software is furnished to do
  10. // so, subject to the following conditions:
  11. //
  12. // The above copyright notice and this permission notice shall be included in all
  13. // copies or substantial portions of the Software.
  14. //
  15. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. // SOFTWARE.
  22. package yaml
  23. import (
  24. "bytes"
  25. "fmt"
  26. )
  27. // Introduction
  28. // ************
  29. //
  30. // The following notes assume that you are familiar with the YAML specification
  31. // (http://yaml.org/spec/1.2/spec.html). We mostly follow it, although in
  32. // some cases we are less restrictive than it requires.
  33. //
  34. // The process of transforming a YAML stream into a sequence of events is
  35. // divided into two steps: Scanning and Parsing.
  36. //
  37. // The Scanner transforms the input stream into a sequence of tokens, while the
  38. // Parser transforms the sequence of tokens produced by the Scanner into a
  39. // sequence of parsing events.
  40. //
  41. // The Scanner is rather clever and complicated. The Parser, on the contrary,
  42. // is a straightforward implementation of a recursive-descent parser (or,
  43. // LL(1) parser, as it is usually called).
  44. //
  45. // Actually there are two issues of Scanning that might be called "clever", the
  46. // rest is quite straightforward. The issues are "block collection start" and
  47. // "simple keys". Both issues are explained below in detail.
  48. //
  49. // Here the Scanning step is explained and implemented. We start with the list
  50. // of all the tokens produced by the Scanner together with short descriptions.
  51. //
  52. // Now, tokens:
  53. //
  54. // STREAM-START(encoding) # The stream start.
  55. // STREAM-END # The stream end.
  56. // VERSION-DIRECTIVE(major,minor) # The '%YAML' directive.
  57. // TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive.
  58. // DOCUMENT-START # '---'
  59. // DOCUMENT-END # '...'
  60. // BLOCK-SEQUENCE-START # Indentation increase denoting a block
  61. // BLOCK-MAPPING-START # sequence or a block mapping.
  62. // BLOCK-END # Indentation decrease.
  63. // FLOW-SEQUENCE-START # '['
  64. // FLOW-SEQUENCE-END # ']'
  65. // FLOW-MAPPING-START # '{'
  66. // FLOW-MAPPING-END # '}'
  67. // BLOCK-ENTRY # '-'
  68. // FLOW-ENTRY # ','
  69. // KEY # '?' or nothing (simple keys).
  70. // VALUE # ':'
  71. // ALIAS(anchor) # '*anchor'
  72. // ANCHOR(anchor) # '&anchor'
  73. // TAG(handle,suffix) # '!handle!suffix'
  74. // SCALAR(value,style) # A scalar.
  75. //
  76. // The following two tokens are "virtual" tokens denoting the beginning and the
  77. // end of the stream:
  78. //
  79. // STREAM-START(encoding)
  80. // STREAM-END
  81. //
  82. // We pass the information about the input stream encoding with the
  83. // STREAM-START token.
  84. //
  85. // The next two tokens are responsible for directives:
  86. //
  87. // VERSION-DIRECTIVE(major,minor)
  88. // TAG-DIRECTIVE(handle,prefix)
  89. //
  90. // Example:
  91. //
  92. // %YAML 1.1
  93. // %TAG ! !foo
  94. // %TAG !yaml! tag:yaml.org,2002:
  95. // ---
  96. //
  97. // The corresponding sequence of tokens:
  98. //
  99. // STREAM-START(utf-8)
  100. // VERSION-DIRECTIVE(1,1)
  101. // TAG-DIRECTIVE("!","!foo")
  102. // TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:")
  103. // DOCUMENT-START
  104. // STREAM-END
  105. //
  106. // Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
  107. // line.
  108. //
  109. // The document start and end indicators are represented by:
  110. //
  111. // DOCUMENT-START
  112. // DOCUMENT-END
  113. //
  114. // Note that if a YAML stream contains an implicit document (without '---'
  115. // and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
  116. // produced.
  117. //
  118. // In the following examples, we present whole documents together with the
  119. // produced tokens.
  120. //
  121. // 1. An implicit document:
  122. //
  123. // 'a scalar'
  124. //
  125. // Tokens:
  126. //
  127. // STREAM-START(utf-8)
  128. // SCALAR("a scalar",single-quoted)
  129. // STREAM-END
  130. //
  131. // 2. An explicit document:
  132. //
  133. // ---
  134. // 'a scalar'
  135. // ...
  136. //
  137. // Tokens:
  138. //
  139. // STREAM-START(utf-8)
  140. // DOCUMENT-START
  141. // SCALAR("a scalar",single-quoted)
  142. // DOCUMENT-END
  143. // STREAM-END
  144. //
  145. // 3. Several documents in a stream:
  146. //
  147. // 'a scalar'
  148. // ---
  149. // 'another scalar'
  150. // ---
  151. // 'yet another scalar'
  152. //
  153. // Tokens:
  154. //
  155. // STREAM-START(utf-8)
  156. // SCALAR("a scalar",single-quoted)
  157. // DOCUMENT-START
  158. // SCALAR("another scalar",single-quoted)
  159. // DOCUMENT-START
  160. // SCALAR("yet another scalar",single-quoted)
  161. // STREAM-END
  162. //
  163. // We have already introduced the SCALAR token above. The following tokens are
  164. // used to describe aliases, anchors, tags, and scalars:
  165. //
  166. // ALIAS(anchor)
  167. // ANCHOR(anchor)
  168. // TAG(handle,suffix)
  169. // SCALAR(value,style)
  170. //
  171. // The following series of examples illustrate the usage of these tokens:
  172. //
  173. // 1. A recursive sequence:
  174. //
  175. // &A [ *A ]
  176. //
  177. // Tokens:
  178. //
  179. // STREAM-START(utf-8)
  180. // ANCHOR("A")
  181. // FLOW-SEQUENCE-START
  182. // ALIAS("A")
  183. // FLOW-SEQUENCE-END
  184. // STREAM-END
  185. //
  186. // 2. A tagged scalar:
  187. //
  188. // !!float "3.14" # A good approximation.
  189. //
  190. // Tokens:
  191. //
  192. // STREAM-START(utf-8)
  193. // TAG("!!","float")
  194. // SCALAR("3.14",double-quoted)
  195. // STREAM-END
  196. //
  197. // 3. Various scalar styles:
  198. //
  199. // --- # Implicit empty plain scalars do not produce tokens.
  200. // --- a plain scalar
  201. // --- 'a single-quoted scalar'
  202. // --- "a double-quoted scalar"
  203. // --- |-
  204. // a literal scalar
  205. // --- >-
  206. // a folded
  207. // scalar
  208. //
  209. // Tokens:
  210. //
  211. // STREAM-START(utf-8)
  212. // DOCUMENT-START
  213. // DOCUMENT-START
  214. // SCALAR("a plain scalar",plain)
  215. // DOCUMENT-START
  216. // SCALAR("a single-quoted scalar",single-quoted)
  217. // DOCUMENT-START
  218. // SCALAR("a double-quoted scalar",double-quoted)
  219. // DOCUMENT-START
  220. // SCALAR("a literal scalar",literal)
  221. // DOCUMENT-START
  222. // SCALAR("a folded scalar",folded)
  223. // STREAM-END
  224. //
  225. // Now it's time to review collection-related tokens. We will start with
  226. // flow collections:
  227. //
  228. // FLOW-SEQUENCE-START
  229. // FLOW-SEQUENCE-END
  230. // FLOW-MAPPING-START
  231. // FLOW-MAPPING-END
  232. // FLOW-ENTRY
  233. // KEY
  234. // VALUE
  235. //
  236. // The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
  237. // FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
  238. // correspondingly. FLOW-ENTRY represents the ',' indicator. Finally the
  239. // indicators '?' and ':', which are used for denoting mapping keys and values,
  240. // are represented by the KEY and VALUE tokens.
  241. //
  242. // The following examples show flow collections:
  243. //
  244. // 1. A flow sequence:
  245. //
  246. // [item 1, item 2, item 3]
  247. //
  248. // Tokens:
  249. //
  250. // STREAM-START(utf-8)
  251. // FLOW-SEQUENCE-START
  252. // SCALAR("item 1",plain)
  253. // FLOW-ENTRY
  254. // SCALAR("item 2",plain)
  255. // FLOW-ENTRY
  256. // SCALAR("item 3",plain)
  257. // FLOW-SEQUENCE-END
  258. // STREAM-END
  259. //
  260. // 2. A flow mapping:
  261. //
  262. // {
  263. // a simple key: a value, # Note that the KEY token is produced.
  264. // ? a complex key: another value,
  265. // }
  266. //
  267. // Tokens:
  268. //
  269. // STREAM-START(utf-8)
  270. // FLOW-MAPPING-START
  271. // KEY
  272. // SCALAR("a simple key",plain)
  273. // VALUE
  274. // SCALAR("a value",plain)
  275. // FLOW-ENTRY
  276. // KEY
  277. // SCALAR("a complex key",plain)
  278. // VALUE
  279. // SCALAR("another value",plain)
  280. // FLOW-ENTRY
  281. // FLOW-MAPPING-END
  282. // STREAM-END
  283. //
  284. // A simple key is a key which is not denoted by the '?' indicator. Note that
  285. // the Scanner still produces the KEY token whenever it encounters a simple key.
  286. //
  287. // For scanning block collections, the following tokens are used (note that we
  288. // repeat KEY and VALUE here):
  289. //
  290. // BLOCK-SEQUENCE-START
  291. // BLOCK-MAPPING-START
  292. // BLOCK-END
  293. // BLOCK-ENTRY
  294. // KEY
  295. // VALUE
  296. //
  297. // The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
  298. // increase that precedes a block collection (cf. the INDENT token in Python).
  299. // The token BLOCK-END denotes indentation decrease that ends a block collection
  300. // (cf. the DEDENT token in Python). However YAML has some syntax peculiarities
  301. // that make detection of these tokens more complex.
  302. //
  303. // The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
  304. // '-', '?', and ':' correspondingly.
  305. //
  306. // The following examples show how the tokens BLOCK-SEQUENCE-START,
  307. // BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
  308. //
  309. // 1. Block sequences:
  310. //
  311. // - item 1
  312. // - item 2
  313. // -
  314. // - item 3.1
  315. // - item 3.2
  316. // -
  317. // key 1: value 1
  318. // key 2: value 2
  319. //
  320. // Tokens:
  321. //
  322. // STREAM-START(utf-8)
  323. // BLOCK-SEQUENCE-START
  324. // BLOCK-ENTRY
  325. // SCALAR("item 1",plain)
  326. // BLOCK-ENTRY
  327. // SCALAR("item 2",plain)
  328. // BLOCK-ENTRY
  329. // BLOCK-SEQUENCE-START
  330. // BLOCK-ENTRY
  331. // SCALAR("item 3.1",plain)
  332. // BLOCK-ENTRY
  333. // SCALAR("item 3.2",plain)
  334. // BLOCK-END
  335. // BLOCK-ENTRY
  336. // BLOCK-MAPPING-START
  337. // KEY
  338. // SCALAR("key 1",plain)
  339. // VALUE
  340. // SCALAR("value 1",plain)
  341. // KEY
  342. // SCALAR("key 2",plain)
  343. // VALUE
  344. // SCALAR("value 2",plain)
  345. // BLOCK-END
  346. // BLOCK-END
  347. // STREAM-END
  348. //
  349. // 2. Block mappings:
  350. //
  351. // a simple key: a value # The KEY token is produced here.
  352. // ? a complex key
  353. // : another value
  354. // a mapping:
  355. // key 1: value 1
  356. // key 2: value 2
  357. // a sequence:
  358. // - item 1
  359. // - item 2
  360. //
  361. // Tokens:
  362. //
  363. // STREAM-START(utf-8)
  364. // BLOCK-MAPPING-START
  365. // KEY
  366. // SCALAR("a simple key",plain)
  367. // VALUE
  368. // SCALAR("a value",plain)
  369. // KEY
  370. // SCALAR("a complex key",plain)
  371. // VALUE
  372. // SCALAR("another value",plain)
  373. // KEY
  374. // SCALAR("a mapping",plain)
  375. // BLOCK-MAPPING-START
  376. // KEY
  377. // SCALAR("key 1",plain)
  378. // VALUE
  379. // SCALAR("value 1",plain)
  380. // KEY
  381. // SCALAR("key 2",plain)
  382. // VALUE
  383. // SCALAR("value 2",plain)
  384. // BLOCK-END
  385. // KEY
  386. // SCALAR("a sequence",plain)
  387. // VALUE
  388. // BLOCK-SEQUENCE-START
  389. // BLOCK-ENTRY
  390. // SCALAR("item 1",plain)
  391. // BLOCK-ENTRY
  392. // SCALAR("item 2",plain)
  393. // BLOCK-END
  394. // BLOCK-END
  395. // STREAM-END
  396. //
  397. // YAML does not always require to start a new block collection from a new
  398. // line. If the current line contains only '-', '?', and ':' indicators, a new
  399. // block collection may start at the current line. The following examples
  400. // illustrate this case:
  401. //
  402. // 1. Collections in a sequence:
  403. //
  404. // - - item 1
  405. // - item 2
  406. // - key 1: value 1
  407. // key 2: value 2
  408. // - ? complex key
  409. // : complex value
  410. //
  411. // Tokens:
  412. //
  413. // STREAM-START(utf-8)
  414. // BLOCK-SEQUENCE-START
  415. // BLOCK-ENTRY
  416. // BLOCK-SEQUENCE-START
  417. // BLOCK-ENTRY
  418. // SCALAR("item 1",plain)
  419. // BLOCK-ENTRY
  420. // SCALAR("item 2",plain)
  421. // BLOCK-END
  422. // BLOCK-ENTRY
  423. // BLOCK-MAPPING-START
  424. // KEY
  425. // SCALAR("key 1",plain)
  426. // VALUE
  427. // SCALAR("value 1",plain)
  428. // KEY
  429. // SCALAR("key 2",plain)
  430. // VALUE
  431. // SCALAR("value 2",plain)
  432. // BLOCK-END
  433. // BLOCK-ENTRY
  434. // BLOCK-MAPPING-START
  435. // KEY
  436. // SCALAR("complex key",plain)
  437. // VALUE
  438. // SCALAR("complex value",plain)
  439. // BLOCK-END
  440. // BLOCK-END
  441. // STREAM-END
  442. //
  443. // 2. Collections in a mapping:
  444. //
  445. // ? a sequence
  446. // : - item 1
  447. // - item 2
  448. // ? a mapping
  449. // : key 1: value 1
  450. // key 2: value 2
  451. //
  452. // Tokens:
  453. //
  454. // STREAM-START(utf-8)
  455. // BLOCK-MAPPING-START
  456. // KEY
  457. // SCALAR("a sequence",plain)
  458. // VALUE
  459. // BLOCK-SEQUENCE-START
  460. // BLOCK-ENTRY
  461. // SCALAR("item 1",plain)
  462. // BLOCK-ENTRY
  463. // SCALAR("item 2",plain)
  464. // BLOCK-END
  465. // KEY
  466. // SCALAR("a mapping",plain)
  467. // VALUE
  468. // BLOCK-MAPPING-START
  469. // KEY
  470. // SCALAR("key 1",plain)
  471. // VALUE
  472. // SCALAR("value 1",plain)
  473. // KEY
  474. // SCALAR("key 2",plain)
  475. // VALUE
  476. // SCALAR("value 2",plain)
  477. // BLOCK-END
  478. // BLOCK-END
  479. // STREAM-END
  480. //
  481. // YAML also permits non-indented sequences if they are included into a block
  482. // mapping. In this case, the token BLOCK-SEQUENCE-START is not produced:
  483. //
  484. // key:
  485. // - item 1 # BLOCK-SEQUENCE-START is NOT produced here.
  486. // - item 2
  487. //
  488. // Tokens:
  489. //
  490. // STREAM-START(utf-8)
  491. // BLOCK-MAPPING-START
  492. // KEY
  493. // SCALAR("key",plain)
  494. // VALUE
  495. // BLOCK-ENTRY
  496. // SCALAR("item 1",plain)
  497. // BLOCK-ENTRY
  498. // SCALAR("item 2",plain)
  499. // BLOCK-END
  500. //
  501. // Ensure that the buffer contains the required number of characters.
  502. // Return true on success, false on failure (reader error or memory error).
  503. func cache(parser *yaml_parser_t, length int) bool {
  504. // [Go] This was inlined: !cache(A, B) -> unread < B && !update(A, B)
  505. return parser.unread >= length || yaml_parser_update_buffer(parser, length)
  506. }
  507. // Advance the buffer pointer.
  508. func skip(parser *yaml_parser_t) {
  509. if !is_blank(parser.buffer, parser.buffer_pos) {
  510. parser.newlines = 0
  511. }
  512. parser.mark.index++
  513. parser.mark.column++
  514. parser.unread--
  515. parser.buffer_pos += width(parser.buffer[parser.buffer_pos])
  516. }
  517. func skip_line(parser *yaml_parser_t) {
  518. if is_crlf(parser.buffer, parser.buffer_pos) {
  519. parser.mark.index += 2
  520. parser.mark.column = 0
  521. parser.mark.line++
  522. parser.unread -= 2
  523. parser.buffer_pos += 2
  524. parser.newlines++
  525. } else if is_break(parser.buffer, parser.buffer_pos) {
  526. parser.mark.index++
  527. parser.mark.column = 0
  528. parser.mark.line++
  529. parser.unread--
  530. parser.buffer_pos += width(parser.buffer[parser.buffer_pos])
  531. parser.newlines++
  532. }
  533. }
  534. // Copy a character to a string buffer and advance pointers.
  535. func read(parser *yaml_parser_t, s []byte) []byte {
  536. if !is_blank(parser.buffer, parser.buffer_pos) {
  537. parser.newlines = 0
  538. }
  539. w := width(parser.buffer[parser.buffer_pos])
  540. if w == 0 {
  541. panic("invalid character sequence")
  542. }
  543. if len(s) == 0 {
  544. s = make([]byte, 0, 32)
  545. }
  546. if w == 1 && len(s)+w <= cap(s) {
  547. s = s[:len(s)+1]
  548. s[len(s)-1] = parser.buffer[parser.buffer_pos]
  549. parser.buffer_pos++
  550. } else {
  551. s = append(s, parser.buffer[parser.buffer_pos:parser.buffer_pos+w]...)
  552. parser.buffer_pos += w
  553. }
  554. parser.mark.index++
  555. parser.mark.column++
  556. parser.unread--
  557. return s
  558. }
  559. // Copy a line break character to a string buffer and advance pointers.
  560. func read_line(parser *yaml_parser_t, s []byte) []byte {
  561. buf := parser.buffer
  562. pos := parser.buffer_pos
  563. switch {
  564. case buf[pos] == '\r' && buf[pos+1] == '\n':
  565. // CR LF . LF
  566. s = append(s, '\n')
  567. parser.buffer_pos += 2
  568. parser.mark.index++
  569. parser.unread--
  570. case buf[pos] == '\r' || buf[pos] == '\n':
  571. // CR|LF . LF
  572. s = append(s, '\n')
  573. parser.buffer_pos += 1
  574. case buf[pos] == '\xC2' && buf[pos+1] == '\x85':
  575. // NEL . LF
  576. s = append(s, '\n')
  577. parser.buffer_pos += 2
  578. case buf[pos] == '\xE2' && buf[pos+1] == '\x80' && (buf[pos+2] == '\xA8' || buf[pos+2] == '\xA9'):
  579. // LS|PS . LS|PS
  580. s = append(s, buf[parser.buffer_pos:pos+3]...)
  581. parser.buffer_pos += 3
  582. default:
  583. return s
  584. }
  585. parser.mark.index++
  586. parser.mark.column = 0
  587. parser.mark.line++
  588. parser.unread--
  589. parser.newlines++
  590. return s
  591. }
  592. // Get the next token.
  593. func yaml_parser_scan(parser *yaml_parser_t, token *yaml_token_t) bool {
  594. // Erase the token object.
  595. *token = yaml_token_t{} // [Go] Is this necessary?
  596. // No tokens after STREAM-END or error.
  597. if parser.stream_end_produced || parser.error != yaml_NO_ERROR {
  598. return true
  599. }
  600. // Ensure that the tokens queue contains enough tokens.
  601. if !parser.token_available {
  602. if !yaml_parser_fetch_more_tokens(parser) {
  603. return false
  604. }
  605. }
  606. // Fetch the next token from the queue.
  607. *token = parser.tokens[parser.tokens_head]
  608. parser.tokens_head++
  609. parser.tokens_parsed++
  610. parser.token_available = false
  611. if token.typ == yaml_STREAM_END_TOKEN {
  612. parser.stream_end_produced = true
  613. }
  614. return true
  615. }
  616. // Set the scanner error and return false.
  617. func yaml_parser_set_scanner_error(parser *yaml_parser_t, context string, context_mark yaml_mark_t, problem string) bool {
  618. parser.error = yaml_SCANNER_ERROR
  619. parser.context = context
  620. parser.context_mark = context_mark
  621. parser.problem = problem
  622. parser.problem_mark = parser.mark
  623. return false
  624. }
  625. func yaml_parser_set_scanner_tag_error(parser *yaml_parser_t, directive bool, context_mark yaml_mark_t, problem string) bool {
  626. context := "while parsing a tag"
  627. if directive {
  628. context = "while parsing a %TAG directive"
  629. }
  630. return yaml_parser_set_scanner_error(parser, context, context_mark, problem)
  631. }
  // trace is a debugging helper. It prints "+++" followed by args right away
  // and returns a function that prints "---" followed by the same args, so a
  // caller can write `defer trace("name")()` to bracket a call.
  //
  // The arguments are copied when trace is called, so the returned function
  // prints the values as they were passed in.
  func trace(args ...interface{}) func() {
  	// One slice serves both lines; only the leading marker differs.
  	line := make([]interface{}, len(args)+1)
  	copy(line[1:], args)

  	line[0] = "+++"
  	fmt.Println(line...)

  	line[0] = "---"
  	return func() {
  		fmt.Println(line...)
  	}
  }
  638. // Ensure that the tokens queue contains at least one token which can be
  639. // returned to the Parser.
  640. func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool {
  641. // While we need more tokens to fetch, do it.
  642. for {
  643. // [Go] The comment parsing logic requires a lookahead of two tokens
  644. // so that foot comments may be parsed in time of associating them
  645. // with the tokens that are parsed before them, and also for line
  646. // comments to be transformed into head comments in some edge cases.
  647. if parser.tokens_head < len(parser.tokens)-2 {
  648. // If a potential simple key is at the head position, we need to fetch
  649. // the next token to disambiguate it.
  650. head_tok_idx, ok := parser.simple_keys_by_tok[parser.tokens_parsed]
  651. if !ok {
  652. break
  653. } else if valid, ok := yaml_simple_key_is_valid(parser, &parser.simple_keys[head_tok_idx]); !ok {
  654. return false
  655. } else if !valid {
  656. break
  657. }
  658. }
  659. // Fetch the next token.
  660. if !yaml_parser_fetch_next_token(parser) {
  661. return false
  662. }
  663. }
  664. parser.token_available = true
  665. return true
  666. }
  667. // The dispatcher for token fetchers.
  668. func yaml_parser_fetch_next_token(parser *yaml_parser_t) (ok bool) {
  669. // Ensure that the buffer is initialized.
  670. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  671. return false
  672. }
  673. // Check if we just started scanning. Fetch STREAM-START then.
  674. if !parser.stream_start_produced {
  675. return yaml_parser_fetch_stream_start(parser)
  676. }
  677. scan_mark := parser.mark
  678. // Eat whitespaces and comments until we reach the next token.
  679. if !yaml_parser_scan_to_next_token(parser) {
  680. return false
  681. }
  682. // [Go] While unrolling indents, transform the head comments of prior
  683. // indentation levels observed after scan_start into foot comments at
  684. // the respective indexes.
  685. // Check the indentation level against the current column.
  686. if !yaml_parser_unroll_indent(parser, parser.mark.column, scan_mark) {
  687. return false
  688. }
  689. // Ensure that the buffer contains at least 4 characters. 4 is the length
  690. // of the longest indicators ('--- ' and '... ').
  691. if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) {
  692. return false
  693. }
  694. // Is it the end of the stream?
  695. if is_z(parser.buffer, parser.buffer_pos) {
  696. return yaml_parser_fetch_stream_end(parser)
  697. }
  698. // Is it a directive?
  699. if parser.mark.column == 0 && parser.buffer[parser.buffer_pos] == '%' {
  700. return yaml_parser_fetch_directive(parser)
  701. }
  702. buf := parser.buffer
  703. pos := parser.buffer_pos
  704. // Is it the document start indicator?
  705. if parser.mark.column == 0 && buf[pos] == '-' && buf[pos+1] == '-' && buf[pos+2] == '-' && is_blankz(buf, pos+3) {
  706. return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_START_TOKEN)
  707. }
  708. // Is it the document end indicator?
  709. if parser.mark.column == 0 && buf[pos] == '.' && buf[pos+1] == '.' && buf[pos+2] == '.' && is_blankz(buf, pos+3) {
  710. return yaml_parser_fetch_document_indicator(parser, yaml_DOCUMENT_END_TOKEN)
  711. }
  712. comment_mark := parser.mark
  713. if len(parser.tokens) > 0 && (parser.flow_level == 0 && buf[pos] == ':' || parser.flow_level > 0 && buf[pos] == ',') {
  714. // Associate any following comments with the prior token.
  715. comment_mark = parser.tokens[len(parser.tokens)-1].start_mark
  716. }
  717. defer func() {
  718. if !ok {
  719. return
  720. }
  721. if len(parser.tokens) > 0 && parser.tokens[len(parser.tokens)-1].typ == yaml_BLOCK_ENTRY_TOKEN {
  722. // Sequence indicators alone have no line comments. It becomes
  723. // a head comment for whatever follows.
  724. return
  725. }
  726. if !yaml_parser_scan_line_comment(parser, comment_mark) {
  727. ok = false
  728. return
  729. }
  730. }()
  731. // Is it the flow sequence start indicator?
  732. if buf[pos] == '[' {
  733. return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_SEQUENCE_START_TOKEN)
  734. }
  735. // Is it the flow mapping start indicator?
  736. if parser.buffer[parser.buffer_pos] == '{' {
  737. return yaml_parser_fetch_flow_collection_start(parser, yaml_FLOW_MAPPING_START_TOKEN)
  738. }
  739. // Is it the flow sequence end indicator?
  740. if parser.buffer[parser.buffer_pos] == ']' {
  741. return yaml_parser_fetch_flow_collection_end(parser,
  742. yaml_FLOW_SEQUENCE_END_TOKEN)
  743. }
  744. // Is it the flow mapping end indicator?
  745. if parser.buffer[parser.buffer_pos] == '}' {
  746. return yaml_parser_fetch_flow_collection_end(parser,
  747. yaml_FLOW_MAPPING_END_TOKEN)
  748. }
  749. // Is it the flow entry indicator?
  750. if parser.buffer[parser.buffer_pos] == ',' {
  751. return yaml_parser_fetch_flow_entry(parser)
  752. }
  753. // Is it the block entry indicator?
  754. if parser.buffer[parser.buffer_pos] == '-' && is_blankz(parser.buffer, parser.buffer_pos+1) {
  755. return yaml_parser_fetch_block_entry(parser)
  756. }
  757. // Is it the key indicator?
  758. if parser.buffer[parser.buffer_pos] == '?' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) {
  759. return yaml_parser_fetch_key(parser)
  760. }
  761. // Is it the value indicator?
  762. if parser.buffer[parser.buffer_pos] == ':' && (parser.flow_level > 0 || is_blankz(parser.buffer, parser.buffer_pos+1)) {
  763. return yaml_parser_fetch_value(parser)
  764. }
  765. // Is it an alias?
  766. if parser.buffer[parser.buffer_pos] == '*' {
  767. return yaml_parser_fetch_anchor(parser, yaml_ALIAS_TOKEN)
  768. }
  769. // Is it an anchor?
  770. if parser.buffer[parser.buffer_pos] == '&' {
  771. return yaml_parser_fetch_anchor(parser, yaml_ANCHOR_TOKEN)
  772. }
  773. // Is it a tag?
  774. if parser.buffer[parser.buffer_pos] == '!' {
  775. return yaml_parser_fetch_tag(parser)
  776. }
  777. // Is it a literal scalar?
  778. if parser.buffer[parser.buffer_pos] == '|' && parser.flow_level == 0 {
  779. return yaml_parser_fetch_block_scalar(parser, true)
  780. }
  781. // Is it a folded scalar?
  782. if parser.buffer[parser.buffer_pos] == '>' && parser.flow_level == 0 {
  783. return yaml_parser_fetch_block_scalar(parser, false)
  784. }
  785. // Is it a single-quoted scalar?
  786. if parser.buffer[parser.buffer_pos] == '\'' {
  787. return yaml_parser_fetch_flow_scalar(parser, true)
  788. }
  789. // Is it a double-quoted scalar?
  790. if parser.buffer[parser.buffer_pos] == '"' {
  791. return yaml_parser_fetch_flow_scalar(parser, false)
  792. }
  793. // Is it a plain scalar?
  794. //
  795. // A plain scalar may start with any non-blank characters except
  796. //
  797. // '-', '?', ':', ',', '[', ']', '{', '}',
  798. // '#', '&', '*', '!', '|', '>', '\'', '\"',
  799. // '%', '@', '`'.
  800. //
  801. // In the block context (and, for the '-' indicator, in the flow context
  802. // too), it may also start with the characters
  803. //
  804. // '-', '?', ':'
  805. //
  806. // if it is followed by a non-space character.
  807. //
  808. // The last rule is more restrictive than the specification requires.
  809. // [Go] TODO Make this logic more reasonable.
  810. //switch parser.buffer[parser.buffer_pos] {
  811. //case '-', '?', ':', ',', '?', '-', ',', ':', ']', '[', '}', '{', '&', '#', '!', '*', '>', '|', '"', '\'', '@', '%', '-', '`':
  812. //}
  813. if !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '-' ||
  814. parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':' ||
  815. parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '[' ||
  816. parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' ||
  817. parser.buffer[parser.buffer_pos] == '}' || parser.buffer[parser.buffer_pos] == '#' ||
  818. parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '*' ||
  819. parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '|' ||
  820. parser.buffer[parser.buffer_pos] == '>' || parser.buffer[parser.buffer_pos] == '\'' ||
  821. parser.buffer[parser.buffer_pos] == '"' || parser.buffer[parser.buffer_pos] == '%' ||
  822. parser.buffer[parser.buffer_pos] == '@' || parser.buffer[parser.buffer_pos] == '`') ||
  823. (parser.buffer[parser.buffer_pos] == '-' && !is_blank(parser.buffer, parser.buffer_pos+1)) ||
  824. (parser.flow_level == 0 &&
  825. (parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == ':') &&
  826. !is_blankz(parser.buffer, parser.buffer_pos+1)) {
  827. return yaml_parser_fetch_plain_scalar(parser)
  828. }
  829. // If we don't determine the token type so far, it is an error.
  830. return yaml_parser_set_scanner_error(parser,
  831. "while scanning for the next token", parser.mark,
  832. "found character that cannot start any token")
  833. }
  834. func yaml_simple_key_is_valid(parser *yaml_parser_t, simple_key *yaml_simple_key_t) (valid, ok bool) {
  835. if !simple_key.possible {
  836. return false, true
  837. }
  838. // The 1.2 specification says:
  839. //
  840. // "If the ? indicator is omitted, parsing needs to see past the
  841. // implicit key to recognize it as such. To limit the amount of
  842. // lookahead required, the “:” indicator must appear at most 1024
  843. // Unicode characters beyond the start of the key. In addition, the key
  844. // is restricted to a single line."
  845. //
  846. if simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index {
  847. // Check if the potential simple key to be removed is required.
  848. if simple_key.required {
  849. return false, yaml_parser_set_scanner_error(parser,
  850. "while scanning a simple key", simple_key.mark,
  851. "could not find expected ':'")
  852. }
  853. simple_key.possible = false
  854. return false, true
  855. }
  856. return true, true
  857. }
  858. // Check if a simple key may start at the current position and add it if
  859. // needed.
  860. func yaml_parser_save_simple_key(parser *yaml_parser_t) bool {
  861. // A simple key is required at the current position if the scanner is in
  862. // the block context and the current column coincides with the indentation
  863. // level.
  864. required := parser.flow_level == 0 && parser.indent == parser.mark.column
  865. //
  866. // If the current position may start a simple key, save it.
  867. //
  868. if parser.simple_key_allowed {
  869. simple_key := yaml_simple_key_t{
  870. possible: true,
  871. required: required,
  872. token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head),
  873. mark: parser.mark,
  874. }
  875. if !yaml_parser_remove_simple_key(parser) {
  876. return false
  877. }
  878. parser.simple_keys[len(parser.simple_keys)-1] = simple_key
  879. parser.simple_keys_by_tok[simple_key.token_number] = len(parser.simple_keys) - 1
  880. }
  881. return true
  882. }
  883. // Remove a potential simple key at the current flow level.
  884. func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool {
  885. i := len(parser.simple_keys) - 1
  886. if parser.simple_keys[i].possible {
  887. // If the key is required, it is an error.
  888. if parser.simple_keys[i].required {
  889. return yaml_parser_set_scanner_error(parser,
  890. "while scanning a simple key", parser.simple_keys[i].mark,
  891. "could not find expected ':'")
  892. }
  893. // Remove the key from the stack.
  894. parser.simple_keys[i].possible = false
  895. delete(parser.simple_keys_by_tok, parser.simple_keys[i].token_number)
  896. }
  897. return true
  898. }
  899. // max_flow_level limits the flow_level
  900. const max_flow_level = 10000
  901. // Increase the flow level and resize the simple key list if needed.
  902. func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool {
  903. // Reset the simple key on the next level.
  904. parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{
  905. possible: false,
  906. required: false,
  907. token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head),
  908. mark: parser.mark,
  909. })
  910. // Increase the flow level.
  911. parser.flow_level++
  912. if parser.flow_level > max_flow_level {
  913. return yaml_parser_set_scanner_error(parser,
  914. "while increasing flow level", parser.simple_keys[len(parser.simple_keys)-1].mark,
  915. fmt.Sprintf("exceeded max depth of %d", max_flow_level))
  916. }
  917. return true
  918. }
  919. // Decrease the flow level.
  920. func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool {
  921. if parser.flow_level > 0 {
  922. parser.flow_level--
  923. last := len(parser.simple_keys) - 1
  924. delete(parser.simple_keys_by_tok, parser.simple_keys[last].token_number)
  925. parser.simple_keys = parser.simple_keys[:last]
  926. }
  927. return true
  928. }
  929. // max_indents limits the indents stack size
  930. const max_indents = 10000
  931. // Push the current indentation level to the stack and set the new level
  932. // the current column is greater than the indentation level. In this case,
  933. // append or insert the specified token into the token queue.
  934. func yaml_parser_roll_indent(parser *yaml_parser_t, column, number int, typ yaml_token_type_t, mark yaml_mark_t) bool {
  935. // In the flow context, do nothing.
  936. if parser.flow_level > 0 {
  937. return true
  938. }
  939. if parser.indent < column {
  940. // Push the current indentation level to the stack and set the new
  941. // indentation level.
  942. parser.indents = append(parser.indents, parser.indent)
  943. parser.indent = column
  944. if len(parser.indents) > max_indents {
  945. return yaml_parser_set_scanner_error(parser,
  946. "while increasing indent level", parser.simple_keys[len(parser.simple_keys)-1].mark,
  947. fmt.Sprintf("exceeded max depth of %d", max_indents))
  948. }
  949. // Create a token and insert it into the queue.
  950. token := yaml_token_t{
  951. typ: typ,
  952. start_mark: mark,
  953. end_mark: mark,
  954. }
  955. if number > -1 {
  956. number -= parser.tokens_parsed
  957. }
  958. yaml_insert_token(parser, number, &token)
  959. }
  960. return true
  961. }
  962. // Pop indentation levels from the indents stack until the current level
  963. // becomes less or equal to the column. For each indentation level, append
  964. // the BLOCK-END token.
  965. func yaml_parser_unroll_indent(parser *yaml_parser_t, column int, scan_mark yaml_mark_t) bool {
  966. // In the flow context, do nothing.
  967. if parser.flow_level > 0 {
  968. return true
  969. }
  970. block_mark := scan_mark
  971. block_mark.index--
  972. // Loop through the indentation levels in the stack.
  973. for parser.indent > column {
  974. // [Go] Reposition the end token before potential following
  975. // foot comments of parent blocks. For that, search
  976. // backwards for recent comments that were at the same
  977. // indent as the block that is ending now.
  978. stop_index := block_mark.index
  979. for i := len(parser.comments) - 1; i >= 0; i-- {
  980. comment := &parser.comments[i]
  981. if comment.end_mark.index < stop_index {
  982. // Don't go back beyond the start of the comment/whitespace scan, unless column < 0.
  983. // If requested indent column is < 0, then the document is over and everything else
  984. // is a foot anyway.
  985. break
  986. }
  987. if comment.start_mark.column == parser.indent+1 {
  988. // This is a good match. But maybe there's a former comment
  989. // at that same indent level, so keep searching.
  990. block_mark = comment.start_mark
  991. }
  992. // While the end of the former comment matches with
  993. // the start of the following one, we know there's
  994. // nothing in between and scanning is still safe.
  995. stop_index = comment.scan_mark.index
  996. }
  997. // Create a token and append it to the queue.
  998. token := yaml_token_t{
  999. typ: yaml_BLOCK_END_TOKEN,
  1000. start_mark: block_mark,
  1001. end_mark: block_mark,
  1002. }
  1003. yaml_insert_token(parser, -1, &token)
  1004. // Pop the indentation level.
  1005. parser.indent = parser.indents[len(parser.indents)-1]
  1006. parser.indents = parser.indents[:len(parser.indents)-1]
  1007. }
  1008. return true
  1009. }
  1010. // Initialize the scanner and produce the STREAM-START token.
  1011. func yaml_parser_fetch_stream_start(parser *yaml_parser_t) bool {
  1012. // Set the initial indentation.
  1013. parser.indent = -1
  1014. // Initialize the simple key stack.
  1015. parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{})
  1016. parser.simple_keys_by_tok = make(map[int]int)
  1017. // A simple key is allowed at the beginning of the stream.
  1018. parser.simple_key_allowed = true
  1019. // We have started.
  1020. parser.stream_start_produced = true
  1021. // Create the STREAM-START token and append it to the queue.
  1022. token := yaml_token_t{
  1023. typ: yaml_STREAM_START_TOKEN,
  1024. start_mark: parser.mark,
  1025. end_mark: parser.mark,
  1026. encoding: parser.encoding,
  1027. }
  1028. yaml_insert_token(parser, -1, &token)
  1029. return true
  1030. }
  1031. // Produce the STREAM-END token and shut down the scanner.
  1032. func yaml_parser_fetch_stream_end(parser *yaml_parser_t) bool {
  1033. // Force new line.
  1034. if parser.mark.column != 0 {
  1035. parser.mark.column = 0
  1036. parser.mark.line++
  1037. }
  1038. // Reset the indentation level.
  1039. if !yaml_parser_unroll_indent(parser, -1, parser.mark) {
  1040. return false
  1041. }
  1042. // Reset simple keys.
  1043. if !yaml_parser_remove_simple_key(parser) {
  1044. return false
  1045. }
  1046. parser.simple_key_allowed = false
  1047. // Create the STREAM-END token and append it to the queue.
  1048. token := yaml_token_t{
  1049. typ: yaml_STREAM_END_TOKEN,
  1050. start_mark: parser.mark,
  1051. end_mark: parser.mark,
  1052. }
  1053. yaml_insert_token(parser, -1, &token)
  1054. return true
  1055. }
  1056. // Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token.
  1057. func yaml_parser_fetch_directive(parser *yaml_parser_t) bool {
  1058. // Reset the indentation level.
  1059. if !yaml_parser_unroll_indent(parser, -1, parser.mark) {
  1060. return false
  1061. }
  1062. // Reset simple keys.
  1063. if !yaml_parser_remove_simple_key(parser) {
  1064. return false
  1065. }
  1066. parser.simple_key_allowed = false
  1067. // Create the YAML-DIRECTIVE or TAG-DIRECTIVE token.
  1068. token := yaml_token_t{}
  1069. if !yaml_parser_scan_directive(parser, &token) {
  1070. return false
  1071. }
  1072. // Append the token to the queue.
  1073. yaml_insert_token(parser, -1, &token)
  1074. return true
  1075. }
  1076. // Produce the DOCUMENT-START or DOCUMENT-END token.
  1077. func yaml_parser_fetch_document_indicator(parser *yaml_parser_t, typ yaml_token_type_t) bool {
  1078. // Reset the indentation level.
  1079. if !yaml_parser_unroll_indent(parser, -1, parser.mark) {
  1080. return false
  1081. }
  1082. // Reset simple keys.
  1083. if !yaml_parser_remove_simple_key(parser) {
  1084. return false
  1085. }
  1086. parser.simple_key_allowed = false
  1087. // Consume the token.
  1088. start_mark := parser.mark
  1089. skip(parser)
  1090. skip(parser)
  1091. skip(parser)
  1092. end_mark := parser.mark
  1093. // Create the DOCUMENT-START or DOCUMENT-END token.
  1094. token := yaml_token_t{
  1095. typ: typ,
  1096. start_mark: start_mark,
  1097. end_mark: end_mark,
  1098. }
  1099. // Append the token to the queue.
  1100. yaml_insert_token(parser, -1, &token)
  1101. return true
  1102. }
  1103. // Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
  1104. func yaml_parser_fetch_flow_collection_start(parser *yaml_parser_t, typ yaml_token_type_t) bool {
  1105. // The indicators '[' and '{' may start a simple key.
  1106. if !yaml_parser_save_simple_key(parser) {
  1107. return false
  1108. }
  1109. // Increase the flow level.
  1110. if !yaml_parser_increase_flow_level(parser) {
  1111. return false
  1112. }
  1113. // A simple key may follow the indicators '[' and '{'.
  1114. parser.simple_key_allowed = true
  1115. // Consume the token.
  1116. start_mark := parser.mark
  1117. skip(parser)
  1118. end_mark := parser.mark
  1119. // Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token.
  1120. token := yaml_token_t{
  1121. typ: typ,
  1122. start_mark: start_mark,
  1123. end_mark: end_mark,
  1124. }
  1125. // Append the token to the queue.
  1126. yaml_insert_token(parser, -1, &token)
  1127. return true
  1128. }
  1129. // Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
  1130. func yaml_parser_fetch_flow_collection_end(parser *yaml_parser_t, typ yaml_token_type_t) bool {
  1131. // Reset any potential simple key on the current flow level.
  1132. if !yaml_parser_remove_simple_key(parser) {
  1133. return false
  1134. }
  1135. // Decrease the flow level.
  1136. if !yaml_parser_decrease_flow_level(parser) {
  1137. return false
  1138. }
  1139. // No simple keys after the indicators ']' and '}'.
  1140. parser.simple_key_allowed = false
  1141. // Consume the token.
  1142. start_mark := parser.mark
  1143. skip(parser)
  1144. end_mark := parser.mark
  1145. // Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token.
  1146. token := yaml_token_t{
  1147. typ: typ,
  1148. start_mark: start_mark,
  1149. end_mark: end_mark,
  1150. }
  1151. // Append the token to the queue.
  1152. yaml_insert_token(parser, -1, &token)
  1153. return true
  1154. }
  1155. // Produce the FLOW-ENTRY token.
  1156. func yaml_parser_fetch_flow_entry(parser *yaml_parser_t) bool {
  1157. // Reset any potential simple keys on the current flow level.
  1158. if !yaml_parser_remove_simple_key(parser) {
  1159. return false
  1160. }
  1161. // Simple keys are allowed after ','.
  1162. parser.simple_key_allowed = true
  1163. // Consume the token.
  1164. start_mark := parser.mark
  1165. skip(parser)
  1166. end_mark := parser.mark
  1167. // Create the FLOW-ENTRY token and append it to the queue.
  1168. token := yaml_token_t{
  1169. typ: yaml_FLOW_ENTRY_TOKEN,
  1170. start_mark: start_mark,
  1171. end_mark: end_mark,
  1172. }
  1173. yaml_insert_token(parser, -1, &token)
  1174. return true
  1175. }
  1176. // Produce the BLOCK-ENTRY token.
  1177. func yaml_parser_fetch_block_entry(parser *yaml_parser_t) bool {
  1178. // Check if the scanner is in the block context.
  1179. if parser.flow_level == 0 {
  1180. // Check if we are allowed to start a new entry.
  1181. if !parser.simple_key_allowed {
  1182. return yaml_parser_set_scanner_error(parser, "", parser.mark,
  1183. "block sequence entries are not allowed in this context")
  1184. }
  1185. // Add the BLOCK-SEQUENCE-START token if needed.
  1186. if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_SEQUENCE_START_TOKEN, parser.mark) {
  1187. return false
  1188. }
  1189. } else {
  1190. // It is an error for the '-' indicator to occur in the flow context,
  1191. // but we let the Parser detect and report about it because the Parser
  1192. // is able to point to the context.
  1193. }
  1194. // Reset any potential simple keys on the current flow level.
  1195. if !yaml_parser_remove_simple_key(parser) {
  1196. return false
  1197. }
  1198. // Simple keys are allowed after '-'.
  1199. parser.simple_key_allowed = true
  1200. // Consume the token.
  1201. start_mark := parser.mark
  1202. skip(parser)
  1203. end_mark := parser.mark
  1204. // Create the BLOCK-ENTRY token and append it to the queue.
  1205. token := yaml_token_t{
  1206. typ: yaml_BLOCK_ENTRY_TOKEN,
  1207. start_mark: start_mark,
  1208. end_mark: end_mark,
  1209. }
  1210. yaml_insert_token(parser, -1, &token)
  1211. return true
  1212. }
  1213. // Produce the KEY token.
  1214. func yaml_parser_fetch_key(parser *yaml_parser_t) bool {
  1215. // In the block context, additional checks are required.
  1216. if parser.flow_level == 0 {
  1217. // Check if we are allowed to start a new key (not nessesary simple).
  1218. if !parser.simple_key_allowed {
  1219. return yaml_parser_set_scanner_error(parser, "", parser.mark,
  1220. "mapping keys are not allowed in this context")
  1221. }
  1222. // Add the BLOCK-MAPPING-START token if needed.
  1223. if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) {
  1224. return false
  1225. }
  1226. }
  1227. // Reset any potential simple keys on the current flow level.
  1228. if !yaml_parser_remove_simple_key(parser) {
  1229. return false
  1230. }
  1231. // Simple keys are allowed after '?' in the block context.
  1232. parser.simple_key_allowed = parser.flow_level == 0
  1233. // Consume the token.
  1234. start_mark := parser.mark
  1235. skip(parser)
  1236. end_mark := parser.mark
  1237. // Create the KEY token and append it to the queue.
  1238. token := yaml_token_t{
  1239. typ: yaml_KEY_TOKEN,
  1240. start_mark: start_mark,
  1241. end_mark: end_mark,
  1242. }
  1243. yaml_insert_token(parser, -1, &token)
  1244. return true
  1245. }
  1246. // Produce the VALUE token.
  1247. func yaml_parser_fetch_value(parser *yaml_parser_t) bool {
  1248. simple_key := &parser.simple_keys[len(parser.simple_keys)-1]
  1249. // Have we found a simple key?
  1250. if valid, ok := yaml_simple_key_is_valid(parser, simple_key); !ok {
  1251. return false
  1252. } else if valid {
  1253. // Create the KEY token and insert it into the queue.
  1254. token := yaml_token_t{
  1255. typ: yaml_KEY_TOKEN,
  1256. start_mark: simple_key.mark,
  1257. end_mark: simple_key.mark,
  1258. }
  1259. yaml_insert_token(parser, simple_key.token_number-parser.tokens_parsed, &token)
  1260. // In the block context, we may need to add the BLOCK-MAPPING-START token.
  1261. if !yaml_parser_roll_indent(parser, simple_key.mark.column,
  1262. simple_key.token_number,
  1263. yaml_BLOCK_MAPPING_START_TOKEN, simple_key.mark) {
  1264. return false
  1265. }
  1266. // Remove the simple key.
  1267. simple_key.possible = false
  1268. delete(parser.simple_keys_by_tok, simple_key.token_number)
  1269. // A simple key cannot follow another simple key.
  1270. parser.simple_key_allowed = false
  1271. } else {
  1272. // The ':' indicator follows a complex key.
  1273. // In the block context, extra checks are required.
  1274. if parser.flow_level == 0 {
  1275. // Check if we are allowed to start a complex value.
  1276. if !parser.simple_key_allowed {
  1277. return yaml_parser_set_scanner_error(parser, "", parser.mark,
  1278. "mapping values are not allowed in this context")
  1279. }
  1280. // Add the BLOCK-MAPPING-START token if needed.
  1281. if !yaml_parser_roll_indent(parser, parser.mark.column, -1, yaml_BLOCK_MAPPING_START_TOKEN, parser.mark) {
  1282. return false
  1283. }
  1284. }
  1285. // Simple keys after ':' are allowed in the block context.
  1286. parser.simple_key_allowed = parser.flow_level == 0
  1287. }
  1288. // Consume the token.
  1289. start_mark := parser.mark
  1290. skip(parser)
  1291. end_mark := parser.mark
  1292. // Create the VALUE token and append it to the queue.
  1293. token := yaml_token_t{
  1294. typ: yaml_VALUE_TOKEN,
  1295. start_mark: start_mark,
  1296. end_mark: end_mark,
  1297. }
  1298. yaml_insert_token(parser, -1, &token)
  1299. return true
  1300. }
  1301. // Produce the ALIAS or ANCHOR token.
  1302. func yaml_parser_fetch_anchor(parser *yaml_parser_t, typ yaml_token_type_t) bool {
  1303. // An anchor or an alias could be a simple key.
  1304. if !yaml_parser_save_simple_key(parser) {
  1305. return false
  1306. }
  1307. // A simple key cannot follow an anchor or an alias.
  1308. parser.simple_key_allowed = false
  1309. // Create the ALIAS or ANCHOR token and append it to the queue.
  1310. var token yaml_token_t
  1311. if !yaml_parser_scan_anchor(parser, &token, typ) {
  1312. return false
  1313. }
  1314. yaml_insert_token(parser, -1, &token)
  1315. return true
  1316. }
  1317. // Produce the TAG token.
  1318. func yaml_parser_fetch_tag(parser *yaml_parser_t) bool {
  1319. // A tag could be a simple key.
  1320. if !yaml_parser_save_simple_key(parser) {
  1321. return false
  1322. }
  1323. // A simple key cannot follow a tag.
  1324. parser.simple_key_allowed = false
  1325. // Create the TAG token and append it to the queue.
  1326. var token yaml_token_t
  1327. if !yaml_parser_scan_tag(parser, &token) {
  1328. return false
  1329. }
  1330. yaml_insert_token(parser, -1, &token)
  1331. return true
  1332. }
  1333. // Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens.
  1334. func yaml_parser_fetch_block_scalar(parser *yaml_parser_t, literal bool) bool {
  1335. // Remove any potential simple keys.
  1336. if !yaml_parser_remove_simple_key(parser) {
  1337. return false
  1338. }
  1339. // A simple key may follow a block scalar.
  1340. parser.simple_key_allowed = true
  1341. // Create the SCALAR token and append it to the queue.
  1342. var token yaml_token_t
  1343. if !yaml_parser_scan_block_scalar(parser, &token, literal) {
  1344. return false
  1345. }
  1346. yaml_insert_token(parser, -1, &token)
  1347. return true
  1348. }
  1349. // Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens.
  1350. func yaml_parser_fetch_flow_scalar(parser *yaml_parser_t, single bool) bool {
  1351. // A plain scalar could be a simple key.
  1352. if !yaml_parser_save_simple_key(parser) {
  1353. return false
  1354. }
  1355. // A simple key cannot follow a flow scalar.
  1356. parser.simple_key_allowed = false
  1357. // Create the SCALAR token and append it to the queue.
  1358. var token yaml_token_t
  1359. if !yaml_parser_scan_flow_scalar(parser, &token, single) {
  1360. return false
  1361. }
  1362. yaml_insert_token(parser, -1, &token)
  1363. return true
  1364. }
  1365. // Produce the SCALAR(...,plain) token.
  1366. func yaml_parser_fetch_plain_scalar(parser *yaml_parser_t) bool {
  1367. // A plain scalar could be a simple key.
  1368. if !yaml_parser_save_simple_key(parser) {
  1369. return false
  1370. }
  1371. // A simple key cannot follow a flow scalar.
  1372. parser.simple_key_allowed = false
  1373. // Create the SCALAR token and append it to the queue.
  1374. var token yaml_token_t
  1375. if !yaml_parser_scan_plain_scalar(parser, &token) {
  1376. return false
  1377. }
  1378. yaml_insert_token(parser, -1, &token)
  1379. return true
  1380. }
  1381. // Eat whitespaces and comments until the next token is found.
  1382. func yaml_parser_scan_to_next_token(parser *yaml_parser_t) bool {
  1383. scan_mark := parser.mark
  1384. // Until the next token is not found.
  1385. for {
  1386. // Allow the BOM mark to start a line.
  1387. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1388. return false
  1389. }
  1390. if parser.mark.column == 0 && is_bom(parser.buffer, parser.buffer_pos) {
  1391. skip(parser)
  1392. }
  1393. // Eat whitespaces.
  1394. // Tabs are allowed:
  1395. // - in the flow context
  1396. // - in the block context, but not at the beginning of the line or
  1397. // after '-', '?', or ':' (complex value).
  1398. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1399. return false
  1400. }
  1401. for parser.buffer[parser.buffer_pos] == ' ' || ((parser.flow_level > 0 || !parser.simple_key_allowed) && parser.buffer[parser.buffer_pos] == '\t') {
  1402. skip(parser)
  1403. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1404. return false
  1405. }
  1406. }
  1407. // Check if we just had a line comment under a sequence entry that
  1408. // looks more like a header to the following content. Similar to this:
  1409. //
  1410. // - # The comment
  1411. // - Some data
  1412. //
  1413. // If so, transform the line comment to a head comment and reposition.
  1414. if len(parser.comments) > 0 && len(parser.tokens) > 1 {
  1415. tokenA := parser.tokens[len(parser.tokens)-2]
  1416. tokenB := parser.tokens[len(parser.tokens)-1]
  1417. comment := &parser.comments[len(parser.comments)-1]
  1418. if tokenA.typ == yaml_BLOCK_SEQUENCE_START_TOKEN && tokenB.typ == yaml_BLOCK_ENTRY_TOKEN && len(comment.line) > 0 && !is_break(parser.buffer, parser.buffer_pos) {
  1419. // If it was in the prior line, reposition so it becomes a
  1420. // header of the follow up token. Otherwise, keep it in place
  1421. // so it becomes a header of the former.
  1422. comment.head = comment.line
  1423. comment.line = nil
  1424. if comment.start_mark.line == parser.mark.line-1 {
  1425. comment.token_mark = parser.mark
  1426. }
  1427. }
  1428. }
  1429. // Eat a comment until a line break.
  1430. if parser.buffer[parser.buffer_pos] == '#' {
  1431. if !yaml_parser_scan_comments(parser, scan_mark) {
  1432. return false
  1433. }
  1434. }
  1435. // If it is a line break, eat it.
  1436. if is_break(parser.buffer, parser.buffer_pos) {
  1437. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  1438. return false
  1439. }
  1440. skip_line(parser)
  1441. // In the block context, a new line may start a simple key.
  1442. if parser.flow_level == 0 {
  1443. parser.simple_key_allowed = true
  1444. }
  1445. } else {
  1446. break // We have found a token.
  1447. }
  1448. }
  1449. return true
  1450. }
  1451. // Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token.
  1452. //
  1453. // Scope:
  1454. // %YAML 1.1 # a comment \n
  1455. // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1456. // %TAG !yaml! tag:yaml.org,2002: \n
  1457. // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1458. //
  1459. func yaml_parser_scan_directive(parser *yaml_parser_t, token *yaml_token_t) bool {
  1460. // Eat '%'.
  1461. start_mark := parser.mark
  1462. skip(parser)
  1463. // Scan the directive name.
  1464. var name []byte
  1465. if !yaml_parser_scan_directive_name(parser, start_mark, &name) {
  1466. return false
  1467. }
  1468. // Is it a YAML directive?
  1469. if bytes.Equal(name, []byte("YAML")) {
  1470. // Scan the VERSION directive value.
  1471. var major, minor int8
  1472. if !yaml_parser_scan_version_directive_value(parser, start_mark, &major, &minor) {
  1473. return false
  1474. }
  1475. end_mark := parser.mark
  1476. // Create a VERSION-DIRECTIVE token.
  1477. *token = yaml_token_t{
  1478. typ: yaml_VERSION_DIRECTIVE_TOKEN,
  1479. start_mark: start_mark,
  1480. end_mark: end_mark,
  1481. major: major,
  1482. minor: minor,
  1483. }
  1484. // Is it a TAG directive?
  1485. } else if bytes.Equal(name, []byte("TAG")) {
  1486. // Scan the TAG directive value.
  1487. var handle, prefix []byte
  1488. if !yaml_parser_scan_tag_directive_value(parser, start_mark, &handle, &prefix) {
  1489. return false
  1490. }
  1491. end_mark := parser.mark
  1492. // Create a TAG-DIRECTIVE token.
  1493. *token = yaml_token_t{
  1494. typ: yaml_TAG_DIRECTIVE_TOKEN,
  1495. start_mark: start_mark,
  1496. end_mark: end_mark,
  1497. value: handle,
  1498. prefix: prefix,
  1499. }
  1500. // Unknown directive.
  1501. } else {
  1502. yaml_parser_set_scanner_error(parser, "while scanning a directive",
  1503. start_mark, "found unknown directive name")
  1504. return false
  1505. }
  1506. // Eat the rest of the line including any comments.
  1507. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1508. return false
  1509. }
  1510. for is_blank(parser.buffer, parser.buffer_pos) {
  1511. skip(parser)
  1512. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1513. return false
  1514. }
  1515. }
  1516. if parser.buffer[parser.buffer_pos] == '#' {
  1517. // [Go] Discard this inline comment for the time being.
  1518. //if !yaml_parser_scan_line_comment(parser, start_mark) {
  1519. // return false
  1520. //}
  1521. for !is_breakz(parser.buffer, parser.buffer_pos) {
  1522. skip(parser)
  1523. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1524. return false
  1525. }
  1526. }
  1527. }
  1528. // Check if we are at the end of the line.
  1529. if !is_breakz(parser.buffer, parser.buffer_pos) {
  1530. yaml_parser_set_scanner_error(parser, "while scanning a directive",
  1531. start_mark, "did not find expected comment or line break")
  1532. return false
  1533. }
  1534. // Eat a line break.
  1535. if is_break(parser.buffer, parser.buffer_pos) {
  1536. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  1537. return false
  1538. }
  1539. skip_line(parser)
  1540. }
  1541. return true
  1542. }
  1543. // Scan the directive name.
  1544. //
  1545. // Scope:
  1546. // %YAML 1.1 # a comment \n
  1547. // ^^^^
  1548. // %TAG !yaml! tag:yaml.org,2002: \n
  1549. // ^^^
  1550. //
  1551. func yaml_parser_scan_directive_name(parser *yaml_parser_t, start_mark yaml_mark_t, name *[]byte) bool {
  1552. // Consume the directive name.
  1553. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1554. return false
  1555. }
  1556. var s []byte
  1557. for is_alpha(parser.buffer, parser.buffer_pos) {
  1558. s = read(parser, s)
  1559. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1560. return false
  1561. }
  1562. }
  1563. // Check if the name is empty.
  1564. if len(s) == 0 {
  1565. yaml_parser_set_scanner_error(parser, "while scanning a directive",
  1566. start_mark, "could not find expected directive name")
  1567. return false
  1568. }
  1569. // Check for an blank character after the name.
  1570. if !is_blankz(parser.buffer, parser.buffer_pos) {
  1571. yaml_parser_set_scanner_error(parser, "while scanning a directive",
  1572. start_mark, "found unexpected non-alphabetical character")
  1573. return false
  1574. }
  1575. *name = s
  1576. return true
  1577. }
  1578. // Scan the value of VERSION-DIRECTIVE.
  1579. //
  1580. // Scope:
  1581. // %YAML 1.1 # a comment \n
  1582. // ^^^^^^
  1583. func yaml_parser_scan_version_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, major, minor *int8) bool {
  1584. // Eat whitespaces.
  1585. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1586. return false
  1587. }
  1588. for is_blank(parser.buffer, parser.buffer_pos) {
  1589. skip(parser)
  1590. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1591. return false
  1592. }
  1593. }
  1594. // Consume the major version number.
  1595. if !yaml_parser_scan_version_directive_number(parser, start_mark, major) {
  1596. return false
  1597. }
  1598. // Eat '.'.
  1599. if parser.buffer[parser.buffer_pos] != '.' {
  1600. return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
  1601. start_mark, "did not find expected digit or '.' character")
  1602. }
  1603. skip(parser)
  1604. // Consume the minor version number.
  1605. if !yaml_parser_scan_version_directive_number(parser, start_mark, minor) {
  1606. return false
  1607. }
  1608. return true
  1609. }
  1610. const max_number_length = 2
  1611. // Scan the version number of VERSION-DIRECTIVE.
  1612. //
  1613. // Scope:
  1614. // %YAML 1.1 # a comment \n
  1615. // ^
  1616. // %YAML 1.1 # a comment \n
  1617. // ^
  1618. func yaml_parser_scan_version_directive_number(parser *yaml_parser_t, start_mark yaml_mark_t, number *int8) bool {
  1619. // Repeat while the next character is digit.
  1620. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1621. return false
  1622. }
  1623. var value, length int8
  1624. for is_digit(parser.buffer, parser.buffer_pos) {
  1625. // Check if the number is too long.
  1626. length++
  1627. if length > max_number_length {
  1628. return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
  1629. start_mark, "found extremely long version number")
  1630. }
  1631. value = value*10 + int8(as_digit(parser.buffer, parser.buffer_pos))
  1632. skip(parser)
  1633. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1634. return false
  1635. }
  1636. }
  1637. // Check if the number was present.
  1638. if length == 0 {
  1639. return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
  1640. start_mark, "did not find expected version number")
  1641. }
  1642. *number = value
  1643. return true
  1644. }
  1645. // Scan the value of a TAG-DIRECTIVE token.
  1646. //
  1647. // Scope:
  1648. // %TAG !yaml! tag:yaml.org,2002: \n
  1649. // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  1650. //
  1651. func yaml_parser_scan_tag_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, handle, prefix *[]byte) bool {
  1652. var handle_value, prefix_value []byte
  1653. // Eat whitespaces.
  1654. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1655. return false
  1656. }
  1657. for is_blank(parser.buffer, parser.buffer_pos) {
  1658. skip(parser)
  1659. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1660. return false
  1661. }
  1662. }
  1663. // Scan a handle.
  1664. if !yaml_parser_scan_tag_handle(parser, true, start_mark, &handle_value) {
  1665. return false
  1666. }
  1667. // Expect a whitespace.
  1668. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1669. return false
  1670. }
  1671. if !is_blank(parser.buffer, parser.buffer_pos) {
  1672. yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
  1673. start_mark, "did not find expected whitespace")
  1674. return false
  1675. }
  1676. // Eat whitespaces.
  1677. for is_blank(parser.buffer, parser.buffer_pos) {
  1678. skip(parser)
  1679. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1680. return false
  1681. }
  1682. }
  1683. // Scan a prefix.
  1684. if !yaml_parser_scan_tag_uri(parser, true, nil, start_mark, &prefix_value) {
  1685. return false
  1686. }
  1687. // Expect a whitespace or line break.
  1688. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1689. return false
  1690. }
  1691. if !is_blankz(parser.buffer, parser.buffer_pos) {
  1692. yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
  1693. start_mark, "did not find expected whitespace or line break")
  1694. return false
  1695. }
  1696. *handle = handle_value
  1697. *prefix = prefix_value
  1698. return true
  1699. }
  1700. func yaml_parser_scan_anchor(parser *yaml_parser_t, token *yaml_token_t, typ yaml_token_type_t) bool {
  1701. var s []byte
  1702. // Eat the indicator character.
  1703. start_mark := parser.mark
  1704. skip(parser)
  1705. // Consume the value.
  1706. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1707. return false
  1708. }
  1709. for is_alpha(parser.buffer, parser.buffer_pos) {
  1710. s = read(parser, s)
  1711. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1712. return false
  1713. }
  1714. }
  1715. end_mark := parser.mark
  1716. /*
  1717. * Check if length of the anchor is greater than 0 and it is followed by
  1718. * a whitespace character or one of the indicators:
  1719. *
  1720. * '?', ':', ',', ']', '}', '%', '@', '`'.
  1721. */
  1722. if len(s) == 0 ||
  1723. !(is_blankz(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == '?' ||
  1724. parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == ',' ||
  1725. parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '}' ||
  1726. parser.buffer[parser.buffer_pos] == '%' || parser.buffer[parser.buffer_pos] == '@' ||
  1727. parser.buffer[parser.buffer_pos] == '`') {
  1728. context := "while scanning an alias"
  1729. if typ == yaml_ANCHOR_TOKEN {
  1730. context = "while scanning an anchor"
  1731. }
  1732. yaml_parser_set_scanner_error(parser, context, start_mark,
  1733. "did not find expected alphabetic or numeric character")
  1734. return false
  1735. }
  1736. // Create a token.
  1737. *token = yaml_token_t{
  1738. typ: typ,
  1739. start_mark: start_mark,
  1740. end_mark: end_mark,
  1741. value: s,
  1742. }
  1743. return true
  1744. }
  1745. /*
  1746. * Scan a TAG token.
  1747. */
  1748. func yaml_parser_scan_tag(parser *yaml_parser_t, token *yaml_token_t) bool {
  1749. var handle, suffix []byte
  1750. start_mark := parser.mark
  1751. // Check if the tag is in the canonical form.
  1752. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  1753. return false
  1754. }
  1755. if parser.buffer[parser.buffer_pos+1] == '<' {
  1756. // Keep the handle as ''
  1757. // Eat '!<'
  1758. skip(parser)
  1759. skip(parser)
  1760. // Consume the tag value.
  1761. if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) {
  1762. return false
  1763. }
  1764. // Check for '>' and eat it.
  1765. if parser.buffer[parser.buffer_pos] != '>' {
  1766. yaml_parser_set_scanner_error(parser, "while scanning a tag",
  1767. start_mark, "did not find the expected '>'")
  1768. return false
  1769. }
  1770. skip(parser)
  1771. } else {
  1772. // The tag has either the '!suffix' or the '!handle!suffix' form.
  1773. // First, try to scan a handle.
  1774. if !yaml_parser_scan_tag_handle(parser, false, start_mark, &handle) {
  1775. return false
  1776. }
  1777. // Check if it is, indeed, handle.
  1778. if handle[0] == '!' && len(handle) > 1 && handle[len(handle)-1] == '!' {
  1779. // Scan the suffix now.
  1780. if !yaml_parser_scan_tag_uri(parser, false, nil, start_mark, &suffix) {
  1781. return false
  1782. }
  1783. } else {
  1784. // It wasn't a handle after all. Scan the rest of the tag.
  1785. if !yaml_parser_scan_tag_uri(parser, false, handle, start_mark, &suffix) {
  1786. return false
  1787. }
  1788. // Set the handle to '!'.
  1789. handle = []byte{'!'}
  1790. // A special case: the '!' tag. Set the handle to '' and the
  1791. // suffix to '!'.
  1792. if len(suffix) == 0 {
  1793. handle, suffix = suffix, handle
  1794. }
  1795. }
  1796. }
  1797. // Check the character which ends the tag.
  1798. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1799. return false
  1800. }
  1801. if !is_blankz(parser.buffer, parser.buffer_pos) {
  1802. yaml_parser_set_scanner_error(parser, "while scanning a tag",
  1803. start_mark, "did not find expected whitespace or line break")
  1804. return false
  1805. }
  1806. end_mark := parser.mark
  1807. // Create a token.
  1808. *token = yaml_token_t{
  1809. typ: yaml_TAG_TOKEN,
  1810. start_mark: start_mark,
  1811. end_mark: end_mark,
  1812. value: handle,
  1813. suffix: suffix,
  1814. }
  1815. return true
  1816. }
  1817. // Scan a tag handle.
  1818. func yaml_parser_scan_tag_handle(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, handle *[]byte) bool {
  1819. // Check the initial '!' character.
  1820. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1821. return false
  1822. }
  1823. if parser.buffer[parser.buffer_pos] != '!' {
  1824. yaml_parser_set_scanner_tag_error(parser, directive,
  1825. start_mark, "did not find expected '!'")
  1826. return false
  1827. }
  1828. var s []byte
  1829. // Copy the '!' character.
  1830. s = read(parser, s)
  1831. // Copy all subsequent alphabetical and numerical characters.
  1832. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1833. return false
  1834. }
  1835. for is_alpha(parser.buffer, parser.buffer_pos) {
  1836. s = read(parser, s)
  1837. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1838. return false
  1839. }
  1840. }
  1841. // Check if the trailing character is '!' and copy it.
  1842. if parser.buffer[parser.buffer_pos] == '!' {
  1843. s = read(parser, s)
  1844. } else {
  1845. // It's either the '!' tag or not really a tag handle. If it's a %TAG
  1846. // directive, it's an error. If it's a tag token, it must be a part of URI.
  1847. if directive && string(s) != "!" {
  1848. yaml_parser_set_scanner_tag_error(parser, directive,
  1849. start_mark, "did not find expected '!'")
  1850. return false
  1851. }
  1852. }
  1853. *handle = s
  1854. return true
  1855. }
  1856. // Scan a tag.
  1857. func yaml_parser_scan_tag_uri(parser *yaml_parser_t, directive bool, head []byte, start_mark yaml_mark_t, uri *[]byte) bool {
  1858. //size_t length = head ? strlen((char *)head) : 0
  1859. var s []byte
  1860. hasTag := len(head) > 0
  1861. // Copy the head if needed.
  1862. //
  1863. // Note that we don't copy the leading '!' character.
  1864. if len(head) > 1 {
  1865. s = append(s, head[1:]...)
  1866. }
  1867. // Scan the tag.
  1868. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1869. return false
  1870. }
  1871. // The set of characters that may appear in URI is as follows:
  1872. //
  1873. // '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
  1874. // '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
  1875. // '%'.
  1876. // [Go] TODO Convert this into more reasonable logic.
  1877. for is_alpha(parser.buffer, parser.buffer_pos) || parser.buffer[parser.buffer_pos] == ';' ||
  1878. parser.buffer[parser.buffer_pos] == '/' || parser.buffer[parser.buffer_pos] == '?' ||
  1879. parser.buffer[parser.buffer_pos] == ':' || parser.buffer[parser.buffer_pos] == '@' ||
  1880. parser.buffer[parser.buffer_pos] == '&' || parser.buffer[parser.buffer_pos] == '=' ||
  1881. parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '$' ||
  1882. parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '.' ||
  1883. parser.buffer[parser.buffer_pos] == '!' || parser.buffer[parser.buffer_pos] == '~' ||
  1884. parser.buffer[parser.buffer_pos] == '*' || parser.buffer[parser.buffer_pos] == '\'' ||
  1885. parser.buffer[parser.buffer_pos] == '(' || parser.buffer[parser.buffer_pos] == ')' ||
  1886. parser.buffer[parser.buffer_pos] == '[' || parser.buffer[parser.buffer_pos] == ']' ||
  1887. parser.buffer[parser.buffer_pos] == '%' {
  1888. // Check if it is a URI-escape sequence.
  1889. if parser.buffer[parser.buffer_pos] == '%' {
  1890. if !yaml_parser_scan_uri_escapes(parser, directive, start_mark, &s) {
  1891. return false
  1892. }
  1893. } else {
  1894. s = read(parser, s)
  1895. }
  1896. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1897. return false
  1898. }
  1899. hasTag = true
  1900. }
  1901. if !hasTag {
  1902. yaml_parser_set_scanner_tag_error(parser, directive,
  1903. start_mark, "did not find expected tag URI")
  1904. return false
  1905. }
  1906. *uri = s
  1907. return true
  1908. }
  1909. // Decode an URI-escape sequence corresponding to a single UTF-8 character.
  1910. func yaml_parser_scan_uri_escapes(parser *yaml_parser_t, directive bool, start_mark yaml_mark_t, s *[]byte) bool {
  1911. // Decode the required number of characters.
  1912. w := 1024
  1913. for w > 0 {
  1914. // Check for a URI-escaped octet.
  1915. if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) {
  1916. return false
  1917. }
  1918. if !(parser.buffer[parser.buffer_pos] == '%' &&
  1919. is_hex(parser.buffer, parser.buffer_pos+1) &&
  1920. is_hex(parser.buffer, parser.buffer_pos+2)) {
  1921. return yaml_parser_set_scanner_tag_error(parser, directive,
  1922. start_mark, "did not find URI escaped octet")
  1923. }
  1924. // Get the octet.
  1925. octet := byte((as_hex(parser.buffer, parser.buffer_pos+1) << 4) + as_hex(parser.buffer, parser.buffer_pos+2))
  1926. // If it is the leading octet, determine the length of the UTF-8 sequence.
  1927. if w == 1024 {
  1928. w = width(octet)
  1929. if w == 0 {
  1930. return yaml_parser_set_scanner_tag_error(parser, directive,
  1931. start_mark, "found an incorrect leading UTF-8 octet")
  1932. }
  1933. } else {
  1934. // Check if the trailing octet is correct.
  1935. if octet&0xC0 != 0x80 {
  1936. return yaml_parser_set_scanner_tag_error(parser, directive,
  1937. start_mark, "found an incorrect trailing UTF-8 octet")
  1938. }
  1939. }
  1940. // Copy the octet and move the pointers.
  1941. *s = append(*s, octet)
  1942. skip(parser)
  1943. skip(parser)
  1944. skip(parser)
  1945. w--
  1946. }
  1947. return true
  1948. }
  1949. // Scan a block scalar.
  1950. func yaml_parser_scan_block_scalar(parser *yaml_parser_t, token *yaml_token_t, literal bool) bool {
  1951. // Eat the indicator '|' or '>'.
  1952. start_mark := parser.mark
  1953. skip(parser)
  1954. // Scan the additional block scalar indicators.
  1955. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1956. return false
  1957. }
  1958. // Check for a chomping indicator.
  1959. var chomping, increment int
  1960. if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' {
  1961. // Set the chomping method and eat the indicator.
  1962. if parser.buffer[parser.buffer_pos] == '+' {
  1963. chomping = +1
  1964. } else {
  1965. chomping = -1
  1966. }
  1967. skip(parser)
  1968. // Check for an indentation indicator.
  1969. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1970. return false
  1971. }
  1972. if is_digit(parser.buffer, parser.buffer_pos) {
  1973. // Check that the indentation is greater than 0.
  1974. if parser.buffer[parser.buffer_pos] == '0' {
  1975. yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
  1976. start_mark, "found an indentation indicator equal to 0")
  1977. return false
  1978. }
  1979. // Get the indentation level and eat the indicator.
  1980. increment = as_digit(parser.buffer, parser.buffer_pos)
  1981. skip(parser)
  1982. }
  1983. } else if is_digit(parser.buffer, parser.buffer_pos) {
  1984. // Do the same as above, but in the opposite order.
  1985. if parser.buffer[parser.buffer_pos] == '0' {
  1986. yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
  1987. start_mark, "found an indentation indicator equal to 0")
  1988. return false
  1989. }
  1990. increment = as_digit(parser.buffer, parser.buffer_pos)
  1991. skip(parser)
  1992. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  1993. return false
  1994. }
  1995. if parser.buffer[parser.buffer_pos] == '+' || parser.buffer[parser.buffer_pos] == '-' {
  1996. if parser.buffer[parser.buffer_pos] == '+' {
  1997. chomping = +1
  1998. } else {
  1999. chomping = -1
  2000. }
  2001. skip(parser)
  2002. }
  2003. }
  2004. // Eat whitespaces and comments to the end of the line.
  2005. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2006. return false
  2007. }
  2008. for is_blank(parser.buffer, parser.buffer_pos) {
  2009. skip(parser)
  2010. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2011. return false
  2012. }
  2013. }
  2014. if parser.buffer[parser.buffer_pos] == '#' {
  2015. if !yaml_parser_scan_line_comment(parser, start_mark) {
  2016. return false
  2017. }
  2018. for !is_breakz(parser.buffer, parser.buffer_pos) {
  2019. skip(parser)
  2020. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2021. return false
  2022. }
  2023. }
  2024. }
  2025. // Check if we are at the end of the line.
  2026. if !is_breakz(parser.buffer, parser.buffer_pos) {
  2027. yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
  2028. start_mark, "did not find expected comment or line break")
  2029. return false
  2030. }
  2031. // Eat a line break.
  2032. if is_break(parser.buffer, parser.buffer_pos) {
  2033. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2034. return false
  2035. }
  2036. skip_line(parser)
  2037. }
  2038. end_mark := parser.mark
  2039. // Set the indentation level if it was specified.
  2040. var indent int
  2041. if increment > 0 {
  2042. if parser.indent >= 0 {
  2043. indent = parser.indent + increment
  2044. } else {
  2045. indent = increment
  2046. }
  2047. }
  2048. // Scan the leading line breaks and determine the indentation level if needed.
  2049. var s, leading_break, trailing_breaks []byte
  2050. if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) {
  2051. return false
  2052. }
  2053. // Scan the block scalar content.
  2054. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2055. return false
  2056. }
  2057. var leading_blank, trailing_blank bool
  2058. for parser.mark.column == indent && !is_z(parser.buffer, parser.buffer_pos) {
  2059. // We are at the beginning of a non-empty line.
  2060. // Is it a trailing whitespace?
  2061. trailing_blank = is_blank(parser.buffer, parser.buffer_pos)
  2062. // Check if we need to fold the leading line break.
  2063. if !literal && !leading_blank && !trailing_blank && len(leading_break) > 0 && leading_break[0] == '\n' {
  2064. // Do we need to join the lines by space?
  2065. if len(trailing_breaks) == 0 {
  2066. s = append(s, ' ')
  2067. }
  2068. } else {
  2069. s = append(s, leading_break...)
  2070. }
  2071. leading_break = leading_break[:0]
  2072. // Append the remaining line breaks.
  2073. s = append(s, trailing_breaks...)
  2074. trailing_breaks = trailing_breaks[:0]
  2075. // Is it a leading whitespace?
  2076. leading_blank = is_blank(parser.buffer, parser.buffer_pos)
  2077. // Consume the current line.
  2078. for !is_breakz(parser.buffer, parser.buffer_pos) {
  2079. s = read(parser, s)
  2080. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2081. return false
  2082. }
  2083. }
  2084. // Consume the line break.
  2085. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2086. return false
  2087. }
  2088. leading_break = read_line(parser, leading_break)
  2089. // Eat the following indentation spaces and line breaks.
  2090. if !yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark) {
  2091. return false
  2092. }
  2093. }
  2094. // Chomp the tail.
  2095. if chomping != -1 {
  2096. s = append(s, leading_break...)
  2097. }
  2098. if chomping == 1 {
  2099. s = append(s, trailing_breaks...)
  2100. }
  2101. // Create a token.
  2102. *token = yaml_token_t{
  2103. typ: yaml_SCALAR_TOKEN,
  2104. start_mark: start_mark,
  2105. end_mark: end_mark,
  2106. value: s,
  2107. style: yaml_LITERAL_SCALAR_STYLE,
  2108. }
  2109. if !literal {
  2110. token.style = yaml_FOLDED_SCALAR_STYLE
  2111. }
  2112. return true
  2113. }
  2114. // Scan indentation spaces and line breaks for a block scalar. Determine the
  2115. // indentation level if needed.
  2116. func yaml_parser_scan_block_scalar_breaks(parser *yaml_parser_t, indent *int, breaks *[]byte, start_mark yaml_mark_t, end_mark *yaml_mark_t) bool {
  2117. *end_mark = parser.mark
  2118. // Eat the indentation spaces and line breaks.
  2119. max_indent := 0
  2120. for {
  2121. // Eat the indentation spaces.
  2122. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2123. return false
  2124. }
  2125. for (*indent == 0 || parser.mark.column < *indent) && is_space(parser.buffer, parser.buffer_pos) {
  2126. skip(parser)
  2127. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2128. return false
  2129. }
  2130. }
  2131. if parser.mark.column > max_indent {
  2132. max_indent = parser.mark.column
  2133. }
  2134. // Check for a tab character messing the indentation.
  2135. if (*indent == 0 || parser.mark.column < *indent) && is_tab(parser.buffer, parser.buffer_pos) {
  2136. return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
  2137. start_mark, "found a tab character where an indentation space is expected")
  2138. }
  2139. // Have we found a non-empty line?
  2140. if !is_break(parser.buffer, parser.buffer_pos) {
  2141. break
  2142. }
  2143. // Consume the line break.
  2144. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2145. return false
  2146. }
  2147. // [Go] Should really be returning breaks instead.
  2148. *breaks = read_line(parser, *breaks)
  2149. *end_mark = parser.mark
  2150. }
  2151. // Determine the indentation level if needed.
  2152. if *indent == 0 {
  2153. *indent = max_indent
  2154. if *indent < parser.indent+1 {
  2155. *indent = parser.indent + 1
  2156. }
  2157. if *indent < 1 {
  2158. *indent = 1
  2159. }
  2160. }
  2161. return true
  2162. }
  2163. // Scan a quoted scalar.
  2164. func yaml_parser_scan_flow_scalar(parser *yaml_parser_t, token *yaml_token_t, single bool) bool {
  2165. // Eat the left quote.
  2166. start_mark := parser.mark
  2167. skip(parser)
  2168. // Consume the content of the quoted scalar.
  2169. var s, leading_break, trailing_breaks, whitespaces []byte
  2170. for {
  2171. // Check that there are no document indicators at the beginning of the line.
  2172. if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) {
  2173. return false
  2174. }
  2175. if parser.mark.column == 0 &&
  2176. ((parser.buffer[parser.buffer_pos+0] == '-' &&
  2177. parser.buffer[parser.buffer_pos+1] == '-' &&
  2178. parser.buffer[parser.buffer_pos+2] == '-') ||
  2179. (parser.buffer[parser.buffer_pos+0] == '.' &&
  2180. parser.buffer[parser.buffer_pos+1] == '.' &&
  2181. parser.buffer[parser.buffer_pos+2] == '.')) &&
  2182. is_blankz(parser.buffer, parser.buffer_pos+3) {
  2183. yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
  2184. start_mark, "found unexpected document indicator")
  2185. return false
  2186. }
  2187. // Check for EOF.
  2188. if is_z(parser.buffer, parser.buffer_pos) {
  2189. yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
  2190. start_mark, "found unexpected end of stream")
  2191. return false
  2192. }
  2193. // Consume non-blank characters.
  2194. leading_blanks := false
  2195. for !is_blankz(parser.buffer, parser.buffer_pos) {
  2196. if single && parser.buffer[parser.buffer_pos] == '\'' && parser.buffer[parser.buffer_pos+1] == '\'' {
  2197. // Is is an escaped single quote.
  2198. s = append(s, '\'')
  2199. skip(parser)
  2200. skip(parser)
  2201. } else if single && parser.buffer[parser.buffer_pos] == '\'' {
  2202. // It is a right single quote.
  2203. break
  2204. } else if !single && parser.buffer[parser.buffer_pos] == '"' {
  2205. // It is a right double quote.
  2206. break
  2207. } else if !single && parser.buffer[parser.buffer_pos] == '\\' && is_break(parser.buffer, parser.buffer_pos+1) {
  2208. // It is an escaped line break.
  2209. if parser.unread < 3 && !yaml_parser_update_buffer(parser, 3) {
  2210. return false
  2211. }
  2212. skip(parser)
  2213. skip_line(parser)
  2214. leading_blanks = true
  2215. break
  2216. } else if !single && parser.buffer[parser.buffer_pos] == '\\' {
  2217. // It is an escape sequence.
  2218. code_length := 0
  2219. // Check the escape character.
  2220. switch parser.buffer[parser.buffer_pos+1] {
  2221. case '0':
  2222. s = append(s, 0)
  2223. case 'a':
  2224. s = append(s, '\x07')
  2225. case 'b':
  2226. s = append(s, '\x08')
  2227. case 't', '\t':
  2228. s = append(s, '\x09')
  2229. case 'n':
  2230. s = append(s, '\x0A')
  2231. case 'v':
  2232. s = append(s, '\x0B')
  2233. case 'f':
  2234. s = append(s, '\x0C')
  2235. case 'r':
  2236. s = append(s, '\x0D')
  2237. case 'e':
  2238. s = append(s, '\x1B')
  2239. case ' ':
  2240. s = append(s, '\x20')
  2241. case '"':
  2242. s = append(s, '"')
  2243. case '\'':
  2244. s = append(s, '\'')
  2245. case '\\':
  2246. s = append(s, '\\')
  2247. case 'N': // NEL (#x85)
  2248. s = append(s, '\xC2')
  2249. s = append(s, '\x85')
  2250. case '_': // #xA0
  2251. s = append(s, '\xC2')
  2252. s = append(s, '\xA0')
  2253. case 'L': // LS (#x2028)
  2254. s = append(s, '\xE2')
  2255. s = append(s, '\x80')
  2256. s = append(s, '\xA8')
  2257. case 'P': // PS (#x2029)
  2258. s = append(s, '\xE2')
  2259. s = append(s, '\x80')
  2260. s = append(s, '\xA9')
  2261. case 'x':
  2262. code_length = 2
  2263. case 'u':
  2264. code_length = 4
  2265. case 'U':
  2266. code_length = 8
  2267. default:
  2268. yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
  2269. start_mark, "found unknown escape character")
  2270. return false
  2271. }
  2272. skip(parser)
  2273. skip(parser)
  2274. // Consume an arbitrary escape code.
  2275. if code_length > 0 {
  2276. var value int
  2277. // Scan the character value.
  2278. if parser.unread < code_length && !yaml_parser_update_buffer(parser, code_length) {
  2279. return false
  2280. }
  2281. for k := 0; k < code_length; k++ {
  2282. if !is_hex(parser.buffer, parser.buffer_pos+k) {
  2283. yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
  2284. start_mark, "did not find expected hexdecimal number")
  2285. return false
  2286. }
  2287. value = (value << 4) + as_hex(parser.buffer, parser.buffer_pos+k)
  2288. }
  2289. // Check the value and write the character.
  2290. if (value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF {
  2291. yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
  2292. start_mark, "found invalid Unicode character escape code")
  2293. return false
  2294. }
  2295. if value <= 0x7F {
  2296. s = append(s, byte(value))
  2297. } else if value <= 0x7FF {
  2298. s = append(s, byte(0xC0+(value>>6)))
  2299. s = append(s, byte(0x80+(value&0x3F)))
  2300. } else if value <= 0xFFFF {
  2301. s = append(s, byte(0xE0+(value>>12)))
  2302. s = append(s, byte(0x80+((value>>6)&0x3F)))
  2303. s = append(s, byte(0x80+(value&0x3F)))
  2304. } else {
  2305. s = append(s, byte(0xF0+(value>>18)))
  2306. s = append(s, byte(0x80+((value>>12)&0x3F)))
  2307. s = append(s, byte(0x80+((value>>6)&0x3F)))
  2308. s = append(s, byte(0x80+(value&0x3F)))
  2309. }
  2310. // Advance the pointer.
  2311. for k := 0; k < code_length; k++ {
  2312. skip(parser)
  2313. }
  2314. }
  2315. } else {
  2316. // It is a non-escaped non-blank character.
  2317. s = read(parser, s)
  2318. }
  2319. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2320. return false
  2321. }
  2322. }
  2323. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2324. return false
  2325. }
  2326. // Check if we are at the end of the scalar.
  2327. if single {
  2328. if parser.buffer[parser.buffer_pos] == '\'' {
  2329. break
  2330. }
  2331. } else {
  2332. if parser.buffer[parser.buffer_pos] == '"' {
  2333. break
  2334. }
  2335. }
  2336. // Consume blank characters.
  2337. for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) {
  2338. if is_blank(parser.buffer, parser.buffer_pos) {
  2339. // Consume a space or a tab character.
  2340. if !leading_blanks {
  2341. whitespaces = read(parser, whitespaces)
  2342. } else {
  2343. skip(parser)
  2344. }
  2345. } else {
  2346. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2347. return false
  2348. }
  2349. // Check if it is a first line break.
  2350. if !leading_blanks {
  2351. whitespaces = whitespaces[:0]
  2352. leading_break = read_line(parser, leading_break)
  2353. leading_blanks = true
  2354. } else {
  2355. trailing_breaks = read_line(parser, trailing_breaks)
  2356. }
  2357. }
  2358. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2359. return false
  2360. }
  2361. }
  2362. // Join the whitespaces or fold line breaks.
  2363. if leading_blanks {
  2364. // Do we need to fold line breaks?
  2365. if len(leading_break) > 0 && leading_break[0] == '\n' {
  2366. if len(trailing_breaks) == 0 {
  2367. s = append(s, ' ')
  2368. } else {
  2369. s = append(s, trailing_breaks...)
  2370. }
  2371. } else {
  2372. s = append(s, leading_break...)
  2373. s = append(s, trailing_breaks...)
  2374. }
  2375. trailing_breaks = trailing_breaks[:0]
  2376. leading_break = leading_break[:0]
  2377. } else {
  2378. s = append(s, whitespaces...)
  2379. whitespaces = whitespaces[:0]
  2380. }
  2381. }
  2382. // Eat the right quote.
  2383. skip(parser)
  2384. end_mark := parser.mark
  2385. // Create a token.
  2386. *token = yaml_token_t{
  2387. typ: yaml_SCALAR_TOKEN,
  2388. start_mark: start_mark,
  2389. end_mark: end_mark,
  2390. value: s,
  2391. style: yaml_SINGLE_QUOTED_SCALAR_STYLE,
  2392. }
  2393. if !single {
  2394. token.style = yaml_DOUBLE_QUOTED_SCALAR_STYLE
  2395. }
  2396. return true
  2397. }
  2398. // Scan a plain scalar.
  2399. func yaml_parser_scan_plain_scalar(parser *yaml_parser_t, token *yaml_token_t) bool {
  2400. var s, leading_break, trailing_breaks, whitespaces []byte
  2401. var leading_blanks bool
  2402. var indent = parser.indent + 1
  2403. start_mark := parser.mark
  2404. end_mark := parser.mark
  2405. // Consume the content of the plain scalar.
  2406. for {
  2407. // Check for a document indicator.
  2408. if parser.unread < 4 && !yaml_parser_update_buffer(parser, 4) {
  2409. return false
  2410. }
  2411. if parser.mark.column == 0 &&
  2412. ((parser.buffer[parser.buffer_pos+0] == '-' &&
  2413. parser.buffer[parser.buffer_pos+1] == '-' &&
  2414. parser.buffer[parser.buffer_pos+2] == '-') ||
  2415. (parser.buffer[parser.buffer_pos+0] == '.' &&
  2416. parser.buffer[parser.buffer_pos+1] == '.' &&
  2417. parser.buffer[parser.buffer_pos+2] == '.')) &&
  2418. is_blankz(parser.buffer, parser.buffer_pos+3) {
  2419. break
  2420. }
  2421. // Check for a comment.
  2422. if parser.buffer[parser.buffer_pos] == '#' {
  2423. break
  2424. }
  2425. // Consume non-blank characters.
  2426. for !is_blankz(parser.buffer, parser.buffer_pos) {
  2427. // Check for indicators that may end a plain scalar.
  2428. if (parser.buffer[parser.buffer_pos] == ':' && is_blankz(parser.buffer, parser.buffer_pos+1)) ||
  2429. (parser.flow_level > 0 &&
  2430. (parser.buffer[parser.buffer_pos] == ',' ||
  2431. parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == '[' ||
  2432. parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' ||
  2433. parser.buffer[parser.buffer_pos] == '}')) {
  2434. break
  2435. }
  2436. // Check if we need to join whitespaces and breaks.
  2437. if leading_blanks || len(whitespaces) > 0 {
  2438. if leading_blanks {
  2439. // Do we need to fold line breaks?
  2440. if leading_break[0] == '\n' {
  2441. if len(trailing_breaks) == 0 {
  2442. s = append(s, ' ')
  2443. } else {
  2444. s = append(s, trailing_breaks...)
  2445. }
  2446. } else {
  2447. s = append(s, leading_break...)
  2448. s = append(s, trailing_breaks...)
  2449. }
  2450. trailing_breaks = trailing_breaks[:0]
  2451. leading_break = leading_break[:0]
  2452. leading_blanks = false
  2453. } else {
  2454. s = append(s, whitespaces...)
  2455. whitespaces = whitespaces[:0]
  2456. }
  2457. }
  2458. // Copy the character.
  2459. s = read(parser, s)
  2460. end_mark = parser.mark
  2461. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2462. return false
  2463. }
  2464. }
  2465. // Is it the end?
  2466. if !(is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos)) {
  2467. break
  2468. }
  2469. // Consume blank characters.
  2470. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2471. return false
  2472. }
  2473. for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) {
  2474. if is_blank(parser.buffer, parser.buffer_pos) {
  2475. // Check for tab characters that abuse indentation.
  2476. if leading_blanks && parser.mark.column < indent && is_tab(parser.buffer, parser.buffer_pos) {
  2477. yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
  2478. start_mark, "found a tab character that violates indentation")
  2479. return false
  2480. }
  2481. // Consume a space or a tab character.
  2482. if !leading_blanks {
  2483. whitespaces = read(parser, whitespaces)
  2484. } else {
  2485. skip(parser)
  2486. }
  2487. } else {
  2488. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2489. return false
  2490. }
  2491. // Check if it is a first line break.
  2492. if !leading_blanks {
  2493. whitespaces = whitespaces[:0]
  2494. leading_break = read_line(parser, leading_break)
  2495. leading_blanks = true
  2496. } else {
  2497. trailing_breaks = read_line(parser, trailing_breaks)
  2498. }
  2499. }
  2500. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2501. return false
  2502. }
  2503. }
  2504. // Check indentation level.
  2505. if parser.flow_level == 0 && parser.mark.column < indent {
  2506. break
  2507. }
  2508. }
  2509. // Create a token.
  2510. *token = yaml_token_t{
  2511. typ: yaml_SCALAR_TOKEN,
  2512. start_mark: start_mark,
  2513. end_mark: end_mark,
  2514. value: s,
  2515. style: yaml_PLAIN_SCALAR_STYLE,
  2516. }
  2517. // Note that we change the 'simple_key_allowed' flag.
  2518. if leading_blanks {
  2519. parser.simple_key_allowed = true
  2520. }
  2521. return true
  2522. }
  2523. func yaml_parser_scan_line_comment(parser *yaml_parser_t, token_mark yaml_mark_t) bool {
  2524. if parser.newlines > 0 {
  2525. return true
  2526. }
  2527. var start_mark yaml_mark_t
  2528. var text []byte
  2529. for peek := 0; peek < 512; peek++ {
  2530. if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) {
  2531. break
  2532. }
  2533. if is_blank(parser.buffer, parser.buffer_pos+peek) {
  2534. continue
  2535. }
  2536. if parser.buffer[parser.buffer_pos+peek] == '#' {
  2537. seen := parser.mark.index+peek
  2538. for {
  2539. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2540. return false
  2541. }
  2542. if is_breakz(parser.buffer, parser.buffer_pos) {
  2543. if parser.mark.index >= seen {
  2544. break
  2545. }
  2546. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2547. return false
  2548. }
  2549. skip_line(parser)
  2550. } else if parser.mark.index >= seen {
  2551. if len(text) == 0 {
  2552. start_mark = parser.mark
  2553. }
  2554. text = read(parser, text)
  2555. } else {
  2556. skip(parser)
  2557. }
  2558. }
  2559. }
  2560. break
  2561. }
  2562. if len(text) > 0 {
  2563. parser.comments = append(parser.comments, yaml_comment_t{
  2564. token_mark: token_mark,
  2565. start_mark: start_mark,
  2566. line: text,
  2567. })
  2568. }
  2569. return true
  2570. }
  2571. func yaml_parser_scan_comments(parser *yaml_parser_t, scan_mark yaml_mark_t) bool {
  2572. token := parser.tokens[len(parser.tokens)-1]
  2573. if token.typ == yaml_FLOW_ENTRY_TOKEN && len(parser.tokens) > 1 {
  2574. token = parser.tokens[len(parser.tokens)-2]
  2575. }
  2576. var token_mark = token.start_mark
  2577. var start_mark yaml_mark_t
  2578. var next_indent = parser.indent
  2579. if next_indent < 0 {
  2580. next_indent = 0
  2581. }
  2582. var recent_empty = false
  2583. var first_empty = parser.newlines <= 1
  2584. var line = parser.mark.line
  2585. var column = parser.mark.column
  2586. var text []byte
  2587. // The foot line is the place where a comment must start to
  2588. // still be considered as a foot of the prior content.
  2589. // If there's some content in the currently parsed line, then
  2590. // the foot is the line below it.
  2591. var foot_line = -1
  2592. if scan_mark.line > 0 {
  2593. foot_line = parser.mark.line-parser.newlines+1
  2594. if parser.newlines == 0 && parser.mark.column > 1 {
  2595. foot_line++
  2596. }
  2597. }
  2598. var peek = 0
  2599. for ; peek < 512; peek++ {
  2600. if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) {
  2601. break
  2602. }
  2603. column++
  2604. if is_blank(parser.buffer, parser.buffer_pos+peek) {
  2605. continue
  2606. }
  2607. c := parser.buffer[parser.buffer_pos+peek]
  2608. var close_flow = parser.flow_level > 0 && (c == ']' || c == '}')
  2609. if close_flow || is_breakz(parser.buffer, parser.buffer_pos+peek) {
  2610. // Got line break or terminator.
  2611. if close_flow || !recent_empty {
  2612. if close_flow || first_empty && (start_mark.line == foot_line && token.typ != yaml_VALUE_TOKEN || start_mark.column-1 < next_indent) {
  2613. // This is the first empty line and there were no empty lines before,
  2614. // so this initial part of the comment is a foot of the prior token
  2615. // instead of being a head for the following one. Split it up.
  2616. // Alternatively, this might also be the last comment inside a flow
  2617. // scope, so it must be a footer.
  2618. if len(text) > 0 {
  2619. if start_mark.column-1 < next_indent {
  2620. // If dedented it's unrelated to the prior token.
  2621. token_mark = start_mark
  2622. }
  2623. parser.comments = append(parser.comments, yaml_comment_t{
  2624. scan_mark: scan_mark,
  2625. token_mark: token_mark,
  2626. start_mark: start_mark,
  2627. end_mark: yaml_mark_t{parser.mark.index + peek, line, column},
  2628. foot: text,
  2629. })
  2630. scan_mark = yaml_mark_t{parser.mark.index + peek, line, column}
  2631. token_mark = scan_mark
  2632. text = nil
  2633. }
  2634. } else {
  2635. if len(text) > 0 && parser.buffer[parser.buffer_pos+peek] != 0 {
  2636. text = append(text, '\n')
  2637. }
  2638. }
  2639. }
  2640. if !is_break(parser.buffer, parser.buffer_pos+peek) {
  2641. break
  2642. }
  2643. first_empty = false
  2644. recent_empty = true
  2645. column = 0
  2646. line++
  2647. continue
  2648. }
  2649. if len(text) > 0 && (close_flow || column-1 < next_indent && column != start_mark.column) {
  2650. // The comment at the different indentation is a foot of the
  2651. // preceding data rather than a head of the upcoming one.
  2652. parser.comments = append(parser.comments, yaml_comment_t{
  2653. scan_mark: scan_mark,
  2654. token_mark: token_mark,
  2655. start_mark: start_mark,
  2656. end_mark: yaml_mark_t{parser.mark.index + peek, line, column},
  2657. foot: text,
  2658. })
  2659. scan_mark = yaml_mark_t{parser.mark.index + peek, line, column}
  2660. token_mark = scan_mark
  2661. text = nil
  2662. }
  2663. if parser.buffer[parser.buffer_pos+peek] != '#' {
  2664. break
  2665. }
  2666. if len(text) == 0 {
  2667. start_mark = yaml_mark_t{parser.mark.index + peek, line, column}
  2668. } else {
  2669. text = append(text, '\n')
  2670. }
  2671. recent_empty = false
  2672. // Consume until after the consumed comment line.
  2673. seen := parser.mark.index+peek
  2674. for {
  2675. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
  2676. return false
  2677. }
  2678. if is_breakz(parser.buffer, parser.buffer_pos) {
  2679. if parser.mark.index >= seen {
  2680. break
  2681. }
  2682. if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
  2683. return false
  2684. }
  2685. skip_line(parser)
  2686. } else if parser.mark.index >= seen {
  2687. text = read(parser, text)
  2688. } else {
  2689. skip(parser)
  2690. }
  2691. }
  2692. peek = 0
  2693. column = 0
  2694. line = parser.mark.line
  2695. next_indent = parser.indent
  2696. if next_indent < 0 {
  2697. next_indent = 0
  2698. }
  2699. }
  2700. if len(text) > 0 {
  2701. parser.comments = append(parser.comments, yaml_comment_t{
  2702. scan_mark: scan_mark,
  2703. token_mark: start_mark,
  2704. start_mark: start_mark,
  2705. end_mark: yaml_mark_t{parser.mark.index + peek - 1, line, column},
  2706. head: text,
  2707. })
  2708. }
  2709. return true
  2710. }