YAMLParser.h 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
//===- YAMLParser.h - Simple YAML parser ------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a YAML 1.2 parser.
//
// See http://www.yaml.org/spec/1.2/spec.html for the full standard.
//
// This currently does not implement the following:
//   * Multi-line literal folding.
//   * Tag resolution.
//   * UTF-16.
//   * BOMs anywhere other than the first Unicode scalar value in the file.
//
// The most important class here is Stream. This represents a YAML stream with
// 0, 1, or many documents.
//
//   SourceMgr sm;
//   StringRef input = getInput();
//   yaml::Stream stream(input, sm);
//
//   for (yaml::document_iterator di = stream.begin(), de = stream.end();
//        di != de; ++di) {
//     yaml::Node *n = di->getRoot();
//     if (n) {
//       // Do something with n...
//     } else
//       break;
//   }
//
//===----------------------------------------------------------------------===//
  41. #ifndef LLVM_SUPPORT_YAMLPARSER_H
  42. #define LLVM_SUPPORT_YAMLPARSER_H
  43. #include "llvm/ADT/StringRef.h"
  44. #include "llvm/Support/Allocator.h"
  45. #include "llvm/Support/SMLoc.h"
  46. #include "llvm/Support/SourceMgr.h"
  47. #include <cassert>
  48. #include <cstddef>
  49. #include <iterator>
  50. #include <map>
  51. #include <memory>
  52. #include <string>
  53. #include <system_error>
  54. namespace llvm {
  55. class MemoryBufferRef;
  56. class raw_ostream;
  57. class Twine;
  58. namespace yaml {
  59. class Document;
  60. class document_iterator;
  61. class Node;
  62. class Scanner;
  63. struct Token;
  64. /// Dump all the tokens in this stream to OS.
  65. /// \returns true if there was an error, false otherwise.
  66. bool dumpTokens(StringRef Input, raw_ostream &);
  67. /// Scans all tokens in input without outputting anything. This is used
  68. /// for benchmarking the tokenizer.
  69. /// \returns true if there was an error, false otherwise.
  70. bool scanTokens(StringRef Input);
  71. /// Escape \a Input for a double quoted scalar; if \p EscapePrintable
  72. /// is true, all UTF8 sequences will be escaped, if \p EscapePrintable is
  73. /// false, those UTF8 sequences encoding printable unicode scalars will not be
  74. /// escaped, but emitted verbatim.
  75. std::string escape(StringRef Input, bool EscapePrintable = true);
  76. /// Parse \p S as a bool according to https://yaml.org/type/bool.html.
  77. llvm::Optional<bool> parseBool(StringRef S);
  78. /// This class represents a YAML stream potentially containing multiple
  79. /// documents.
  80. class Stream {
  81. public:
  82. /// This keeps a reference to the string referenced by \p Input.
  83. Stream(StringRef Input, SourceMgr &, bool ShowColors = true,
  84. std::error_code *EC = nullptr);
  85. Stream(MemoryBufferRef InputBuffer, SourceMgr &, bool ShowColors = true,
  86. std::error_code *EC = nullptr);
  87. ~Stream();
  88. document_iterator begin();
  89. document_iterator end();
  90. void skip();
  91. bool failed();
  92. bool validate() {
  93. skip();
  94. return !failed();
  95. }
  96. void printError(Node *N, const Twine &Msg,
  97. SourceMgr::DiagKind Kind = SourceMgr::DK_Error);
  98. void printError(const SMRange &Range, const Twine &Msg,
  99. SourceMgr::DiagKind Kind = SourceMgr::DK_Error);
  100. private:
  101. friend class Document;
  102. std::unique_ptr<Scanner> scanner;
  103. std::unique_ptr<Document> CurrentDoc;
  104. };
  105. /// Abstract base class for all Nodes.
  106. class Node {
  107. virtual void anchor();
  108. public:
  109. enum NodeKind {
  110. NK_Null,
  111. NK_Scalar,
  112. NK_BlockScalar,
  113. NK_KeyValue,
  114. NK_Mapping,
  115. NK_Sequence,
  116. NK_Alias
  117. };
  118. Node(unsigned int Type, std::unique_ptr<Document> &, StringRef Anchor,
  119. StringRef Tag);
  120. // It's not safe to copy YAML nodes; the document is streamed and the position
  121. // is part of the state.
  122. Node(const Node &) = delete;
  123. void operator=(const Node &) = delete;
  124. void *operator new(size_t Size, BumpPtrAllocator &Alloc,
  125. size_t Alignment = 16) noexcept {
  126. return Alloc.Allocate(Size, Alignment);
  127. }
  128. void operator delete(void *Ptr, BumpPtrAllocator &Alloc,
  129. size_t Size) noexcept {
  130. Alloc.Deallocate(Ptr, Size, 0);
  131. }
  132. void operator delete(void *) noexcept = delete;
  133. /// Get the value of the anchor attached to this node. If it does not
  134. /// have one, getAnchor().size() will be 0.
  135. StringRef getAnchor() const { return Anchor; }
  136. /// Get the tag as it was written in the document. This does not
  137. /// perform tag resolution.
  138. StringRef getRawTag() const { return Tag; }
  139. /// Get the verbatium tag for a given Node. This performs tag resoluton
  140. /// and substitution.
  141. std::string getVerbatimTag() const;
  142. SMRange getSourceRange() const { return SourceRange; }
  143. void setSourceRange(SMRange SR) { SourceRange = SR; }
  144. // These functions forward to Document and Scanner.
  145. Token &peekNext();
  146. Token getNext();
  147. Node *parseBlockNode();
  148. BumpPtrAllocator &getAllocator();
  149. void setError(const Twine &Message, Token &Location) const;
  150. bool failed() const;
  151. virtual void skip() {}
  152. unsigned int getType() const { return TypeID; }
  153. protected:
  154. std::unique_ptr<Document> &Doc;
  155. SMRange SourceRange;
  156. ~Node() = default;
  157. private:
  158. unsigned int TypeID;
  159. StringRef Anchor;
  160. /// The tag as typed in the document.
  161. StringRef Tag;
  162. };
  163. /// A null value.
  164. ///
  165. /// Example:
  166. /// !!null null
  167. class NullNode final : public Node {
  168. void anchor() override;
  169. public:
  170. NullNode(std::unique_ptr<Document> &D)
  171. : Node(NK_Null, D, StringRef(), StringRef()) {}
  172. static bool classof(const Node *N) { return N->getType() == NK_Null; }
  173. };
  174. /// A scalar node is an opaque datum that can be presented as a
  175. /// series of zero or more Unicode scalar values.
  176. ///
  177. /// Example:
  178. /// Adena
  179. class ScalarNode final : public Node {
  180. void anchor() override;
  181. public:
  182. ScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
  183. StringRef Val)
  184. : Node(NK_Scalar, D, Anchor, Tag), Value(Val) {
  185. SMLoc Start = SMLoc::getFromPointer(Val.begin());
  186. SMLoc End = SMLoc::getFromPointer(Val.end());
  187. SourceRange = SMRange(Start, End);
  188. }
  189. // Return Value without any escaping or folding or other fun YAML stuff. This
  190. // is the exact bytes that are contained in the file (after conversion to
  191. // utf8).
  192. StringRef getRawValue() const { return Value; }
  193. /// Gets the value of this node as a StringRef.
  194. ///
  195. /// \param Storage is used to store the content of the returned StringRef if
  196. /// it requires any modification from how it appeared in the source.
  197. /// This happens with escaped characters and multi-line literals.
  198. StringRef getValue(SmallVectorImpl<char> &Storage) const;
  199. static bool classof(const Node *N) {
  200. return N->getType() == NK_Scalar;
  201. }
  202. private:
  203. StringRef Value;
  204. StringRef unescapeDoubleQuoted(StringRef UnquotedValue,
  205. StringRef::size_type Start,
  206. SmallVectorImpl<char> &Storage) const;
  207. };
  208. /// A block scalar node is an opaque datum that can be presented as a
  209. /// series of zero or more Unicode scalar values.
  210. ///
  211. /// Example:
  212. /// |
  213. /// Hello
  214. /// World
  215. class BlockScalarNode final : public Node {
  216. void anchor() override;
  217. public:
  218. BlockScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
  219. StringRef Value, StringRef RawVal)
  220. : Node(NK_BlockScalar, D, Anchor, Tag), Value(Value) {
  221. SMLoc Start = SMLoc::getFromPointer(RawVal.begin());
  222. SMLoc End = SMLoc::getFromPointer(RawVal.end());
  223. SourceRange = SMRange(Start, End);
  224. }
  225. /// Gets the value of this node as a StringRef.
  226. StringRef getValue() const { return Value; }
  227. static bool classof(const Node *N) {
  228. return N->getType() == NK_BlockScalar;
  229. }
  230. private:
  231. StringRef Value;
  232. };
  233. /// A key and value pair. While not technically a Node under the YAML
  234. /// representation graph, it is easier to treat them this way.
  235. ///
  236. /// TODO: Consider making this not a child of Node.
  237. ///
  238. /// Example:
  239. /// Section: .text
  240. class KeyValueNode final : public Node {
  241. void anchor() override;
  242. public:
  243. KeyValueNode(std::unique_ptr<Document> &D)
  244. : Node(NK_KeyValue, D, StringRef(), StringRef()) {}
  245. /// Parse and return the key.
  246. ///
  247. /// This may be called multiple times.
  248. ///
  249. /// \returns The key, or nullptr if failed() == true.
  250. Node *getKey();
  251. /// Parse and return the value.
  252. ///
  253. /// This may be called multiple times.
  254. ///
  255. /// \returns The value, or nullptr if failed() == true.
  256. Node *getValue();
  257. void skip() override {
  258. if (Node *Key = getKey()) {
  259. Key->skip();
  260. if (Node *Val = getValue())
  261. Val->skip();
  262. }
  263. }
  264. static bool classof(const Node *N) {
  265. return N->getType() == NK_KeyValue;
  266. }
  267. private:
  268. Node *Key = nullptr;
  269. Node *Value = nullptr;
  270. };
  /// This is an iterator abstraction over YAML collections shared by both
  /// sequences and maps.
  ///
  /// BaseT must have a ValueT* member named CurrentEntry and a member function
  /// increment() which must set CurrentEntry to 0 to create an end iterator.
  template <class BaseT, class ValueT> class basic_collection_iterator {
  public:
    using iterator_category = std::input_iterator_tag;
    using value_type = ValueT;
    using difference_type = std::ptrdiff_t;
    using pointer = value_type *;
    using reference = value_type &;

    /// A default-constructed iterator has no collection and acts as the end
    /// iterator.
    basic_collection_iterator() = default;
    basic_collection_iterator(BaseT *B) : Base(B) {}

    ValueT *operator->() const {
      assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
      return Base->CurrentEntry;
    }

    ValueT &operator*() const {
      assert(Base && Base->CurrentEntry &&
             "Attempted to dereference end iterator!");
      return *Base->CurrentEntry;
    }

    operator ValueT *() const {
      assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
      return Base->CurrentEntry;
    }

    /// Note on EqualityComparable:
    ///
    /// The iterator is not re-entrant,
    /// it is meant to be used for parsing YAML on-demand
    /// Once iteration started - it can point only to one entry at a time
    /// hence Base.CurrentEntry and Other.Base.CurrentEntry are equal
    /// iff Base and Other.Base are equal.
    bool operator==(const basic_collection_iterator &Other) const {
      const bool SameBase = (Base == Other.Base);
      if (SameBase && Base) {
        assert((Base->CurrentEntry == Other.Base->CurrentEntry)
               && "Equal Bases expected to point to equal Entries");
      }
      return SameBase;
    }

    bool operator!=(const basic_collection_iterator &Other) const {
      return Base != Other.Base;
    }

    /// Advance the underlying collection by one entry. When the collection
    /// reports no current entry afterwards, this iterator becomes the end
    /// iterator.
    basic_collection_iterator &operator++() {
      assert(Base && "Attempted to advance iterator past end!");
      Base->increment();
      // Create an end iterator.
      if (!Base->CurrentEntry)
        Base = nullptr;
      return *this;
    }

  private:
    BaseT *Base = nullptr;
  };
  // The following two templates are used for both MappingNode and
  // SequenceNode.

  /// Start iterating over \p C and return an iterator to its first entry.
  ///
  /// A collection may only be iterated once: this asserts that \p C is still
  /// at its beginning and then clears that flag.
  template <class CollectionType>
  typename CollectionType::iterator begin(CollectionType &C) {
    assert(C.IsAtBeginning && "You may only iterate over a collection once!");
    C.IsAtBeginning = false;
    typename CollectionType::iterator First(&C);
    // The first increment loads the first entry.
    ++First;
    return First;
  }

  /// Skip every entry of \p C.
  ///
  /// Does nothing unless \p C is still at its beginning; otherwise walks the
  /// collection and calls skip() on each entry.
  template <class CollectionType> void skip(CollectionType &C) {
    // TODO: support skipping from the middle of a parsed collection ;/
    assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!");
    if (!C.IsAtBeginning)
      return;
    for (typename CollectionType::iterator I = begin(C), E = C.end(); I != E;
         ++I)
      I->skip();
  }
  343. /// Represents a YAML map created from either a block map for a flow map.
  344. ///
  345. /// This parses the YAML stream as increment() is called.
  346. ///
  347. /// Example:
  348. /// Name: _main
  349. /// Scope: Global
  350. class MappingNode final : public Node {
  351. void anchor() override;
  352. public:
  353. enum MappingType {
  354. MT_Block,
  355. MT_Flow,
  356. MT_Inline ///< An inline mapping node is used for "[key: value]".
  357. };
  358. MappingNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
  359. MappingType MT)
  360. : Node(NK_Mapping, D, Anchor, Tag), Type(MT) {}
  361. friend class basic_collection_iterator<MappingNode, KeyValueNode>;
  362. using iterator = basic_collection_iterator<MappingNode, KeyValueNode>;
  363. template <class T> friend typename T::iterator yaml::begin(T &);
  364. template <class T> friend void yaml::skip(T &);
  365. iterator begin() { return yaml::begin(*this); }
  366. iterator end() { return iterator(); }
  367. void skip() override { yaml::skip(*this); }
  368. static bool classof(const Node *N) {
  369. return N->getType() == NK_Mapping;
  370. }
  371. private:
  372. MappingType Type;
  373. bool IsAtBeginning = true;
  374. bool IsAtEnd = false;
  375. KeyValueNode *CurrentEntry = nullptr;
  376. void increment();
  377. };
  378. /// Represents a YAML sequence created from either a block sequence for a
  379. /// flow sequence.
  380. ///
  381. /// This parses the YAML stream as increment() is called.
  382. ///
  383. /// Example:
  384. /// - Hello
  385. /// - World
  386. class SequenceNode final : public Node {
  387. void anchor() override;
  388. public:
  389. enum SequenceType {
  390. ST_Block,
  391. ST_Flow,
  392. // Use for:
  393. //
  394. // key:
  395. // - val1
  396. // - val2
  397. //
  398. // As a BlockMappingEntry and BlockEnd are not created in this case.
  399. ST_Indentless
  400. };
  401. SequenceNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
  402. SequenceType ST)
  403. : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST) {}
  404. friend class basic_collection_iterator<SequenceNode, Node>;
  405. using iterator = basic_collection_iterator<SequenceNode, Node>;
  406. template <class T> friend typename T::iterator yaml::begin(T &);
  407. template <class T> friend void yaml::skip(T &);
  408. void increment();
  409. iterator begin() { return yaml::begin(*this); }
  410. iterator end() { return iterator(); }
  411. void skip() override { yaml::skip(*this); }
  412. static bool classof(const Node *N) {
  413. return N->getType() == NK_Sequence;
  414. }
  415. private:
  416. SequenceType SeqType;
  417. bool IsAtBeginning = true;
  418. bool IsAtEnd = false;
  419. bool WasPreviousTokenFlowEntry = true; // Start with an imaginary ','.
  420. Node *CurrentEntry = nullptr;
  421. };
  422. /// Represents an alias to a Node with an anchor.
  423. ///
  424. /// Example:
  425. /// *AnchorName
  426. class AliasNode final : public Node {
  427. void anchor() override;
  428. public:
  429. AliasNode(std::unique_ptr<Document> &D, StringRef Val)
  430. : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {}
  431. StringRef getName() const { return Name; }
  432. static bool classof(const Node *N) { return N->getType() == NK_Alias; }
  433. private:
  434. StringRef Name;
  435. };
  436. /// A YAML Stream is a sequence of Documents. A document contains a root
  437. /// node.
  438. class Document {
  439. public:
  440. Document(Stream &ParentStream);
  441. /// Root for parsing a node. Returns a single node.
  442. Node *parseBlockNode();
  443. /// Finish parsing the current document and return true if there are
  444. /// more. Return false otherwise.
  445. bool skip();
  446. /// Parse and return the root level node.
  447. Node *getRoot() {
  448. if (Root)
  449. return Root;
  450. return Root = parseBlockNode();
  451. }
  452. const std::map<StringRef, StringRef> &getTagMap() const { return TagMap; }
  453. private:
  454. friend class Node;
  455. friend class document_iterator;
  456. /// Stream to read tokens from.
  457. Stream &stream;
  458. /// Used to allocate nodes to. All are destroyed without calling their
  459. /// destructor when the document is destroyed.
  460. BumpPtrAllocator NodeAllocator;
  461. /// The root node. Used to support skipping a partially parsed
  462. /// document.
  463. Node *Root;
  464. /// Maps tag prefixes to their expansion.
  465. std::map<StringRef, StringRef> TagMap;
  466. Token &peekNext();
  467. Token getNext();
  468. void setError(const Twine &Message, Token &Location) const;
  469. bool failed() const;
  470. /// Parse %BLAH directives and return true if any were encountered.
  471. bool parseDirectives();
  472. /// Parse %YAML
  473. void parseYAMLDirective();
  474. /// Parse %TAG
  475. void parseTAGDirective();
  476. /// Consume the next token and error if it is not \a TK.
  477. bool expectToken(int TK);
  478. };
  479. /// Iterator abstraction for Documents over a Stream.
  480. class document_iterator {
  481. public:
  482. document_iterator() = default;
  483. document_iterator(std::unique_ptr<Document> &D) : Doc(&D) {}
  484. bool operator==(const document_iterator &Other) const {
  485. if (isAtEnd() || Other.isAtEnd())
  486. return isAtEnd() && Other.isAtEnd();
  487. return Doc == Other.Doc;
  488. }
  489. bool operator!=(const document_iterator &Other) const {
  490. return !(*this == Other);
  491. }
  492. document_iterator operator++() {
  493. assert(Doc && "incrementing iterator past the end.");
  494. if (!(*Doc)->skip()) {
  495. Doc->reset(nullptr);
  496. } else {
  497. Stream &S = (*Doc)->stream;
  498. Doc->reset(new Document(S));
  499. }
  500. return *this;
  501. }
  502. Document &operator*() { return *Doc->get(); }
  503. std::unique_ptr<Document> &operator->() { return *Doc; }
  504. private:
  505. bool isAtEnd() const { return !Doc || !*Doc; }
  506. std::unique_ptr<Document> *Doc = nullptr;
  507. };
  508. } // end namespace yaml
  509. } // end namespace llvm
  510. #endif // LLVM_SUPPORT_YAMLPARSER_H
  511. #ifdef __GNUC__
  512. #pragma GCC diagnostic pop
  513. #endif