ClangSyntaxEmitter.cpp 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. //===- ClangSyntaxEmitter.cpp - Generate clang Syntax Tree nodes ----------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  6. // See https://llvm.org/LICENSE.txt for license information.
  7. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  8. //
  9. //===----------------------------------------------------------------------===//
  10. //
  11. // These backends consume the definitions of Syntax Tree nodes.
  12. // See clang/include/clang/Tooling/Syntax/{Syntax,Nodes}.td
  13. //
// The -gen-clang-syntax-node-list backend produces a .inc with macro calls
//   CONCRETE_NODE(Kind, BaseKind)
//   ABSTRACT_NODE(Kind, BaseKind, FirstKind, LastKind)
// similar to those for AST nodes such as AST/DeclNodes.inc. Both macros
// default to NODE(Kind, BaseKind) when the including file does not define them.
  18. //
  19. // The -gen-clang-syntax-node-classes backend produces definitions for the
  20. // syntax::Node subclasses (except those marked as External).
  21. //
  22. // In future, another backend will encode the structure of the various node
  23. // types in tables so their invariants can be checked and enforced.
  24. //
  25. //===----------------------------------------------------------------------===//
  26. #include "TableGenBackends.h"
  27. #include <deque>
  28. #include "llvm/ADT/StringExtras.h"
  29. #include "llvm/Support/FormatVariadic.h"
  30. #include "llvm/Support/raw_ostream.h"
  31. #include "llvm/TableGen/Record.h"
  32. #include "llvm/TableGen/TableGenBackend.h"
  33. namespace {
  34. using llvm::formatv;
  35. // The class hierarchy of Node types.
  36. // We assemble this in order to be able to define the NodeKind enum in a
  37. // stable and useful way, where abstract Node subclasses correspond to ranges.
  38. class Hierarchy {
  39. public:
  40. Hierarchy(const llvm::RecordKeeper &Records) {
  41. for (llvm::Record *T : Records.getAllDerivedDefinitions("NodeType"))
  42. add(T);
  43. for (llvm::Record *Derived : Records.getAllDerivedDefinitions("NodeType"))
  44. if (llvm::Record *Base = Derived->getValueAsOptionalDef("base"))
  45. link(Derived, Base);
  46. for (NodeType &N : AllTypes) {
  47. llvm::sort(N.Derived, [](const NodeType *L, const NodeType *R) {
  48. return L->Record->getName() < R->Record->getName();
  49. });
  50. // Alternatives nodes must have subclasses, External nodes may do.
  51. assert(N.Record->isSubClassOf("Alternatives") ||
  52. N.Record->isSubClassOf("External") || N.Derived.empty());
  53. assert(!N.Record->isSubClassOf("Alternatives") || !N.Derived.empty());
  54. }
  55. }
  56. struct NodeType {
  57. const llvm::Record *Record = nullptr;
  58. const NodeType *Base = nullptr;
  59. std::vector<const NodeType *> Derived;
  60. llvm::StringRef name() const { return Record->getName(); }
  61. };
  62. NodeType &get(llvm::StringRef Name = "Node") {
  63. auto NI = ByName.find(Name);
  64. assert(NI != ByName.end() && "no such node");
  65. return *NI->second;
  66. }
  67. // Traverse the hierarchy in pre-order (base classes before derived).
  68. void visit(llvm::function_ref<void(const NodeType &)> CB,
  69. const NodeType *Start = nullptr) {
  70. if (Start == nullptr)
  71. Start = &get();
  72. CB(*Start);
  73. for (const NodeType *D : Start->Derived)
  74. visit(CB, D);
  75. }
  76. private:
  77. void add(const llvm::Record *R) {
  78. AllTypes.emplace_back();
  79. AllTypes.back().Record = R;
  80. bool Inserted = ByName.try_emplace(R->getName(), &AllTypes.back()).second;
  81. assert(Inserted && "Duplicate node name");
  82. (void)Inserted;
  83. }
  84. void link(const llvm::Record *Derived, const llvm::Record *Base) {
  85. auto &CN = get(Derived->getName()), &PN = get(Base->getName());
  86. assert(CN.Base == nullptr && "setting base twice");
  87. PN.Derived.push_back(&CN);
  88. CN.Base = &PN;
  89. }
  90. std::deque<NodeType> AllTypes;
  91. llvm::DenseMap<llvm::StringRef, NodeType *> ByName;
  92. };
  93. const Hierarchy::NodeType &firstConcrete(const Hierarchy::NodeType &N) {
  94. return N.Derived.empty() ? N : firstConcrete(*N.Derived.front());
  95. }
  96. const Hierarchy::NodeType &lastConcrete(const Hierarchy::NodeType &N) {
  97. return N.Derived.empty() ? N : lastConcrete(*N.Derived.back());
  98. }
  99. struct SyntaxConstraint {
  100. SyntaxConstraint(const llvm::Record &R) {
  101. if (R.isSubClassOf("Optional")) {
  102. *this = SyntaxConstraint(*R.getValueAsDef("inner"));
  103. } else if (R.isSubClassOf("AnyToken")) {
  104. NodeType = "Leaf";
  105. } else if (R.isSubClassOf("NodeType")) {
  106. NodeType = R.getName().str();
  107. } else {
  108. assert(false && "Unhandled Syntax kind");
  109. }
  110. }
  111. std::string NodeType;
  112. // optional and leaf types also go here, once we want to use them.
  113. };
  114. } // namespace
  115. void clang::EmitClangSyntaxNodeList(llvm::RecordKeeper &Records,
  116. llvm::raw_ostream &OS) {
  117. llvm::emitSourceFileHeader("Syntax tree node list", OS);
  118. Hierarchy H(Records);
  119. OS << R"cpp(
  120. #ifndef NODE
  121. #define NODE(Kind, Base)
  122. #endif
  123. #ifndef CONCRETE_NODE
  124. #define CONCRETE_NODE(Kind, Base) NODE(Kind, Base)
  125. #endif
  126. #ifndef ABSTRACT_NODE
  127. #define ABSTRACT_NODE(Kind, Base, First, Last) NODE(Kind, Base)
  128. #endif
  129. )cpp";
  130. H.visit([&](const Hierarchy::NodeType &N) {
  131. // Don't emit ABSTRACT_NODE for node itself, which has no parent.
  132. if (N.Base == nullptr)
  133. return;
  134. if (N.Derived.empty())
  135. OS << formatv("CONCRETE_NODE({0},{1})\n", N.name(), N.Base->name());
  136. else
  137. OS << formatv("ABSTRACT_NODE({0},{1},{2},{3})\n", N.name(),
  138. N.Base->name(), firstConcrete(N).name(),
  139. lastConcrete(N).name());
  140. });
  141. OS << R"cpp(
  142. #undef NODE
  143. #undef CONCRETE_NODE
  144. #undef ABSTRACT_NODE
  145. )cpp";
  146. }
  147. // Format a documentation string as a C++ comment.
  148. // Trims leading whitespace handling since comments come from a TableGen file:
  149. // documentation = [{
  150. // This is a widget. Example:
  151. // widget.explode()
  152. // }];
  153. // and should be formatted as:
  154. // /// This is a widget. Example:
  155. // /// widget.explode()
  156. // Leading and trailing whitespace lines are stripped.
  157. // The indentation of the first line is stripped from all lines.
  158. static void printDoc(llvm::StringRef Doc, llvm::raw_ostream &OS) {
  159. Doc = Doc.rtrim();
  160. llvm::StringRef Line;
  161. while (Line.trim().empty() && !Doc.empty())
  162. std::tie(Line, Doc) = Doc.split('\n');
  163. llvm::StringRef Indent = Line.take_while(llvm::isSpace);
  164. for (; !Line.empty() || !Doc.empty(); std::tie(Line, Doc) = Doc.split('\n')) {
  165. Line.consume_front(Indent);
  166. OS << "/// " << Line << "\n";
  167. }
  168. }
  169. void clang::EmitClangSyntaxNodeClasses(llvm::RecordKeeper &Records,
  170. llvm::raw_ostream &OS) {
  171. llvm::emitSourceFileHeader("Syntax tree node list", OS);
  172. Hierarchy H(Records);
  173. OS << "\n// Forward-declare node types so we don't have to carefully "
  174. "sequence definitions.\n";
  175. H.visit([&](const Hierarchy::NodeType &N) {
  176. OS << "class " << N.name() << ";\n";
  177. });
  178. OS << "\n// Node definitions\n\n";
  179. H.visit([&](const Hierarchy::NodeType &N) {
  180. if (N.Record->isSubClassOf("External"))
  181. return;
  182. printDoc(N.Record->getValueAsString("documentation"), OS);
  183. OS << formatv("class {0}{1} : public {2} {{\n", N.name(),
  184. N.Derived.empty() ? " final" : "", N.Base->name());
  185. // Constructor.
  186. if (N.Derived.empty())
  187. OS << formatv("public:\n {0}() : {1}(NodeKind::{0}) {{}\n", N.name(),
  188. N.Base->name());
  189. else
  190. OS << formatv("protected:\n {0}(NodeKind K) : {1}(K) {{}\npublic:\n",
  191. N.name(), N.Base->name());
  192. if (N.Record->isSubClassOf("Sequence")) {
  193. // Getters for sequence elements.
  194. for (const auto &C : N.Record->getValueAsListOfDefs("children")) {
  195. assert(C->isSubClassOf("Role"));
  196. llvm::StringRef Role = C->getValueAsString("role");
  197. SyntaxConstraint Constraint(*C->getValueAsDef("syntax"));
  198. for (const char *Const : {"", "const "})
  199. OS << formatv(
  200. " {2}{1} *get{0}() {2} {{\n"
  201. " return llvm::cast_or_null<{1}>(findChild(NodeRole::{0}));\n"
  202. " }\n",
  203. Role, Constraint.NodeType, Const);
  204. }
  205. }
  206. // classof. FIXME: move definition inline once ~all nodes are generated.
  207. OS << " static bool classof(const Node *N);\n";
  208. OS << "};\n\n";
  209. });
  210. }