1 //===- ClangSyntaxEmitter.cpp - Generate clang Syntax Tree nodes ----------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6 // See https://llvm.org/LICENSE.txt for license information.
7 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 //
9 //===----------------------------------------------------------------------===//
10 //
11 // These backends consume the definitions of Syntax Tree nodes.
12 // See clang/include/clang/Tooling/Syntax/{Syntax,Nodes}.td
13 //
14 // The -gen-clang-syntax-node-list backend produces a .inc with macro calls
15 //   NODE(Kind, BaseKind)
16 //   ABSTRACT_NODE(Type, Base, FirstKind, LastKind)
17 // similar to those for AST nodes such as AST/DeclNodes.inc.
18 //
19 // The -gen-clang-syntax-node-classes backend produces definitions for the
20 // syntax::Node subclasses (except those marked as External).
21 //
22 // In future, another backend will encode the structure of the various node
23 // types in tables so their invariants can be checked and enforced.
24 //
25 //===----------------------------------------------------------------------===//
#include "TableGenBackends.h"

#include <deque>

#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
35 
36 namespace {
37 using llvm::formatv;
38 
39 // The class hierarchy of Node types.
40 // We assemble this in order to be able to define the NodeKind enum in a
41 // stable and useful way, where abstract Node subclasses correspond to ranges.
42 class Hierarchy {
43 public:
Hierarchy(const llvm::RecordKeeper & Records)44   Hierarchy(const llvm::RecordKeeper &Records) {
45     for (llvm::Record *T : Records.getAllDerivedDefinitions("NodeType"))
46       add(T);
47     for (llvm::Record *Derived : Records.getAllDerivedDefinitions("NodeType"))
48       if (llvm::Record *Base = Derived->getValueAsOptionalDef("base"))
49         link(Derived, Base);
50     for (NodeType &N : AllTypes) {
51       llvm::sort(N.Derived, [](const NodeType *L, const NodeType *R) {
52         return L->Record->getName() < R->Record->getName();
53       });
54       // Alternatives nodes must have subclasses, External nodes may do.
55       assert(N.Record->isSubClassOf("Alternatives") ||
56              N.Record->isSubClassOf("External") || N.Derived.empty());
57       assert(!N.Record->isSubClassOf("Alternatives") || !N.Derived.empty());
58     }
59   }
60 
61   struct NodeType {
62     const llvm::Record *Record = nullptr;
63     const NodeType *Base = nullptr;
64     std::vector<const NodeType *> Derived;
name__anona9e818510111::Hierarchy::NodeType65     llvm::StringRef name() const { return Record->getName(); }
66   };
67 
get(llvm::StringRef Name="Node")68   NodeType &get(llvm::StringRef Name = "Node") {
69     auto NI = ByName.find(Name);
70     assert(NI != ByName.end() && "no such node");
71     return *NI->second;
72   }
73 
74   // Traverse the hierarchy in pre-order (base classes before derived).
visit(llvm::function_ref<void (const NodeType &)> CB,const NodeType * Start=nullptr)75   void visit(llvm::function_ref<void(const NodeType &)> CB,
76              const NodeType *Start = nullptr) {
77     if (Start == nullptr)
78       Start = &get();
79     CB(*Start);
80     for (const NodeType *D : Start->Derived)
81       visit(CB, D);
82   }
83 
84 private:
add(const llvm::Record * R)85   void add(const llvm::Record *R) {
86     AllTypes.emplace_back();
87     AllTypes.back().Record = R;
88     bool Inserted = ByName.try_emplace(R->getName(), &AllTypes.back()).second;
89     assert(Inserted && "Duplicate node name");
90     (void)Inserted;
91   }
92 
link(const llvm::Record * Derived,const llvm::Record * Base)93   void link(const llvm::Record *Derived, const llvm::Record *Base) {
94     auto &CN = get(Derived->getName()), &PN = get(Base->getName());
95     assert(CN.Base == nullptr && "setting base twice");
96     PN.Derived.push_back(&CN);
97     CN.Base = &PN;
98   }
99 
100   std::deque<NodeType> AllTypes;
101   llvm::DenseMap<llvm::StringRef, NodeType *> ByName;
102 };
103 
// Returns the leaf reached from N by repeatedly descending into the first
// child. N itself is returned when it has no children.
const Hierarchy::NodeType &firstConcrete(const Hierarchy::NodeType &N) {
  const Hierarchy::NodeType *Current = &N;
  while (!Current->Derived.empty())
    Current = Current->Derived.front();
  return *Current;
}
// Returns the leaf reached from N by repeatedly descending into the last
// child. N itself is returned when it has no children.
const Hierarchy::NodeType &lastConcrete(const Hierarchy::NodeType &N) {
  const Hierarchy::NodeType *Current = &N;
  while (!Current->Derived.empty())
    Current = Current->Derived.back();
  return *Current;
}
110 
111 struct SyntaxConstraint {
SyntaxConstraint__anona9e818510111::SyntaxConstraint112   SyntaxConstraint(const llvm::Record &R) {
113     if (R.isSubClassOf("Optional")) {
114       *this = SyntaxConstraint(*R.getValueAsDef("inner"));
115     } else if (R.isSubClassOf("AnyToken")) {
116       NodeType = "Leaf";
117     } else if (R.isSubClassOf("NodeType")) {
118       NodeType = R.getName().str();
119     } else {
120       assert(false && "Unhandled Syntax kind");
121     }
122   }
123 
124   std::string NodeType;
125   // optional and leaf types also go here, once we want to use them.
126 };
127 
128 } // namespace
129 
EmitClangSyntaxNodeList(llvm::RecordKeeper & Records,llvm::raw_ostream & OS)130 void clang::EmitClangSyntaxNodeList(llvm::RecordKeeper &Records,
131                                     llvm::raw_ostream &OS) {
132   llvm::emitSourceFileHeader("Syntax tree node list", OS);
133   Hierarchy H(Records);
134   OS << R"cpp(
135 #ifndef NODE
136 #define NODE(Kind, Base)
137 #endif
138 
139 #ifndef CONCRETE_NODE
140 #define CONCRETE_NODE(Kind, Base) NODE(Kind, Base)
141 #endif
142 
143 #ifndef ABSTRACT_NODE
144 #define ABSTRACT_NODE(Kind, Base, First, Last) NODE(Kind, Base)
145 #endif
146 
147 )cpp";
148   H.visit([&](const Hierarchy::NodeType &N) {
149     // Don't emit ABSTRACT_NODE for node itself, which has no parent.
150     if (N.Base == nullptr)
151       return;
152     if (N.Derived.empty())
153       OS << formatv("CONCRETE_NODE({0},{1})\n", N.name(), N.Base->name());
154     else
155       OS << formatv("ABSTRACT_NODE({0},{1},{2},{3})\n", N.name(),
156                     N.Base->name(), firstConcrete(N).name(),
157                     lastConcrete(N).name());
158   });
159   OS << R"cpp(
160 #undef NODE
161 #undef CONCRETE_NODE
162 #undef ABSTRACT_NODE
163 )cpp";
164 }
165 
166 // Format a documentation string as a C++ comment.
167 // Trims leading whitespace handling since comments come from a TableGen file:
168 //    documentation = [{
169 //      This is a widget. Example:
170 //        widget.explode()
171 //    }];
172 // and should be formatted as:
173 //    /// This is a widget. Example:
174 //    ///   widget.explode()
175 // Leading and trailing whitespace lines are stripped.
176 // The indentation of the first line is stripped from all lines.
printDoc(llvm::StringRef Doc,llvm::raw_ostream & OS)177 static void printDoc(llvm::StringRef Doc, llvm::raw_ostream &OS) {
178   Doc = Doc.rtrim();
179   llvm::StringRef Line;
180   while (Line.trim().empty() && !Doc.empty())
181     std::tie(Line, Doc) = Doc.split('\n');
182   llvm::StringRef Indent = Line.take_while(llvm::isSpace);
183   for (; !Line.empty() || !Doc.empty(); std::tie(Line, Doc) = Doc.split('\n')) {
184     Line.consume_front(Indent);
185     OS << "/// " << Line << "\n";
186   }
187 }
188 
EmitClangSyntaxNodeClasses(llvm::RecordKeeper & Records,llvm::raw_ostream & OS)189 void clang::EmitClangSyntaxNodeClasses(llvm::RecordKeeper &Records,
190                                        llvm::raw_ostream &OS) {
191   llvm::emitSourceFileHeader("Syntax tree node list", OS);
192   Hierarchy H(Records);
193 
194   OS << "\n// Forward-declare node types so we don't have to carefully "
195         "sequence definitions.\n";
196   H.visit([&](const Hierarchy::NodeType &N) {
197     OS << "class " << N.name() << ";\n";
198   });
199 
200   OS << "\n// Node definitions\n\n";
201   H.visit([&](const Hierarchy::NodeType &N) {
202     if (N.Record->isSubClassOf("External"))
203       return;
204     printDoc(N.Record->getValueAsString("documentation"), OS);
205     OS << formatv("class {0}{1} : public {2} {{\n", N.name(),
206                   N.Derived.empty() ? " final" : "", N.Base->name());
207 
208     // Constructor.
209     if (N.Derived.empty())
210       OS << formatv("public:\n  {0}() : {1}(NodeKind::{0}) {{}\n", N.name(),
211                     N.Base->name());
212     else
213       OS << formatv("protected:\n  {0}(NodeKind K) : {1}(K) {{}\npublic:\n",
214                     N.name(), N.Base->name());
215 
216     if (N.Record->isSubClassOf("Sequence")) {
217       // Getters for sequence elements.
218       for (const auto &C : N.Record->getValueAsListOfDefs("children")) {
219         assert(C->isSubClassOf("Role"));
220         llvm::StringRef Role = C->getValueAsString("role");
221         SyntaxConstraint Constraint(*C->getValueAsDef("syntax"));
222         for (const char *Const : {"", "const "})
223           OS << formatv(
224               "  {2}{1} *get{0}() {2} {{\n"
225               "    return llvm::cast_or_null<{1}>(findChild(NodeRole::{0}));\n"
226               "  }\n",
227               Role, Constraint.NodeType, Const);
228       }
229     }
230 
231     // classof. FIXME: move definition inline once ~all nodes are generated.
232     OS << "  static bool classof(const Node *N);\n";
233 
234     OS << "};\n\n";
235   });
236 }
237