1*a9ac8606Spatrick //===- ClangSyntaxEmitter.cpp - Generate clang Syntax Tree nodes ----------===//
2*a9ac8606Spatrick //
3*a9ac8606Spatrick //                     The LLVM Compiler Infrastructure
4*a9ac8606Spatrick //
5*a9ac8606Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6*a9ac8606Spatrick // See https://llvm.org/LICENSE.txt for license information.
7*a9ac8606Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8*a9ac8606Spatrick //
9*a9ac8606Spatrick //===----------------------------------------------------------------------===//
10*a9ac8606Spatrick //
11*a9ac8606Spatrick // These backends consume the definitions of Syntax Tree nodes.
12*a9ac8606Spatrick // See clang/include/clang/Tooling/Syntax/{Syntax,Nodes}.td
13*a9ac8606Spatrick //
14*a9ac8606Spatrick // The -gen-clang-syntax-node-list backend produces a .inc with macro calls
15*a9ac8606Spatrick //   NODE(Kind, BaseKind)
16*a9ac8606Spatrick //   ABSTRACT_NODE(Type, Base, FirstKind, LastKind)
17*a9ac8606Spatrick // similar to those for AST nodes such as AST/DeclNodes.inc.
18*a9ac8606Spatrick //
19*a9ac8606Spatrick // The -gen-clang-syntax-node-classes backend produces definitions for the
20*a9ac8606Spatrick // syntax::Node subclasses (except those marked as External).
21*a9ac8606Spatrick //
22*a9ac8606Spatrick // In future, another backend will encode the structure of the various node
23*a9ac8606Spatrick // types in tables so their invariants can be checked and enforced.
24*a9ac8606Spatrick //
25*a9ac8606Spatrick //===----------------------------------------------------------------------===//
26*a9ac8606Spatrick #include "TableGenBackends.h"
27*a9ac8606Spatrick 
28*a9ac8606Spatrick #include <deque>
29*a9ac8606Spatrick 
30*a9ac8606Spatrick #include "llvm/ADT/StringExtras.h"
31*a9ac8606Spatrick #include "llvm/Support/FormatVariadic.h"
32*a9ac8606Spatrick #include "llvm/Support/raw_ostream.h"
33*a9ac8606Spatrick #include "llvm/TableGen/Record.h"
34*a9ac8606Spatrick #include "llvm/TableGen/TableGenBackend.h"
35*a9ac8606Spatrick 
36*a9ac8606Spatrick namespace {
37*a9ac8606Spatrick using llvm::formatv;
38*a9ac8606Spatrick 
39*a9ac8606Spatrick // The class hierarchy of Node types.
40*a9ac8606Spatrick // We assemble this in order to be able to define the NodeKind enum in a
41*a9ac8606Spatrick // stable and useful way, where abstract Node subclasses correspond to ranges.
42*a9ac8606Spatrick class Hierarchy {
43*a9ac8606Spatrick public:
Hierarchy(const llvm::RecordKeeper & Records)44*a9ac8606Spatrick   Hierarchy(const llvm::RecordKeeper &Records) {
45*a9ac8606Spatrick     for (llvm::Record *T : Records.getAllDerivedDefinitions("NodeType"))
46*a9ac8606Spatrick       add(T);
47*a9ac8606Spatrick     for (llvm::Record *Derived : Records.getAllDerivedDefinitions("NodeType"))
48*a9ac8606Spatrick       if (llvm::Record *Base = Derived->getValueAsOptionalDef("base"))
49*a9ac8606Spatrick         link(Derived, Base);
50*a9ac8606Spatrick     for (NodeType &N : AllTypes) {
51*a9ac8606Spatrick       llvm::sort(N.Derived, [](const NodeType *L, const NodeType *R) {
52*a9ac8606Spatrick         return L->Record->getName() < R->Record->getName();
53*a9ac8606Spatrick       });
54*a9ac8606Spatrick       // Alternatives nodes must have subclasses, External nodes may do.
55*a9ac8606Spatrick       assert(N.Record->isSubClassOf("Alternatives") ||
56*a9ac8606Spatrick              N.Record->isSubClassOf("External") || N.Derived.empty());
57*a9ac8606Spatrick       assert(!N.Record->isSubClassOf("Alternatives") || !N.Derived.empty());
58*a9ac8606Spatrick     }
59*a9ac8606Spatrick   }
60*a9ac8606Spatrick 
61*a9ac8606Spatrick   struct NodeType {
62*a9ac8606Spatrick     const llvm::Record *Record = nullptr;
63*a9ac8606Spatrick     const NodeType *Base = nullptr;
64*a9ac8606Spatrick     std::vector<const NodeType *> Derived;
name__anon736a35580111::Hierarchy::NodeType65*a9ac8606Spatrick     llvm::StringRef name() const { return Record->getName(); }
66*a9ac8606Spatrick   };
67*a9ac8606Spatrick 
get(llvm::StringRef Name="Node")68*a9ac8606Spatrick   NodeType &get(llvm::StringRef Name = "Node") {
69*a9ac8606Spatrick     auto NI = ByName.find(Name);
70*a9ac8606Spatrick     assert(NI != ByName.end() && "no such node");
71*a9ac8606Spatrick     return *NI->second;
72*a9ac8606Spatrick   }
73*a9ac8606Spatrick 
74*a9ac8606Spatrick   // Traverse the hierarchy in pre-order (base classes before derived).
visit(llvm::function_ref<void (const NodeType &)> CB,const NodeType * Start=nullptr)75*a9ac8606Spatrick   void visit(llvm::function_ref<void(const NodeType &)> CB,
76*a9ac8606Spatrick              const NodeType *Start = nullptr) {
77*a9ac8606Spatrick     if (Start == nullptr)
78*a9ac8606Spatrick       Start = &get();
79*a9ac8606Spatrick     CB(*Start);
80*a9ac8606Spatrick     for (const NodeType *D : Start->Derived)
81*a9ac8606Spatrick       visit(CB, D);
82*a9ac8606Spatrick   }
83*a9ac8606Spatrick 
84*a9ac8606Spatrick private:
add(const llvm::Record * R)85*a9ac8606Spatrick   void add(const llvm::Record *R) {
86*a9ac8606Spatrick     AllTypes.emplace_back();
87*a9ac8606Spatrick     AllTypes.back().Record = R;
88*a9ac8606Spatrick     bool Inserted = ByName.try_emplace(R->getName(), &AllTypes.back()).second;
89*a9ac8606Spatrick     assert(Inserted && "Duplicate node name");
90*a9ac8606Spatrick     (void)Inserted;
91*a9ac8606Spatrick   }
92*a9ac8606Spatrick 
link(const llvm::Record * Derived,const llvm::Record * Base)93*a9ac8606Spatrick   void link(const llvm::Record *Derived, const llvm::Record *Base) {
94*a9ac8606Spatrick     auto &CN = get(Derived->getName()), &PN = get(Base->getName());
95*a9ac8606Spatrick     assert(CN.Base == nullptr && "setting base twice");
96*a9ac8606Spatrick     PN.Derived.push_back(&CN);
97*a9ac8606Spatrick     CN.Base = &PN;
98*a9ac8606Spatrick   }
99*a9ac8606Spatrick 
100*a9ac8606Spatrick   std::deque<NodeType> AllTypes;
101*a9ac8606Spatrick   llvm::DenseMap<llvm::StringRef, NodeType *> ByName;
102*a9ac8606Spatrick };
103*a9ac8606Spatrick 
// Returns the leaf reached from N by repeatedly descending into the first
// derived type; N itself when it has no derived types.
const Hierarchy::NodeType &firstConcrete(const Hierarchy::NodeType &N) {
  const Hierarchy::NodeType *Cur = &N;
  while (!Cur->Derived.empty())
    Cur = Cur->Derived.front();
  return *Cur;
}
// Returns the leaf reached from N by repeatedly descending into the last
// derived type; N itself when it has no derived types.
const Hierarchy::NodeType &lastConcrete(const Hierarchy::NodeType &N) {
  const Hierarchy::NodeType *Cur = &N;
  while (!Cur->Derived.empty())
    Cur = Cur->Derived.back();
  return *Cur;
}
110*a9ac8606Spatrick 
111*a9ac8606Spatrick struct SyntaxConstraint {
SyntaxConstraint__anon736a35580111::SyntaxConstraint112*a9ac8606Spatrick   SyntaxConstraint(const llvm::Record &R) {
113*a9ac8606Spatrick     if (R.isSubClassOf("Optional")) {
114*a9ac8606Spatrick       *this = SyntaxConstraint(*R.getValueAsDef("inner"));
115*a9ac8606Spatrick     } else if (R.isSubClassOf("AnyToken")) {
116*a9ac8606Spatrick       NodeType = "Leaf";
117*a9ac8606Spatrick     } else if (R.isSubClassOf("NodeType")) {
118*a9ac8606Spatrick       NodeType = R.getName().str();
119*a9ac8606Spatrick     } else {
120*a9ac8606Spatrick       assert(false && "Unhandled Syntax kind");
121*a9ac8606Spatrick     }
122*a9ac8606Spatrick   }
123*a9ac8606Spatrick 
124*a9ac8606Spatrick   std::string NodeType;
125*a9ac8606Spatrick   // optional and leaf types also go here, once we want to use them.
126*a9ac8606Spatrick };
127*a9ac8606Spatrick 
128*a9ac8606Spatrick } // namespace
129*a9ac8606Spatrick 
EmitClangSyntaxNodeList(llvm::RecordKeeper & Records,llvm::raw_ostream & OS)130*a9ac8606Spatrick void clang::EmitClangSyntaxNodeList(llvm::RecordKeeper &Records,
131*a9ac8606Spatrick                                     llvm::raw_ostream &OS) {
132*a9ac8606Spatrick   llvm::emitSourceFileHeader("Syntax tree node list", OS);
133*a9ac8606Spatrick   Hierarchy H(Records);
134*a9ac8606Spatrick   OS << R"cpp(
135*a9ac8606Spatrick #ifndef NODE
136*a9ac8606Spatrick #define NODE(Kind, Base)
137*a9ac8606Spatrick #endif
138*a9ac8606Spatrick 
139*a9ac8606Spatrick #ifndef CONCRETE_NODE
140*a9ac8606Spatrick #define CONCRETE_NODE(Kind, Base) NODE(Kind, Base)
141*a9ac8606Spatrick #endif
142*a9ac8606Spatrick 
143*a9ac8606Spatrick #ifndef ABSTRACT_NODE
144*a9ac8606Spatrick #define ABSTRACT_NODE(Kind, Base, First, Last) NODE(Kind, Base)
145*a9ac8606Spatrick #endif
146*a9ac8606Spatrick 
147*a9ac8606Spatrick )cpp";
148*a9ac8606Spatrick   H.visit([&](const Hierarchy::NodeType &N) {
149*a9ac8606Spatrick     // Don't emit ABSTRACT_NODE for node itself, which has no parent.
150*a9ac8606Spatrick     if (N.Base == nullptr)
151*a9ac8606Spatrick       return;
152*a9ac8606Spatrick     if (N.Derived.empty())
153*a9ac8606Spatrick       OS << formatv("CONCRETE_NODE({0},{1})\n", N.name(), N.Base->name());
154*a9ac8606Spatrick     else
155*a9ac8606Spatrick       OS << formatv("ABSTRACT_NODE({0},{1},{2},{3})\n", N.name(),
156*a9ac8606Spatrick                     N.Base->name(), firstConcrete(N).name(),
157*a9ac8606Spatrick                     lastConcrete(N).name());
158*a9ac8606Spatrick   });
159*a9ac8606Spatrick   OS << R"cpp(
160*a9ac8606Spatrick #undef NODE
161*a9ac8606Spatrick #undef CONCRETE_NODE
162*a9ac8606Spatrick #undef ABSTRACT_NODE
163*a9ac8606Spatrick )cpp";
164*a9ac8606Spatrick }
165*a9ac8606Spatrick 
166*a9ac8606Spatrick // Format a documentation string as a C++ comment.
167*a9ac8606Spatrick // Trims leading whitespace handling since comments come from a TableGen file:
168*a9ac8606Spatrick //    documentation = [{
169*a9ac8606Spatrick //      This is a widget. Example:
170*a9ac8606Spatrick //        widget.explode()
171*a9ac8606Spatrick //    }];
172*a9ac8606Spatrick // and should be formatted as:
173*a9ac8606Spatrick //    /// This is a widget. Example:
174*a9ac8606Spatrick //    ///   widget.explode()
175*a9ac8606Spatrick // Leading and trailing whitespace lines are stripped.
176*a9ac8606Spatrick // The indentation of the first line is stripped from all lines.
printDoc(llvm::StringRef Doc,llvm::raw_ostream & OS)177*a9ac8606Spatrick static void printDoc(llvm::StringRef Doc, llvm::raw_ostream &OS) {
178*a9ac8606Spatrick   Doc = Doc.rtrim();
179*a9ac8606Spatrick   llvm::StringRef Line;
180*a9ac8606Spatrick   while (Line.trim().empty() && !Doc.empty())
181*a9ac8606Spatrick     std::tie(Line, Doc) = Doc.split('\n');
182*a9ac8606Spatrick   llvm::StringRef Indent = Line.take_while(llvm::isSpace);
183*a9ac8606Spatrick   for (; !Line.empty() || !Doc.empty(); std::tie(Line, Doc) = Doc.split('\n')) {
184*a9ac8606Spatrick     Line.consume_front(Indent);
185*a9ac8606Spatrick     OS << "/// " << Line << "\n";
186*a9ac8606Spatrick   }
187*a9ac8606Spatrick }
188*a9ac8606Spatrick 
EmitClangSyntaxNodeClasses(llvm::RecordKeeper & Records,llvm::raw_ostream & OS)189*a9ac8606Spatrick void clang::EmitClangSyntaxNodeClasses(llvm::RecordKeeper &Records,
190*a9ac8606Spatrick                                        llvm::raw_ostream &OS) {
191*a9ac8606Spatrick   llvm::emitSourceFileHeader("Syntax tree node list", OS);
192*a9ac8606Spatrick   Hierarchy H(Records);
193*a9ac8606Spatrick 
194*a9ac8606Spatrick   OS << "\n// Forward-declare node types so we don't have to carefully "
195*a9ac8606Spatrick         "sequence definitions.\n";
196*a9ac8606Spatrick   H.visit([&](const Hierarchy::NodeType &N) {
197*a9ac8606Spatrick     OS << "class " << N.name() << ";\n";
198*a9ac8606Spatrick   });
199*a9ac8606Spatrick 
200*a9ac8606Spatrick   OS << "\n// Node definitions\n\n";
201*a9ac8606Spatrick   H.visit([&](const Hierarchy::NodeType &N) {
202*a9ac8606Spatrick     if (N.Record->isSubClassOf("External"))
203*a9ac8606Spatrick       return;
204*a9ac8606Spatrick     printDoc(N.Record->getValueAsString("documentation"), OS);
205*a9ac8606Spatrick     OS << formatv("class {0}{1} : public {2} {{\n", N.name(),
206*a9ac8606Spatrick                   N.Derived.empty() ? " final" : "", N.Base->name());
207*a9ac8606Spatrick 
208*a9ac8606Spatrick     // Constructor.
209*a9ac8606Spatrick     if (N.Derived.empty())
210*a9ac8606Spatrick       OS << formatv("public:\n  {0}() : {1}(NodeKind::{0}) {{}\n", N.name(),
211*a9ac8606Spatrick                     N.Base->name());
212*a9ac8606Spatrick     else
213*a9ac8606Spatrick       OS << formatv("protected:\n  {0}(NodeKind K) : {1}(K) {{}\npublic:\n",
214*a9ac8606Spatrick                     N.name(), N.Base->name());
215*a9ac8606Spatrick 
216*a9ac8606Spatrick     if (N.Record->isSubClassOf("Sequence")) {
217*a9ac8606Spatrick       // Getters for sequence elements.
218*a9ac8606Spatrick       for (const auto &C : N.Record->getValueAsListOfDefs("children")) {
219*a9ac8606Spatrick         assert(C->isSubClassOf("Role"));
220*a9ac8606Spatrick         llvm::StringRef Role = C->getValueAsString("role");
221*a9ac8606Spatrick         SyntaxConstraint Constraint(*C->getValueAsDef("syntax"));
222*a9ac8606Spatrick         for (const char *Const : {"", "const "})
223*a9ac8606Spatrick           OS << formatv(
224*a9ac8606Spatrick               "  {2}{1} *get{0}() {2} {{\n"
225*a9ac8606Spatrick               "    return llvm::cast_or_null<{1}>(findChild(NodeRole::{0}));\n"
226*a9ac8606Spatrick               "  }\n",
227*a9ac8606Spatrick               Role, Constraint.NodeType, Const);
228*a9ac8606Spatrick       }
229*a9ac8606Spatrick     }
230*a9ac8606Spatrick 
231*a9ac8606Spatrick     // classof. FIXME: move definition inline once ~all nodes are generated.
232*a9ac8606Spatrick     OS << "  static bool classof(const Node *N);\n";
233*a9ac8606Spatrick 
234*a9ac8606Spatrick     OS << "};\n\n";
235*a9ac8606Spatrick   });
236*a9ac8606Spatrick }
237