1 //===- Tree.cpp -----------------------------------------------*- C++ -*-=====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "clang/Tooling/Syntax/Tree.h"
9 #include "clang/Basic/TokenKinds.h"
10 #include "clang/Tooling/Syntax/Nodes.h"
11 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/ADT/BitVector.h"
13 #include "llvm/ADT/STLExtras.h"
14 #include "llvm/Support/Casting.h"
15 #include <cassert>
16 
17 using namespace clang;
18 
19 namespace {
20 static void traverse(const syntax::Node *N,
21                      llvm::function_ref<void(const syntax::Node *)> Visit) {
22   if (auto *T = dyn_cast<syntax::Tree>(N)) {
23     for (const syntax::Node &C : T->getChildren())
24       traverse(&C, Visit);
25   }
26   Visit(N);
27 }
28 static void traverse(syntax::Node *N,
29                      llvm::function_ref<void(syntax::Node *)> Visit) {
30   traverse(static_cast<const syntax::Node *>(N), [&](const syntax::Node *N) {
31     Visit(const_cast<syntax::Node *>(N));
32   });
33 }
34 } // namespace
35 
36 syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
37                      const TokenBuffer &Tokens)
38     : SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {}
39 
40 const syntax::TokenBuffer &syntax::Arena::getTokenBuffer() const {
41   return Tokens;
42 }
43 
44 std::pair<FileID, ArrayRef<syntax::Token>>
45 syntax::Arena::lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Input) {
46   auto FID = SourceMgr.createFileID(std::move(Input));
47   auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SourceMgr, LangOpts));
48   assert(It.second && "duplicate FileID");
49   return {FID, It.first->second};
50 }
51 
52 syntax::Leaf::Leaf(const syntax::Token *Tok) : Node(NodeKind::Leaf), Tok(Tok) {
53   assert(Tok != nullptr);
54 }
55 
56 syntax::Node::Node(NodeKind Kind)
57     : Parent(nullptr), NextSibling(nullptr), PreviousSibling(nullptr),
58       Kind(static_cast<unsigned>(Kind)), Role(0), Original(false),
59       CanModify(false) {
60   this->setRole(NodeRole::Detached);
61 }
62 
63 bool syntax::Node::isDetached() const {
64   return getRole() == NodeRole::Detached;
65 }
66 
67 void syntax::Node::setRole(NodeRole NR) {
68   this->Role = static_cast<unsigned>(NR);
69 }
70 
71 void syntax::Tree::appendChildLowLevel(Node *Child, NodeRole Role) {
72   assert(Child->getRole() == NodeRole::Detached);
73   assert(Role != NodeRole::Detached);
74 
75   Child->setRole(Role);
76   appendChildLowLevel(Child);
77 }
78 
79 void syntax::Tree::appendChildLowLevel(Node *Child) {
80   assert(Child->Parent == nullptr);
81   assert(Child->NextSibling == nullptr);
82   assert(Child->PreviousSibling == nullptr);
83   assert(Child->getRole() != NodeRole::Detached);
84 
85   Child->Parent = this;
86   if (this->LastChild) {
87     Child->PreviousSibling = this->LastChild;
88     this->LastChild->NextSibling = Child;
89   } else
90     this->FirstChild = Child;
91 
92   this->LastChild = Child;
93 }
94 
95 void syntax::Tree::prependChildLowLevel(Node *Child, NodeRole Role) {
96   assert(Child->getRole() == NodeRole::Detached);
97   assert(Role != NodeRole::Detached);
98 
99   Child->setRole(Role);
100   prependChildLowLevel(Child);
101 }
102 
103 void syntax::Tree::prependChildLowLevel(Node *Child) {
104   assert(Child->Parent == nullptr);
105   assert(Child->NextSibling == nullptr);
106   assert(Child->PreviousSibling == nullptr);
107   assert(Child->getRole() != NodeRole::Detached);
108 
109   Child->Parent = this;
110   if (this->FirstChild) {
111     Child->NextSibling = this->FirstChild;
112     this->FirstChild->PreviousSibling = Child;
113   } else
114     this->LastChild = Child;
115 
116   this->FirstChild = Child;
117 }
118 
119 void syntax::Tree::replaceChildRangeLowLevel(Node *Begin, Node *End,
120                                              Node *New) {
121   assert((!Begin || Begin->Parent == this) &&
122          "`Begin` is not a child of `this`.");
123   assert((!End || End->Parent == this) && "`End` is not a child of `this`.");
124   assert(canModify() && "Cannot modify `this`.");
125 
126 #ifndef NDEBUG
127   for (auto *N = New; N; N = N->NextSibling) {
128     assert(N->Parent == nullptr);
129     assert(N->getRole() != NodeRole::Detached && "Roles must be set");
130     // FIXME: validate the role.
131   }
132 
133   auto Reachable = [](Node *From, Node *N) {
134     if (!N)
135       return true;
136     for (auto *It = From; It; It = It->NextSibling)
137       if (It == N)
138         return true;
139     return false;
140   };
141   assert(Reachable(FirstChild, Begin) && "`Begin` is not reachable.");
142   assert(Reachable(Begin, End) && "`End` is not after `Begin`.");
143 #endif
144 
145   if (!New && Begin == End)
146     return;
147 
148   // Mark modification.
149   for (auto *T = this; T && T->Original; T = T->Parent)
150     T->Original = false;
151 
152   // Save the node before the range to be removed. Later we insert the `New`
153   // range after this node.
154   auto *BeforeBegin = Begin ? Begin->PreviousSibling : LastChild;
155 
156   // Detach old nodes.
157   for (auto *N = Begin; N != End;) {
158     auto *Next = N->NextSibling;
159 
160     N->setRole(NodeRole::Detached);
161     N->Parent = nullptr;
162     N->NextSibling = nullptr;
163     N->PreviousSibling = nullptr;
164     if (N->Original)
165       traverse(N, [](Node *C) { C->Original = false; });
166 
167     N = Next;
168   }
169 
170   // Attach new range.
171   auto *&NewFirst = BeforeBegin ? BeforeBegin->NextSibling : FirstChild;
172   auto *&NewLast = End ? End->PreviousSibling : LastChild;
173 
174   if (!New) {
175     NewFirst = End;
176     NewLast = BeforeBegin;
177     return;
178   }
179 
180   New->PreviousSibling = BeforeBegin;
181   NewFirst = New;
182 
183   Node *LastInNew;
184   for (auto *N = New; N != nullptr; N = N->NextSibling) {
185     LastInNew = N;
186     N->Parent = this;
187   }
188   LastInNew->NextSibling = End;
189   NewLast = LastInNew;
190 }
191 
192 namespace {
193 static void dumpLeaf(raw_ostream &OS, const syntax::Leaf *L,
194                      const SourceManager &SM) {
195   assert(L);
196   const auto *Token = L->getToken();
197   assert(Token);
198   // Handle 'eof' separately, calling text() on it produces an empty string.
199   if (Token->kind() == tok::eof)
200     OS << "<eof>";
201   else
202     OS << Token->text(SM);
203 }
204 
205 static void dumpNode(raw_ostream &OS, const syntax::Node *N,
206                      const SourceManager &SM, llvm::BitVector IndentMask) {
207   auto DumpExtraInfo = [&OS](const syntax::Node *N) {
208     if (N->getRole() != syntax::NodeRole::Unknown)
209       OS << " " << N->getRole();
210     if (!N->isOriginal())
211       OS << " synthesized";
212     if (!N->canModify())
213       OS << " unmodifiable";
214   };
215 
216   assert(N);
217   if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
218     OS << "'";
219     dumpLeaf(OS, L, SM);
220     OS << "'";
221     DumpExtraInfo(N);
222     OS << "\n";
223     return;
224   }
225 
226   const auto *T = cast<syntax::Tree>(N);
227   OS << T->getKind();
228   DumpExtraInfo(N);
229   OS << "\n";
230 
231   for (const syntax::Node &It : T->getChildren()) {
232     for (unsigned Idx = 0; Idx < IndentMask.size(); ++Idx) {
233       if (IndentMask[Idx])
234         OS << "| ";
235       else
236         OS << "  ";
237     }
238     if (!It.getNextSibling()) {
239       OS << "`-";
240       IndentMask.push_back(false);
241     } else {
242       OS << "|-";
243       IndentMask.push_back(true);
244     }
245     dumpNode(OS, &It, SM, IndentMask);
246     IndentMask.pop_back();
247   }
248 }
249 } // namespace
250 
251 std::string syntax::Node::dump(const SourceManager &SM) const {
252   std::string Str;
253   llvm::raw_string_ostream OS(Str);
254   dumpNode(OS, this, SM, /*IndentMask=*/{});
255   return std::move(OS.str());
256 }
257 
258 std::string syntax::Node::dumpTokens(const SourceManager &SM) const {
259   std::string Storage;
260   llvm::raw_string_ostream OS(Storage);
261   traverse(this, [&](const syntax::Node *N) {
262     if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
263       dumpLeaf(OS, L, SM);
264       OS << " ";
265     }
266   });
267   return Storage;
268 }
269 
270 void syntax::Node::assertInvariants() const {
271 #ifndef NDEBUG
272   if (isDetached())
273     assert(getParent() == nullptr);
274   else
275     assert(getParent() != nullptr);
276 
277   const auto *T = dyn_cast<Tree>(this);
278   if (!T)
279     return;
280   for (const Node &C : T->getChildren()) {
281     if (T->isOriginal())
282       assert(C.isOriginal());
283     assert(!C.isDetached());
284     assert(C.getParent() == T);
285     const auto *Next = C.getNextSibling();
286     assert(!Next || &C == Next->getPreviousSibling());
287     if (!C.getNextSibling())
288       assert(&C == T->getLastChild() &&
289              "Last child is reachable by advancing from the first child.");
290   }
291 
292   const auto *L = dyn_cast<List>(T);
293   if (!L)
294     return;
295   for (const Node &C : T->getChildren()) {
296     assert(C.getRole() == NodeRole::ListElement ||
297            C.getRole() == NodeRole::ListDelimiter);
298     if (C.getRole() == NodeRole::ListDelimiter) {
299       assert(isa<Leaf>(C));
300       assert(cast<Leaf>(C).getToken()->kind() == L->getDelimiterTokenKind());
301     }
302   }
303 
304 #endif
305 }
306 
307 void syntax::Node::assertInvariantsRecursive() const {
308 #ifndef NDEBUG
309   traverse(this, [&](const syntax::Node *N) { N->assertInvariants(); });
310 #endif
311 }
312 
313 const syntax::Leaf *syntax::Tree::findFirstLeaf() const {
314   for (const Node &C : getChildren()) {
315     if (const auto *L = dyn_cast<syntax::Leaf>(&C))
316       return L;
317     if (const auto *L = cast<syntax::Tree>(C).findFirstLeaf())
318       return L;
319   }
320   return nullptr;
321 }
322 
323 const syntax::Leaf *syntax::Tree::findLastLeaf() const {
324   for (const auto *C = getLastChild(); C; C = C->getPreviousSibling()) {
325     if (const auto *L = dyn_cast<syntax::Leaf>(C))
326       return L;
327     if (const auto *L = cast<syntax::Tree>(C)->findLastLeaf())
328       return L;
329   }
330   return nullptr;
331 }
332 
333 const syntax::Node *syntax::Tree::findChild(NodeRole R) const {
334   for (const Node &C : getChildren()) {
335     if (C.getRole() == R)
336       return &C;
337   }
338   return nullptr;
339 }
340 
341 std::vector<syntax::List::ElementAndDelimiter<syntax::Node>>
342 syntax::List::getElementsAsNodesAndDelimiters() {
343   if (!getFirstChild())
344     return {};
345 
346   std::vector<syntax::List::ElementAndDelimiter<Node>> Children;
347   syntax::Node *ElementWithoutDelimiter = nullptr;
348   for (Node &C : getChildren()) {
349     switch (C.getRole()) {
350     case syntax::NodeRole::ListElement: {
351       if (ElementWithoutDelimiter) {
352         Children.push_back({ElementWithoutDelimiter, nullptr});
353       }
354       ElementWithoutDelimiter = &C;
355       break;
356     }
357     case syntax::NodeRole::ListDelimiter: {
358       Children.push_back({ElementWithoutDelimiter, cast<syntax::Leaf>(&C)});
359       ElementWithoutDelimiter = nullptr;
360       break;
361     }
362     default:
363       llvm_unreachable(
364           "A list can have only elements and delimiters as children.");
365     }
366   }
367 
368   switch (getTerminationKind()) {
369   case syntax::List::TerminationKind::Separated: {
370     Children.push_back({ElementWithoutDelimiter, nullptr});
371     break;
372   }
373   case syntax::List::TerminationKind::Terminated:
374   case syntax::List::TerminationKind::MaybeTerminated: {
375     if (ElementWithoutDelimiter) {
376       Children.push_back({ElementWithoutDelimiter, nullptr});
377     }
378     break;
379   }
380   }
381 
382   return Children;
383 }
384 
385 // Almost the same implementation of `getElementsAsNodesAndDelimiters` but
386 // ignoring delimiters
387 std::vector<syntax::Node *> syntax::List::getElementsAsNodes() {
388   if (!getFirstChild())
389     return {};
390 
391   std::vector<syntax::Node *> Children;
392   syntax::Node *ElementWithoutDelimiter = nullptr;
393   for (Node &C : getChildren()) {
394     switch (C.getRole()) {
395     case syntax::NodeRole::ListElement: {
396       if (ElementWithoutDelimiter) {
397         Children.push_back(ElementWithoutDelimiter);
398       }
399       ElementWithoutDelimiter = &C;
400       break;
401     }
402     case syntax::NodeRole::ListDelimiter: {
403       Children.push_back(ElementWithoutDelimiter);
404       ElementWithoutDelimiter = nullptr;
405       break;
406     }
407     default:
408       llvm_unreachable("A list has only elements or delimiters.");
409     }
410   }
411 
412   switch (getTerminationKind()) {
413   case syntax::List::TerminationKind::Separated: {
414     Children.push_back(ElementWithoutDelimiter);
415     break;
416   }
417   case syntax::List::TerminationKind::Terminated:
418   case syntax::List::TerminationKind::MaybeTerminated: {
419     if (ElementWithoutDelimiter) {
420       Children.push_back(ElementWithoutDelimiter);
421     }
422     break;
423   }
424   }
425 
426   return Children;
427 }
428 
429 clang::tok::TokenKind syntax::List::getDelimiterTokenKind() const {
430   switch (this->getKind()) {
431   case NodeKind::NestedNameSpecifier:
432     return clang::tok::coloncolon;
433   case NodeKind::CallArguments:
434   case NodeKind::ParameterDeclarationList:
435   case NodeKind::DeclaratorList:
436     return clang::tok::comma;
437   default:
438     llvm_unreachable("This is not a subclass of List, thus "
439                      "getDelimiterTokenKind() cannot be called");
440   }
441 }
442 
443 syntax::List::TerminationKind syntax::List::getTerminationKind() const {
444   switch (this->getKind()) {
445   case NodeKind::NestedNameSpecifier:
446     return TerminationKind::Terminated;
447   case NodeKind::CallArguments:
448   case NodeKind::ParameterDeclarationList:
449   case NodeKind::DeclaratorList:
450     return TerminationKind::Separated;
451   default:
452     llvm_unreachable("This is not a subclass of List, thus "
453                      "getTerminationKind() cannot be called");
454   }
455 }
456 
457 bool syntax::List::canBeEmpty() const {
458   switch (this->getKind()) {
459   case NodeKind::NestedNameSpecifier:
460     return false;
461   case NodeKind::CallArguments:
462     return true;
463   case NodeKind::ParameterDeclarationList:
464     return true;
465   case NodeKind::DeclaratorList:
466     return true;
467   default:
468     llvm_unreachable("This is not a subclass of List, thus canBeEmpty() "
469                      "cannot be called");
470   }
471 }
472