1 //===--- Selection.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Selection.h"
10 #include "SourceCode.h"
11 #include "support/Logger.h"
12 #include "support/Trace.h"
13 #include "clang/AST/ASTTypeTraits.h"
14 #include "clang/AST/Decl.h"
15 #include "clang/AST/DeclCXX.h"
16 #include "clang/AST/Expr.h"
17 #include "clang/AST/ExprCXX.h"
18 #include "clang/AST/PrettyPrinter.h"
19 #include "clang/AST/RecursiveASTVisitor.h"
20 #include "clang/AST/TypeLoc.h"
21 #include "clang/Basic/OperatorKinds.h"
22 #include "clang/Basic/SourceLocation.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Basic/TokenKinds.h"
25 #include "clang/Lex/Lexer.h"
26 #include "clang/Tooling/Syntax/Tokens.h"
27 #include "llvm/ADT/STLExtras.h"
28 #include "llvm/ADT/StringExtras.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include <algorithm>
32 #include <string>
33 
34 namespace clang {
35 namespace clangd {
36 namespace {
37 using Node = SelectionTree::Node;
38 
39 // Measure the fraction of selections that were enabled by recovery AST.
recordMetrics(const SelectionTree & S,const LangOptions & Lang)40 void recordMetrics(const SelectionTree &S, const LangOptions &Lang) {
41   if (!trace::enabled())
42     return;
43   const char *LanguageLabel = Lang.CPlusPlus ? "C++" : Lang.ObjC ? "ObjC" : "C";
44   static constexpr trace::Metric SelectionUsedRecovery(
45       "selection_recovery", trace::Metric::Distribution, "language");
46   static constexpr trace::Metric RecoveryType(
47       "selection_recovery_type", trace::Metric::Distribution, "language");
48   const auto *Common = S.commonAncestor();
49   for (const auto *N = Common; N; N = N->Parent) {
50     if (const auto *RE = N->ASTNode.get<RecoveryExpr>()) {
51       SelectionUsedRecovery.record(1, LanguageLabel); // used recovery ast.
52       RecoveryType.record(RE->isTypeDependent() ? 0 : 1, LanguageLabel);
53       return;
54     }
55   }
56   if (Common)
57     SelectionUsedRecovery.record(0, LanguageLabel); // unused.
58 }
59 
60 // An IntervalSet maintains a set of disjoint subranges of an array.
61 //
62 // Initially, it contains the entire array.
63 //           [-----------------------------------------------------------]
64 //
65 // When a range is erased(), it will typically split the array in two.
66 //  Claim:                     [--------------------]
67 //  after:   [----------------]                      [-------------------]
68 //
69 // erase() returns the segments actually erased. Given the state above:
70 //  Claim:          [---------------------------------------]
71 //  Out:            [---------]                      [------]
72 //  After:   [-----]                                         [-----------]
73 //
74 // It is used to track (expanded) tokens not yet associated with an AST node.
75 // On traversing an AST node, its token range is erased from the unclaimed set.
76 // The tokens actually removed are associated with that node, and hit-tested
77 // against the selection to determine whether the node is selected.
78 template <typename T> class IntervalSet {
79 public:
IntervalSet(llvm::ArrayRef<T> Range)80   IntervalSet(llvm::ArrayRef<T> Range) { UnclaimedRanges.insert(Range); }
81 
82   // Removes the elements of Claim from the set, modifying or removing ranges
83   // that overlap it.
84   // Returns the continuous subranges of Claim that were actually removed.
erase(llvm::ArrayRef<T> Claim)85   llvm::SmallVector<llvm::ArrayRef<T>> erase(llvm::ArrayRef<T> Claim) {
86     llvm::SmallVector<llvm::ArrayRef<T>> Out;
87     if (Claim.empty())
88       return Out;
89 
90     // General case:
91     // Claim:                   [-----------------]
92     // UnclaimedRanges: [-A-] [-B-] [-C-] [-D-] [-E-] [-F-] [-G-]
93     // Overlap:               ^first                  ^second
94     // Ranges C and D are fully included. Ranges B and E must be trimmed.
95     auto Overlap = std::make_pair(
96         UnclaimedRanges.lower_bound({Claim.begin(), Claim.begin()}), // C
97         UnclaimedRanges.lower_bound({Claim.end(), Claim.end()}));    // F
98     // Rewind to cover B.
99     if (Overlap.first != UnclaimedRanges.begin()) {
100       --Overlap.first;
101       // ...unless B isn't selected at all.
102       if (Overlap.first->end() <= Claim.begin())
103         ++Overlap.first;
104     }
105     if (Overlap.first == Overlap.second)
106       return Out;
107 
108     // First, copy all overlapping ranges into the output.
109     auto OutFirst = Out.insert(Out.end(), Overlap.first, Overlap.second);
110     // If any of the overlapping ranges were sliced by the claim, split them:
111     //  - restrict the returned range to the claimed part
112     //  - save the unclaimed part so it can be reinserted
113     llvm::ArrayRef<T> RemainingHead, RemainingTail;
114     if (Claim.begin() > OutFirst->begin()) {
115       RemainingHead = {OutFirst->begin(), Claim.begin()};
116       *OutFirst = {Claim.begin(), OutFirst->end()};
117     }
118     if (Claim.end() < Out.back().end()) {
119       RemainingTail = {Claim.end(), Out.back().end()};
120       Out.back() = {Out.back().begin(), Claim.end()};
121     }
122 
123     // Erase all the overlapping ranges (invalidating all iterators).
124     UnclaimedRanges.erase(Overlap.first, Overlap.second);
125     // Reinsert ranges that were merely trimmed.
126     if (!RemainingHead.empty())
127       UnclaimedRanges.insert(RemainingHead);
128     if (!RemainingTail.empty())
129       UnclaimedRanges.insert(RemainingTail);
130 
131     return Out;
132   }
133 
134 private:
135   using TokenRange = llvm::ArrayRef<T>;
136   struct RangeLess {
operator ()clang::clangd::__anonb7319a390111::IntervalSet::RangeLess137     bool operator()(llvm::ArrayRef<T> L, llvm::ArrayRef<T> R) const {
138       return L.begin() < R.begin();
139     }
140   };
141 
142   // Disjoint sorted unclaimed ranges of expanded tokens.
143   std::set<llvm::ArrayRef<T>, RangeLess> UnclaimedRanges;
144 };
145 
146 // Sentinel value for the selectedness of a node where we've seen no tokens yet.
147 // This resolves to Unselected if no tokens are ever seen.
148 // But Unselected + Complete -> Partial, while NoTokens + Complete --> Complete.
149 // This value is never exposed publicly.
150 constexpr SelectionTree::Selection NoTokens =
151     static_cast<SelectionTree::Selection>(
152         static_cast<unsigned char>(SelectionTree::Complete + 1));
153 
154 // Nodes start with NoTokens, and then use this function to aggregate the
155 // selectedness as more tokens are found.
update(SelectionTree::Selection & Result,SelectionTree::Selection New)156 void update(SelectionTree::Selection &Result, SelectionTree::Selection New) {
157   if (New == NoTokens)
158     return;
159   if (Result == NoTokens)
160     Result = New;
161   else if (Result != New)
162     // Can only be completely selected (or unselected) if all tokens are.
163     Result = SelectionTree::Partial;
164 }
165 
166 // As well as comments, don't count semicolons as real tokens.
167 // They're not properly claimed as expr-statement is missing from the AST.
shouldIgnore(const syntax::Token & Tok)168 bool shouldIgnore(const syntax::Token &Tok) {
169   return Tok.kind() == tok::comment || Tok.kind() == tok::semi;
170 }
171 
172 // Determine whether 'Target' is the first expansion of the macro
173 // argument whose top-level spelling location is 'SpellingLoc'.
isFirstExpansion(FileID Target,SourceLocation SpellingLoc,const SourceManager & SM)174 bool isFirstExpansion(FileID Target, SourceLocation SpellingLoc,
175                       const SourceManager &SM) {
176   SourceLocation Prev = SpellingLoc;
177   while (true) {
178     // If the arg is expanded multiple times, getMacroArgExpandedLocation()
179     // returns the first expansion.
180     SourceLocation Next = SM.getMacroArgExpandedLocation(Prev);
181     // So if we reach the target, target is the first-expansion of the
182     // first-expansion ...
183     if (SM.getFileID(Next) == Target)
184       return true;
185 
186     // Otherwise, if the FileID stops changing, we've reached the innermost
187     // macro expansion, and Target was on a different branch.
188     if (SM.getFileID(Next) == SM.getFileID(Prev))
189       return false;
190 
191     Prev = Next;
192   }
193   return false;
194 }
195 
196 // SelectionTester can determine whether a range of tokens from the PP-expanded
197 // stream (corresponding to an AST node) is considered selected.
198 //
199 // When the tokens result from macro expansions, the appropriate tokens in the
200 // main file are examined (macro invocation or args). Similarly for #includes.
201 // However, only the first expansion of a given spelled token is considered
202 // selected.
203 //
204 // It tests each token in the range (not just the endpoints) as contiguous
205 // expanded tokens may not have contiguous spellings (with macros).
206 //
207 // Non-token text, and tokens not modeled in the AST (comments, semicolons)
208 // are ignored when determining selectedness.
209 class SelectionTester {
210 public:
211   // The selection is offsets [SelBegin, SelEnd) in SelFile.
SelectionTester(const syntax::TokenBuffer & Buf,FileID SelFile,unsigned SelBegin,unsigned SelEnd,const SourceManager & SM)212   SelectionTester(const syntax::TokenBuffer &Buf, FileID SelFile,
213                   unsigned SelBegin, unsigned SelEnd, const SourceManager &SM)
214       : SelFile(SelFile), SM(SM) {
215     // Find all tokens (partially) selected in the file.
216     auto AllSpelledTokens = Buf.spelledTokens(SelFile);
217     const syntax::Token *SelFirst =
218         llvm::partition_point(AllSpelledTokens, [&](const syntax::Token &Tok) {
219           return SM.getFileOffset(Tok.endLocation()) <= SelBegin;
220         });
221     const syntax::Token *SelLimit = std::partition_point(
222         SelFirst, AllSpelledTokens.end(), [&](const syntax::Token &Tok) {
223           return SM.getFileOffset(Tok.location()) < SelEnd;
224         });
225     auto Sel = llvm::makeArrayRef(SelFirst, SelLimit);
226     // Find which of these are preprocessed to nothing and should be ignored.
227     std::vector<bool> PPIgnored(Sel.size(), false);
228     for (const syntax::TokenBuffer::Expansion &X :
229          Buf.expansionsOverlapping(Sel)) {
230       if (X.Expanded.empty()) {
231         for (const syntax::Token &Tok : X.Spelled) {
232           if (&Tok >= SelFirst && &Tok < SelLimit)
233             PPIgnored[&Tok - SelFirst] = true;
234         }
235       }
236     }
237     // Precompute selectedness and offset for selected spelled tokens.
238     for (unsigned I = 0; I < Sel.size(); ++I) {
239       if (shouldIgnore(Sel[I]) || PPIgnored[I])
240         continue;
241       SpelledTokens.emplace_back();
242       Tok &S = SpelledTokens.back();
243       S.Offset = SM.getFileOffset(Sel[I].location());
244       if (S.Offset >= SelBegin && S.Offset + Sel[I].length() <= SelEnd)
245         S.Selected = SelectionTree::Complete;
246       else
247         S.Selected = SelectionTree::Partial;
248     }
249   }
250 
251   // Test whether a consecutive range of tokens is selected.
252   // The tokens are taken from the expanded token stream.
253   SelectionTree::Selection
test(llvm::ArrayRef<syntax::Token> ExpandedTokens) const254   test(llvm::ArrayRef<syntax::Token> ExpandedTokens) const {
255     if (SpelledTokens.empty())
256       return NoTokens;
257     SelectionTree::Selection Result = NoTokens;
258     while (!ExpandedTokens.empty()) {
259       // Take consecutive tokens from the same context together for efficiency.
260       FileID FID = SM.getFileID(ExpandedTokens.front().location());
261       auto Batch = ExpandedTokens.take_while([&](const syntax::Token &T) {
262         return SM.getFileID(T.location()) == FID;
263       });
264       assert(!Batch.empty());
265       ExpandedTokens = ExpandedTokens.drop_front(Batch.size());
266 
267       update(Result, testChunk(FID, Batch));
268     }
269     return Result;
270   }
271 
272   // Cheap check whether any of the tokens in R might be selected.
273   // If it returns false, test() will return NoTokens or Unselected.
274   // If it returns true, test() may return any value.
mayHit(SourceRange R) const275   bool mayHit(SourceRange R) const {
276     if (SpelledTokens.empty())
277       return false;
278     auto B = SM.getDecomposedLoc(R.getBegin());
279     auto E = SM.getDecomposedLoc(R.getEnd());
280     if (B.first == SelFile && E.first == SelFile)
281       if (E.second < SpelledTokens.front().Offset ||
282           B.second > SpelledTokens.back().Offset)
283         return false;
284     return true;
285   }
286 
287 private:
288   // Hit-test a consecutive range of tokens from a single file ID.
289   SelectionTree::Selection
testChunk(FileID FID,llvm::ArrayRef<syntax::Token> Batch) const290   testChunk(FileID FID, llvm::ArrayRef<syntax::Token> Batch) const {
291     assert(!Batch.empty());
292     SourceLocation StartLoc = Batch.front().location();
293     // There are several possible categories of FileID depending on how the
294     // preprocessor was used to generate these tokens:
295     //   main file, #included file, macro args, macro bodies.
296     // We need to identify the main-file tokens that represent Batch, and
297     // determine whether we want to exclusively claim them. Regular tokens
298     // represent one AST construct, but a macro invocation can represent many.
299 
300     // Handle tokens written directly in the main file.
301     if (FID == SelFile) {
302       return testTokenRange(SM.getFileOffset(Batch.front().location()),
303                             SM.getFileOffset(Batch.back().location()));
304     }
305 
306     // Handle tokens in another file #included into the main file.
307     // Check if the #include is selected, but don't claim it exclusively.
308     if (StartLoc.isFileID()) {
309       for (SourceLocation Loc = Batch.front().location(); Loc.isValid();
310            Loc = SM.getIncludeLoc(SM.getFileID(Loc))) {
311         if (SM.getFileID(Loc) == SelFile)
312           // FIXME: use whole #include directive, not just the filename string.
313           return testToken(SM.getFileOffset(Loc));
314       }
315       return NoTokens;
316     }
317 
318     assert(StartLoc.isMacroID());
319     // Handle tokens that were passed as a macro argument.
320     SourceLocation ArgStart = SM.getTopMacroCallerLoc(StartLoc);
321     if (SM.getFileID(ArgStart) == SelFile) {
322       if (isFirstExpansion(FID, ArgStart, SM)) {
323         SourceLocation ArgEnd =
324             SM.getTopMacroCallerLoc(Batch.back().location());
325         return testTokenRange(SM.getFileOffset(ArgStart),
326                               SM.getFileOffset(ArgEnd));
327       } else {
328         /* fall through and treat as part of the macro body */
329       }
330     }
331 
332     // Handle tokens produced by non-argument macro expansion.
333     // Check if the macro name is selected, don't claim it exclusively.
334     auto Expansion = SM.getDecomposedExpansionLoc(StartLoc);
335     if (Expansion.first == SelFile)
336       // FIXME: also check ( and ) for function-like macros?
337       return testToken(Expansion.second);
338     else
339       return NoTokens;
340   }
341 
342   // Is the closed token range [Begin, End] selected?
testTokenRange(unsigned Begin,unsigned End) const343   SelectionTree::Selection testTokenRange(unsigned Begin, unsigned End) const {
344     assert(Begin <= End);
345     // Outside the selection entirely?
346     if (End < SpelledTokens.front().Offset ||
347         Begin > SpelledTokens.back().Offset)
348       return SelectionTree::Unselected;
349 
350     // Compute range of tokens.
351     auto B = llvm::partition_point(
352         SpelledTokens, [&](const Tok &T) { return T.Offset < Begin; });
353     auto E = std::partition_point(
354         B, SpelledTokens.end(), [&](const Tok &T) { return T.Offset <= End; });
355 
356     // Aggregate selectedness of tokens in range.
357     bool ExtendsOutsideSelection = Begin < SpelledTokens.front().Offset ||
358                                    End > SpelledTokens.back().Offset;
359     SelectionTree::Selection Result =
360         ExtendsOutsideSelection ? SelectionTree::Unselected : NoTokens;
361     for (auto It = B; It != E; ++It)
362       update(Result, It->Selected);
363     return Result;
364   }
365 
366   // Is the token at `Offset` selected?
testToken(unsigned Offset) const367   SelectionTree::Selection testToken(unsigned Offset) const {
368     // Outside the selection entirely?
369     if (Offset < SpelledTokens.front().Offset ||
370         Offset > SpelledTokens.back().Offset)
371       return SelectionTree::Unselected;
372     // Find the token, if it exists.
373     auto It = llvm::partition_point(
374         SpelledTokens, [&](const Tok &T) { return T.Offset < Offset; });
375     if (It != SpelledTokens.end() && It->Offset == Offset)
376       return It->Selected;
377     return NoTokens;
378   }
379 
380   struct Tok {
381     unsigned Offset;
382     SelectionTree::Selection Selected;
383   };
384   std::vector<Tok> SpelledTokens;
385   FileID SelFile;
386   const SourceManager &SM;
387 };
388 
389 // Show the type of a node for debugging.
printNodeKind(llvm::raw_ostream & OS,const DynTypedNode & N)390 void printNodeKind(llvm::raw_ostream &OS, const DynTypedNode &N) {
391   if (const TypeLoc *TL = N.get<TypeLoc>()) {
392     // TypeLoc is a hierarchy, but has only a single ASTNodeKind.
393     // Synthesize the name from the Type subclass (except for QualifiedTypeLoc).
394     if (TL->getTypeLocClass() == TypeLoc::Qualified)
395       OS << "QualifiedTypeLoc";
396     else
397       OS << TL->getType()->getTypeClassName() << "TypeLoc";
398   } else {
399     OS << N.getNodeKind().asStringRef();
400   }
401 }
402 
403 #ifndef NDEBUG
printNodeToString(const DynTypedNode & N,const PrintingPolicy & PP)404 std::string printNodeToString(const DynTypedNode &N, const PrintingPolicy &PP) {
405   std::string S;
406   llvm::raw_string_ostream OS(S);
407   printNodeKind(OS, N);
408   OS << " ";
409   return std::move(OS.str());
410 }
411 #endif
412 
isImplicit(const Stmt * S)413 bool isImplicit(const Stmt *S) {
414   // Some Stmts are implicit and shouldn't be traversed, but there's no
415   // "implicit" attribute on Stmt/Expr.
416   // Unwrap implicit casts first if present (other nodes too?).
417   if (auto *ICE = llvm::dyn_cast<ImplicitCastExpr>(S))
418     S = ICE->getSubExprAsWritten();
419   // Implicit this in a MemberExpr is not filtered out by RecursiveASTVisitor.
420   // It would be nice if RAV handled this (!shouldTraverseImplicitCode()).
421   if (auto *CTI = llvm::dyn_cast<CXXThisExpr>(S))
422     if (CTI->isImplicit())
423       return true;
424   // Refs to operator() and [] are (almost?) always implicit as part of calls.
425   if (auto *DRE = llvm::dyn_cast<DeclRefExpr>(S)) {
426     if (auto *FD = llvm::dyn_cast<FunctionDecl>(DRE->getDecl())) {
427       switch (FD->getOverloadedOperator()) {
428       case OO_Call:
429       case OO_Subscript:
430         return true;
431       default:
432         break;
433       }
434     }
435   }
436   return false;
437 }
438 
439 // We find the selection by visiting written nodes in the AST, looking for nodes
440 // that intersect with the selected character range.
441 //
442 // While traversing, we maintain a parent stack. As nodes pop off the stack,
443 // we decide whether to keep them or not. To be kept, they must either be
444 // selected or contain some nodes that are.
445 //
446 // For simple cases (not inside macros) we prune subtrees that don't intersect.
447 class SelectionVisitor : public RecursiveASTVisitor<SelectionVisitor> {
448 public:
449   // Runs the visitor to gather selected nodes and their ancestors.
450   // If there is any selection, the root (TUDecl) is the first node.
collect(ASTContext & AST,const syntax::TokenBuffer & Tokens,const PrintingPolicy & PP,unsigned Begin,unsigned End,FileID File)451   static std::deque<Node> collect(ASTContext &AST,
452                                   const syntax::TokenBuffer &Tokens,
453                                   const PrintingPolicy &PP, unsigned Begin,
454                                   unsigned End, FileID File) {
455     SelectionVisitor V(AST, Tokens, PP, Begin, End, File);
456     V.TraverseAST(AST);
457     assert(V.Stack.size() == 1 && "Unpaired push/pop?");
458     assert(V.Stack.top() == &V.Nodes.front());
459     return std::move(V.Nodes);
460   }
461 
462   // We traverse all "well-behaved" nodes the same way:
463   //  - push the node onto the stack
464   //  - traverse its children recursively
465   //  - pop it from the stack
466   //  - hit testing: is intersection(node, selection) - union(children) empty?
467   //  - attach it to the tree if it or any children hit the selection
468   //
469   // Two categories of nodes are not "well-behaved":
470   //  - those without source range information, we don't record those
471   //  - those that can't be stored in DynTypedNode.
472   // We're missing some interesting things like Attr due to the latter.
TraverseDecl(Decl * X)473   bool TraverseDecl(Decl *X) {
474     if (X && isa<TranslationUnitDecl>(X))
475       return Base::TraverseDecl(X); // Already pushed by constructor.
476     // Base::TraverseDecl will suppress children, but not this node itself.
477     if (X && X->isImplicit())
478       return true;
479     return traverseNode(X, [&] { return Base::TraverseDecl(X); });
480   }
TraverseTypeLoc(TypeLoc X)481   bool TraverseTypeLoc(TypeLoc X) {
482     return traverseNode(&X, [&] { return Base::TraverseTypeLoc(X); });
483   }
TraverseTemplateArgumentLoc(const TemplateArgumentLoc & X)484   bool TraverseTemplateArgumentLoc(const TemplateArgumentLoc &X) {
485     return traverseNode(&X,
486                         [&] { return Base::TraverseTemplateArgumentLoc(X); });
487   }
TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc X)488   bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc X) {
489     return traverseNode(
490         &X, [&] { return Base::TraverseNestedNameSpecifierLoc(X); });
491   }
TraverseConstructorInitializer(CXXCtorInitializer * X)492   bool TraverseConstructorInitializer(CXXCtorInitializer *X) {
493     return traverseNode(
494         X, [&] { return Base::TraverseConstructorInitializer(X); });
495   }
TraverseCXXBaseSpecifier(const CXXBaseSpecifier & X)496   bool TraverseCXXBaseSpecifier(const CXXBaseSpecifier &X) {
497     return traverseNode(&X, [&] { return Base::TraverseCXXBaseSpecifier(X); });
498   }
499   // Stmt is the same, but this form allows the data recursion optimization.
dataTraverseStmtPre(Stmt * X)500   bool dataTraverseStmtPre(Stmt *X) {
501     if (!X || isImplicit(X))
502       return false;
503     auto N = DynTypedNode::create(*X);
504     if (canSafelySkipNode(N))
505       return false;
506     push(std::move(N));
507     if (shouldSkipChildren(X)) {
508       pop();
509       return false;
510     }
511     return true;
512   }
dataTraverseStmtPost(Stmt * X)513   bool dataTraverseStmtPost(Stmt *X) {
514     pop();
515     return true;
516   }
517   // QualifiedTypeLoc is handled strangely in RecursiveASTVisitor: the derived
518   // TraverseTypeLoc is not called for the inner UnqualTypeLoc.
519   // This means we'd never see 'int' in 'const int'! Work around that here.
520   // (The reason for the behavior is to avoid traversing the nested Type twice,
521   // but we ignore TraverseType anyway).
TraverseQualifiedTypeLoc(QualifiedTypeLoc QX)522   bool TraverseQualifiedTypeLoc(QualifiedTypeLoc QX) {
523     return traverseNode<TypeLoc>(
524         &QX, [&] { return TraverseTypeLoc(QX.getUnqualifiedLoc()); });
525   }
526   // Uninteresting parts of the AST that don't have locations within them.
TraverseNestedNameSpecifier(NestedNameSpecifier *)527   bool TraverseNestedNameSpecifier(NestedNameSpecifier *) { return true; }
TraverseType(QualType)528   bool TraverseType(QualType) { return true; }
529 
530   // The DeclStmt for the loop variable claims to cover the whole range
531   // inside the parens, this causes the range-init expression to not be hit.
532   // Traverse the loop VarDecl instead, which has the right source range.
TraverseCXXForRangeStmt(CXXForRangeStmt * S)533   bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) {
534     return traverseNode(S, [&] {
535       return TraverseStmt(S->getInit()) && TraverseDecl(S->getLoopVariable()) &&
536              TraverseStmt(S->getRangeInit()) && TraverseStmt(S->getBody());
537     });
538   }
539   // OpaqueValueExpr blocks traversal, we must explicitly traverse it.
TraverseOpaqueValueExpr(OpaqueValueExpr * E)540   bool TraverseOpaqueValueExpr(OpaqueValueExpr *E) {
541     return traverseNode(E, [&] { return TraverseStmt(E->getSourceExpr()); });
542   }
543   // We only want to traverse the *syntactic form* to understand the selection.
TraversePseudoObjectExpr(PseudoObjectExpr * E)544   bool TraversePseudoObjectExpr(PseudoObjectExpr *E) {
545     return traverseNode(E, [&] { return TraverseStmt(E->getSyntacticForm()); });
546   }
547 
548 private:
549   using Base = RecursiveASTVisitor<SelectionVisitor>;
550 
// Sets up hit-testing for the selection [SelBegin, SelEnd) in SelFile, with
// all expanded tokens initially unclaimed, and seeds the tree with the root.
SelectionVisitor(ASTContext &AST, const syntax::TokenBuffer &Tokens,
                 const PrintingPolicy &PP, unsigned SelBegin, unsigned SelEnd,
                 FileID SelFile)
    : SM(AST.getSourceManager()), LangOpts(AST.getLangOpts()),
#ifndef NDEBUG
      PrintPolicy(PP),
#endif
      TokenBuf(Tokens), SelChecker(Tokens, SelFile, SelBegin, SelEnd, SM),
      UnclaimedExpandedTokens(Tokens.expandedTokens()) {
  // The TU decl always gets a node, regardless of traversal scope.
  // It is the bottom of the ancestor stack and has no parent.
  Nodes.emplace_back();
  Node &Root = Nodes.back();
  Root.ASTNode = DynTypedNode::create(*AST.getTranslationUnitDecl());
  Root.Parent = nullptr;
  Root.Selected = SelectionTree::Unselected;
  Stack.push(&Root);
}
567 
568   // Generic case of TraverseFoo. Func should be the call to Base::TraverseFoo.
569   // Node is always a pointer so the generic code can handle any null checks.
570   template <typename T, typename Func>
traverseNode(T * Node,const Func & Body)571   bool traverseNode(T *Node, const Func &Body) {
572     if (Node == nullptr)
573       return true;
574     auto N = DynTypedNode::create(*Node);
575     if (canSafelySkipNode(N))
576       return true;
577     push(DynTypedNode::create(*Node));
578     bool Ret = Body();
579     pop();
580     return Ret;
581   }
582 
583   // HIT TESTING
584   //
585   // We do rough hit testing on the way down the tree to avoid traversing
586   // subtrees that don't touch the selection (canSafelySkipNode), but
587   // fine-grained hit-testing is mostly done on the way back up (in pop()).
588   // This means children get to claim parts of the selection first, and parents
589   // are only selected if they own tokens that no child owned.
590   //
591   // Nodes *usually* nest nicely: a child's getSourceRange() lies within the
592   // parent's, and a node (transitively) owns all tokens in its range.
593   //
594   // Exception 1: child range claims tokens that should be owned by the parent.
595   //              e.g. in `void foo(int);`, the FunctionTypeLoc should own
596   //              `void (int)` but the parent FunctionDecl should own `foo`.
597   // To handle this case, certain nodes claim small token ranges *before*
598   // their children are traversed. (see earlySourceRange).
599   //
600   // Exception 2: siblings both claim the same node.
601   //              e.g. `int x, y;` produces two sibling VarDecls.
602   //                    ~~~~~ x
603   //                    ~~~~~~~~ y
604   // Here the first ("leftmost") sibling claims the tokens it wants, and the
605   // other sibling gets what's left. So selecting "int" only includes the left
606   // VarDecl in the selection tree.
607 
608   // An optimization for a common case: nodes outside macro expansions that
609   // don't intersect the selection may be recursively skipped.
canSafelySkipNode(const DynTypedNode & N)610   bool canSafelySkipNode(const DynTypedNode &N) {
611     SourceRange S = N.getSourceRange();
612     if (auto *TL = N.get<TypeLoc>()) {
613       // FIXME: TypeLoc::getBeginLoc()/getEndLoc() are pretty fragile
614       // heuristics. We should consider only pruning critical TypeLoc nodes, to
615       // be more robust.
616 
617       // DeclTypeTypeLoc::getSourceRange() is incomplete, which would lead to
618       // failing
619       // to descend into the child expression.
620       // decltype(2+2);
621       // ~~~~~~~~~~~~~ <-- correct range
622       // ~~~~~~~~      <-- range reported by getSourceRange()
623       // ~~~~~~~~~~~~  <-- range with this hack(i.e, missing closing paren)
624       // FIXME: Alter DecltypeTypeLoc to contain parentheses locations and get
625       // rid of this patch.
626       if (auto DT = TL->getAs<DecltypeTypeLoc>())
627         S.setEnd(DT.getUnderlyingExpr()->getEndLoc());
628       // AttributedTypeLoc may point to the attribute's range, NOT the modified
629       // type's range.
630       if (auto AT = TL->getAs<AttributedTypeLoc>())
631         S = AT.getModifiedLoc().getSourceRange();
632     }
633     if (!SelChecker.mayHit(S)) {
634       dlog("{1}skip: {0}", printNodeToString(N, PrintPolicy), indent());
635       dlog("{1}skipped range = {0}", S.printToString(SM), indent(1));
636       return true;
637     }
638     return false;
639   }
640 
641   // There are certain nodes we want to treat as leaves in the SelectionTree,
642   // although they do have children.
shouldSkipChildren(const Stmt * X) const643   bool shouldSkipChildren(const Stmt *X) const {
644     // UserDefinedLiteral (e.g. 12_i) has two children (12 and _i).
645     // Unfortunately TokenBuffer sees 12_i as one token and can't split it.
646     // So we treat UserDefinedLiteral as a leaf node, owning the token.
647     return llvm::isa<UserDefinedLiteral>(X);
648   }
649 
650   // Pushes a node onto the ancestor stack. Pairs with pop().
651   // Performs early hit detection for some nodes (on the earlySourceRange).
push(DynTypedNode Node)652   void push(DynTypedNode Node) {
653     SourceRange Early = earlySourceRange(Node);
654     dlog("{1}push: {0}", printNodeToString(Node, PrintPolicy), indent());
655     Nodes.emplace_back();
656     Nodes.back().ASTNode = std::move(Node);
657     Nodes.back().Parent = Stack.top();
658     Nodes.back().Selected = NoTokens;
659     Stack.push(&Nodes.back());
660     claimRange(Early, Nodes.back().Selected);
661   }
662 
663   // Pops a node off the ancestor stack, and finalizes it. Pairs with push().
664   // Performs primary hit detection.
pop()665   void pop() {
666     Node &N = *Stack.top();
667     dlog("{1}pop: {0}", printNodeToString(N.ASTNode, PrintPolicy), indent(-1));
668     claimRange(N.ASTNode.getSourceRange(), N.Selected);
669     if (N.Selected == NoTokens)
670       N.Selected = SelectionTree::Unselected;
671     if (N.Selected || !N.Children.empty()) {
672       // Attach to the tree.
673       N.Parent->Children.push_back(&N);
674     } else {
675       // Neither N any children are selected, it doesn't belong in the tree.
676       assert(&N == &Nodes.back());
677       Nodes.pop_back();
678     }
679     Stack.pop();
680   }
681 
682   // Returns the range of tokens that this node will claim directly, and
683   // is not available to the node's children.
684   // Usually empty, but sometimes children cover tokens but shouldn't own them.
earlySourceRange(const DynTypedNode & N)685   SourceRange earlySourceRange(const DynTypedNode &N) {
686     if (const Decl *D = N.get<Decl>()) {
687       // We want constructor name to be claimed by TypeLoc not the constructor
688       // itself. Similar for deduction guides, we rather want to select the
689       // underlying TypeLoc.
690       // FIXME: Unfortunately this doesn't work, even though RecursiveASTVisitor
691       // traverses the underlying TypeLoc inside DeclarationName, it is null for
692       // constructors.
693       if (isa<CXXConstructorDecl>(D) || isa<CXXDeductionGuideDecl>(D))
694         return SourceRange();
695       // This will capture Field, Function, MSProperty, NonTypeTemplateParm and
696       // VarDecls. We want the name in the declarator to be claimed by the decl
697       // and not by any children. For example:
698       // void [[foo]]();
699       // int (*[[s]])();
700       // struct X { int [[hash]] [32]; [[operator]] int();}
701       if (const auto *DD = llvm::dyn_cast<DeclaratorDecl>(D))
702         return DD->getLocation();
703     } else if (const auto *CCI = N.get<CXXCtorInitializer>()) {
704       // : [[b_]](42)
705       return CCI->getMemberLocation();
706     }
707     return SourceRange();
708   }
709 
710   // Perform hit-testing of a complete Node against the selection.
711   // This runs for every node in the AST, and must be fast in common cases.
712   // This is usually called from pop(), so we can take children into account.
713   // The existing state of Result is relevant (early/late claims can interact).
claimRange(SourceRange S,SelectionTree::Selection & Result)714   void claimRange(SourceRange S, SelectionTree::Selection &Result) {
715     for (const auto &ClaimedRange :
716          UnclaimedExpandedTokens.erase(TokenBuf.expandedTokens(S)))
717       update(Result, SelChecker.test(ClaimedRange));
718 
719     if (Result && Result != NoTokens)
720       dlog("{1}hit selection: {0}", S.printToString(SM), indent());
721   }
722 
indent(int Offset=0)723   std::string indent(int Offset = 0) {
724     // Cast for signed arithmetic.
725     int Amount = int(Stack.size()) + Offset;
726     assert(Amount >= 0);
727     return std::string(Amount, ' ');
728   }
729 
  // Used to print source ranges in debug log messages.
  SourceManager &SM;
  // NOTE(review): not referenced by the methods visible in this part of the
  // class; presumably consulted elsewhere in the visitor - confirm.
  const LangOptions &LangOpts;
#ifndef NDEBUG
  // Only present in builds with assertions enabled: used to print nodes in
  // dlog() messages.
  const PrintingPolicy &PrintPolicy;
#endif
  // Source of the expanded tokens looked up for a range in claimRange().
  const syntax::TokenBuffer &TokenBuf;
  // Ancestor stack maintained by push()/pop(); the top entry becomes the
  // Parent of the next pushed node.
  std::stack<Node *> Stack;
  // Answers whether a range may hit the selection, and how a claimed token
  // range relates to it.
  SelectionTester SelChecker;
  // Expanded tokens not yet claimed by any node; claimRange() erases from it.
  IntervalSet<syntax::Token> UnclaimedExpandedTokens;
  std::deque<Node> Nodes; // Stable pointers as we add more nodes.
740 };
741 
742 } // namespace
743 
abbreviatedString(DynTypedNode N,const PrintingPolicy & PP)744 llvm::SmallString<256> abbreviatedString(DynTypedNode N,
745                                          const PrintingPolicy &PP) {
746   llvm::SmallString<256> Result;
747   {
748     llvm::raw_svector_ostream OS(Result);
749     N.print(OS, PP);
750   }
751   auto Pos = Result.find('\n');
752   if (Pos != llvm::StringRef::npos) {
753     bool MoreText =
754         !llvm::all_of(llvm::StringRef(Result).drop_front(Pos), llvm::isSpace);
755     Result.resize(Pos);
756     if (MoreText)
757       Result.append(" …");
758   }
759   return Result;
760 }
761 
print(llvm::raw_ostream & OS,const SelectionTree::Node & N,int Indent) const762 void SelectionTree::print(llvm::raw_ostream &OS, const SelectionTree::Node &N,
763                           int Indent) const {
764   if (N.Selected)
765     OS.indent(Indent - 1) << (N.Selected == SelectionTree::Complete ? '*'
766                                                                     : '.');
767   else
768     OS.indent(Indent);
769   printNodeKind(OS, N.ASTNode);
770   OS << ' ' << abbreviatedString(N.ASTNode, PrintPolicy) << "\n";
771   for (const Node *Child : N.Children)
772     print(OS, *Child, Indent + 2);
773 }
774 
kind() const775 std::string SelectionTree::Node::kind() const {
776   std::string S;
777   llvm::raw_string_ostream OS(S);
778   printNodeKind(OS, ASTNode);
779   return std::move(OS.str());
780 }
781 
782 // Decide which selections emulate a "point" query in between characters.
783 // If it's ambiguous (the neighboring characters are selectable tokens), returns
784 // both possibilities in preference order.
// Always returns at least one range - if no tokens are touched, an empty range.
786 static llvm::SmallVector<std::pair<unsigned, unsigned>, 2>
pointBounds(unsigned Offset,const syntax::TokenBuffer & Tokens)787 pointBounds(unsigned Offset, const syntax::TokenBuffer &Tokens) {
788   const auto &SM = Tokens.sourceManager();
789   SourceLocation Loc = SM.getComposedLoc(SM.getMainFileID(), Offset);
790   llvm::SmallVector<std::pair<unsigned, unsigned>, 2> Result;
791   // Prefer right token over left.
792   for (const syntax::Token &Tok :
793        llvm::reverse(spelledTokensTouching(Loc, Tokens))) {
794     if (shouldIgnore(Tok))
795       continue;
796     unsigned Offset = Tokens.sourceManager().getFileOffset(Tok.location());
797     Result.emplace_back(Offset, Offset + Tok.length());
798   }
799   if (Result.empty())
800     Result.emplace_back(Offset, Offset);
801   return Result;
802 }
803 
createEach(ASTContext & AST,const syntax::TokenBuffer & Tokens,unsigned Begin,unsigned End,llvm::function_ref<bool (SelectionTree)> Func)804 bool SelectionTree::createEach(ASTContext &AST,
805                                const syntax::TokenBuffer &Tokens,
806                                unsigned Begin, unsigned End,
807                                llvm::function_ref<bool(SelectionTree)> Func) {
808   if (Begin != End)
809     return Func(SelectionTree(AST, Tokens, Begin, End));
810   for (std::pair<unsigned, unsigned> Bounds : pointBounds(Begin, Tokens))
811     if (Func(SelectionTree(AST, Tokens, Bounds.first, Bounds.second)))
812       return true;
813   return false;
814 }
815 
createRight(ASTContext & AST,const syntax::TokenBuffer & Tokens,unsigned int Begin,unsigned int End)816 SelectionTree SelectionTree::createRight(ASTContext &AST,
817                                          const syntax::TokenBuffer &Tokens,
818                                          unsigned int Begin, unsigned int End) {
819   llvm::Optional<SelectionTree> Result;
820   createEach(AST, Tokens, Begin, End, [&](SelectionTree T) {
821     Result = std::move(T);
822     return true;
823   });
824   return std::move(*Result);
825 }
826 
SelectionTree(ASTContext & AST,const syntax::TokenBuffer & Tokens,unsigned Begin,unsigned End)827 SelectionTree::SelectionTree(ASTContext &AST, const syntax::TokenBuffer &Tokens,
828                              unsigned Begin, unsigned End)
829     : PrintPolicy(AST.getLangOpts()) {
830   // No fundamental reason the selection needs to be in the main file,
831   // but that's all clangd has needed so far.
832   const SourceManager &SM = AST.getSourceManager();
833   FileID FID = SM.getMainFileID();
834   PrintPolicy.TerseOutput = true;
835   PrintPolicy.IncludeNewlines = false;
836 
837   dlog("Computing selection for {0}",
838        SourceRange(SM.getComposedLoc(FID, Begin), SM.getComposedLoc(FID, End))
839            .printToString(SM));
840   Nodes = SelectionVisitor::collect(AST, Tokens, PrintPolicy, Begin, End, FID);
841   Root = Nodes.empty() ? nullptr : &Nodes.front();
842   recordMetrics(*this, AST.getLangOpts());
843   dlog("Built selection tree\n{0}", *this);
844 }
845 
commonAncestor() const846 const Node *SelectionTree::commonAncestor() const {
847   const Node *Ancestor = Root;
848   while (Ancestor->Children.size() == 1 && !Ancestor->Selected)
849     Ancestor = Ancestor->Children.front();
850   // Returning nullptr here is a bit unprincipled, but it makes the API safer:
851   // the TranslationUnitDecl contains all of the preamble, so traversing it is a
852   // performance cliff. Callers can check for null and use root() if they want.
853   return Ancestor != Root ? Ancestor : nullptr;
854 }
855 
getDeclContext() const856 const DeclContext &SelectionTree::Node::getDeclContext() const {
857   for (const Node *CurrentNode = this; CurrentNode != nullptr;
858        CurrentNode = CurrentNode->Parent) {
859     if (const Decl *Current = CurrentNode->ASTNode.get<Decl>()) {
860       if (CurrentNode != this)
861         if (auto *DC = dyn_cast<DeclContext>(Current))
862           return *DC;
863       return *Current->getDeclContext();
864     }
865   }
866   llvm_unreachable("A tree must always be rooted at TranslationUnitDecl.");
867 }
868 
ignoreImplicit() const869 const SelectionTree::Node &SelectionTree::Node::ignoreImplicit() const {
870   if (Children.size() == 1 &&
871       Children.front()->ASTNode.getSourceRange() == ASTNode.getSourceRange())
872     return Children.front()->ignoreImplicit();
873   return *this;
874 }
875 
outerImplicit() const876 const SelectionTree::Node &SelectionTree::Node::outerImplicit() const {
877   if (Parent && Parent->ASTNode.getSourceRange() == ASTNode.getSourceRange())
878     return Parent->outerImplicit();
879   return *this;
880 }
881 
882 } // namespace clangd
883 } // namespace clang
884