1 //===--- Selection.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "Selection.h"
10 #include "Logger.h"
11 #include "SourceCode.h"
12 #include "clang/AST/ASTTypeTraits.h"
13 #include "clang/AST/Decl.h"
14 #include "clang/AST/DeclCXX.h"
15 #include "clang/AST/Expr.h"
16 #include "clang/AST/ExprCXX.h"
17 #include "clang/AST/PrettyPrinter.h"
18 #include "clang/AST/RecursiveASTVisitor.h"
19 #include "clang/AST/TypeLoc.h"
20 #include "clang/Basic/OperatorKinds.h"
21 #include "clang/Basic/SourceLocation.h"
22 #include "clang/Basic/SourceManager.h"
23 #include "clang/Basic/TokenKinds.h"
24 #include "clang/Lex/Lexer.h"
25 #include "clang/Tooling/Syntax/Tokens.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/Support/Casting.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <algorithm>
30 #include <string>
31
32 namespace clang {
33 namespace clangd {
34 namespace {
35 using Node = SelectionTree::Node;
36 using ast_type_traits::DynTypedNode;
37
38 // An IntervalSet maintains a set of disjoint subranges of an array.
39 //
40 // Initially, it contains the entire array.
41 // [-----------------------------------------------------------]
42 //
43 // When a range is erased(), it will typically split the array in two.
44 // Claim: [--------------------]
45 // after: [----------------] [-------------------]
46 //
47 // erase() returns the segments actually erased. Given the state above:
48 // Claim: [---------------------------------------]
49 // Out: [---------] [------]
50 // After: [-----] [-----------]
51 //
52 // It is used to track (expanded) tokens not yet associated with an AST node.
53 // On traversing an AST node, its token range is erased from the unclaimed set.
54 // The tokens actually removed are associated with that node, and hit-tested
55 // against the selection to determine whether the node is selected.
56 template <typename T>
57 class IntervalSet {
58 public:
IntervalSet(llvm::ArrayRef<T> Range)59 IntervalSet(llvm::ArrayRef<T> Range) { UnclaimedRanges.insert(Range); }
60
61 // Removes the elements of Claim from the set, modifying or removing ranges
62 // that overlap it.
63 // Returns the continuous subranges of Claim that were actually removed.
erase(llvm::ArrayRef<T> Claim)64 llvm::SmallVector<llvm::ArrayRef<T>, 4> erase(llvm::ArrayRef<T> Claim) {
65 llvm::SmallVector<llvm::ArrayRef<T>, 4> Out;
66 if (Claim.empty())
67 return Out;
68
69 // General case:
70 // Claim: [-----------------]
71 // UnclaimedRanges: [-A-] [-B-] [-C-] [-D-] [-E-] [-F-] [-G-]
72 // Overlap: ^first ^second
73 // Ranges C and D are fully included. Ranges B and E must be trimmed.
74 auto Overlap = std::make_pair(
75 UnclaimedRanges.lower_bound({Claim.begin(), Claim.begin()}), // C
76 UnclaimedRanges.lower_bound({Claim.end(), Claim.end()})); // F
77 // Rewind to cover B.
78 if (Overlap.first != UnclaimedRanges.begin()) {
79 --Overlap.first;
80 // ...unless B isn't selected at all.
81 if (Overlap.first->end() <= Claim.begin())
82 ++Overlap.first;
83 }
84 if (Overlap.first == Overlap.second)
85 return Out;
86
87 // First, copy all overlapping ranges into the output.
88 auto OutFirst = Out.insert(Out.end(), Overlap.first, Overlap.second);
89 // If any of the overlapping ranges were sliced by the claim, split them:
90 // - restrict the returned range to the claimed part
91 // - save the unclaimed part so it can be reinserted
92 llvm::ArrayRef<T> RemainingHead, RemainingTail;
93 if (Claim.begin() > OutFirst->begin()) {
94 RemainingHead = {OutFirst->begin(), Claim.begin()};
95 *OutFirst = {Claim.begin(), OutFirst->end()};
96 }
97 if (Claim.end() < Out.back().end()) {
98 RemainingTail = {Claim.end(), Out.back().end()};
99 Out.back() = {Out.back().begin(), Claim.end()};
100 }
101
102 // Erase all the overlapping ranges (invalidating all iterators).
103 UnclaimedRanges.erase(Overlap.first, Overlap.second);
104 // Reinsert ranges that were merely trimmed.
105 if (!RemainingHead.empty())
106 UnclaimedRanges.insert(RemainingHead);
107 if (!RemainingTail.empty())
108 UnclaimedRanges.insert(RemainingTail);
109
110 return Out;
111 }
112
113 private:
114 using TokenRange = llvm::ArrayRef<T>;
115 struct RangeLess {
operator ()clang::clangd::__anon87c11f350111::IntervalSet::RangeLess116 bool operator()(llvm::ArrayRef<T> L, llvm::ArrayRef<T> R) const {
117 return L.begin() < R.begin();
118 }
119 };
120
121 // Disjoint sorted unclaimed ranges of expanded tokens.
122 std::set<llvm::ArrayRef<T>, RangeLess>
123 UnclaimedRanges;
124 };
125
126 // Sentinel value for the selectedness of a node where we've seen no tokens yet.
127 // This resolves to Unselected if no tokens are ever seen.
128 // But Unselected + Complete -> Partial, while NoTokens + Complete --> Complete.
129 // This value is never exposed publicly.
130 constexpr SelectionTree::Selection NoTokens =
131 static_cast<SelectionTree::Selection>(
132 static_cast<unsigned char>(SelectionTree::Complete + 1));
133
134 // Nodes start with NoTokens, and then use this function to aggregate the
135 // selectedness as more tokens are found.
update(SelectionTree::Selection & Result,SelectionTree::Selection New)136 void update(SelectionTree::Selection &Result, SelectionTree::Selection New) {
137 if (New == NoTokens)
138 return;
139 if (Result == NoTokens)
140 Result = New;
141 else if (Result != New)
142 // Can only be completely selected (or unselected) if all tokens are.
143 Result = SelectionTree::Partial;
144 }
145
146
147 // SelectionTester can determine whether a range of tokens from the PP-expanded
148 // stream (corresponding to an AST node) is considered selected.
149 //
150 // When the tokens result from macro expansions, the appropriate tokens in the
151 // main file are examined (macro invocation or args). Similarly for #includes.
152 //
153 // It tests each token in the range (not just the endpoints) as contiguous
154 // expanded tokens may not have contiguous spellings (with macros).
155 //
156 // Non-token text, and tokens not modeled in the AST (comments, semicolons)
157 // are ignored when determining selectedness.
158 class SelectionTester {
159 public:
160 // The selection is offsets [SelBegin, SelEnd) in SelFile.
SelectionTester(const syntax::TokenBuffer & Buf,FileID SelFile,unsigned SelBegin,unsigned SelEnd,const SourceManager & SM)161 SelectionTester(const syntax::TokenBuffer &Buf, FileID SelFile,
162 unsigned SelBegin, unsigned SelEnd, const SourceManager &SM)
163 : SelFile(SelFile), SM(SM) {
164 // Find all tokens (partially) selected in the file.
165 auto AllSpelledTokens = Buf.spelledTokens(SelFile);
166 const syntax::Token *SelFirst =
167 llvm::partition_point(AllSpelledTokens, [&](const syntax::Token &Tok) {
168 return SM.getFileOffset(Tok.endLocation()) <= SelBegin;
169 });
170 const syntax::Token *SelLimit = std::partition_point(
171 SelFirst, AllSpelledTokens.end(), [&](const syntax::Token &Tok) {
172 return SM.getFileOffset(Tok.location()) < SelEnd;
173 });
174 // Precompute selectedness and offset for selected spelled tokens.
175 for (const syntax::Token *T = SelFirst; T < SelLimit; ++T) {
176 // As well as comments, don't count semicolons as real tokens.
177 // They're not properly claimed as expr-statement is missing from the AST.
178 if (T->kind() == tok::comment || T->kind() == tok::semi)
179 continue;
180 SpelledTokens.emplace_back();
181 Tok &S = SpelledTokens.back();
182 S.Offset = SM.getFileOffset(T->location());
183 if (S.Offset >= SelBegin && S.Offset + T->length() <= SelEnd)
184 S.Selected = SelectionTree::Complete;
185 else
186 S.Selected = SelectionTree::Partial;
187 }
188 }
189
190 // Test whether a consecutive range of tokens is selected.
191 // The tokens are taken from the expanded token stream.
192 SelectionTree::Selection
test(llvm::ArrayRef<syntax::Token> ExpandedTokens) const193 test(llvm::ArrayRef<syntax::Token> ExpandedTokens) const {
194 if (SpelledTokens.empty())
195 return NoTokens;
196 SelectionTree::Selection Result = NoTokens;
197 while (!ExpandedTokens.empty()) {
198 // Take consecutive tokens from the same context together for efficiency.
199 FileID FID = SM.getFileID(ExpandedTokens.front().location());
200 auto Batch = ExpandedTokens.take_while([&](const syntax::Token &T) {
201 return SM.getFileID(T.location()) == FID;
202 });
203 assert(!Batch.empty());
204 ExpandedTokens = ExpandedTokens.drop_front(Batch.size());
205
206 update(Result, testChunk(FID, Batch));
207 }
208 return Result;
209 }
210
211 // Cheap check whether any of the tokens in R might be selected.
212 // If it returns false, test() will return NoTokens or Unselected.
213 // If it returns true, test() may return any value.
mayHit(SourceRange R) const214 bool mayHit(SourceRange R) const {
215 if (SpelledTokens.empty())
216 return false;
217 auto B = SM.getDecomposedLoc(R.getBegin());
218 auto E = SM.getDecomposedLoc(R.getEnd());
219 if (B.first == SelFile && E.first == SelFile)
220 if (E.second < SpelledTokens.front().Offset ||
221 B.second > SpelledTokens.back().Offset)
222 return false;
223 return true;
224 }
225
226 private:
227 // Hit-test a consecutive range of tokens from a single file ID.
228 SelectionTree::Selection
testChunk(FileID FID,llvm::ArrayRef<syntax::Token> Batch) const229 testChunk(FileID FID, llvm::ArrayRef<syntax::Token> Batch) const {
230 assert(!Batch.empty());
231 SourceLocation StartLoc = Batch.front().location();
232 // There are several possible categories of FileID depending on how the
233 // preprocessor was used to generate these tokens:
234 // main file, #included file, macro args, macro bodies.
235 // We need to identify the main-file tokens that represent Batch, and
236 // determine whether we want to exclusively claim them. Regular tokens
237 // represent one AST construct, but a macro invocation can represent many.
238
239 // Handle tokens written directly in the main file.
240 if (FID == SelFile) {
241 return testTokenRange(SM.getFileOffset(Batch.front().location()),
242 SM.getFileOffset(Batch.back().location()));
243 }
244
245 // Handle tokens in another file #included into the main file.
246 // Check if the #include is selected, but don't claim it exclusively.
247 if (StartLoc.isFileID()) {
248 for (SourceLocation Loc = Batch.front().location(); Loc.isValid();
249 Loc = SM.getIncludeLoc(SM.getFileID(Loc))) {
250 if (SM.getFileID(Loc) == SelFile)
251 // FIXME: use whole #include directive, not just the filename string.
252 return testToken(SM.getFileOffset(Loc));
253 }
254 return NoTokens;
255 }
256
257 assert(StartLoc.isMacroID());
258 // Handle tokens that were passed as a macro argument.
259 SourceLocation ArgStart = SM.getTopMacroCallerLoc(StartLoc);
260 if (SM.getFileID(ArgStart) == SelFile) {
261 SourceLocation ArgEnd = SM.getTopMacroCallerLoc(Batch.back().location());
262 return testTokenRange(SM.getFileOffset(ArgStart),
263 SM.getFileOffset(ArgEnd));
264 }
265
266 // Handle tokens produced by non-argument macro expansion.
267 // Check if the macro name is selected, don't claim it exclusively.
268 auto Expansion = SM.getDecomposedExpansionLoc(StartLoc);
269 if (Expansion.first == SelFile)
270 // FIXME: also check ( and ) for function-like macros?
271 return testToken(Expansion.second);
272 else
273 return NoTokens;
274 }
275
276 // Is the closed token range [Begin, End] selected?
testTokenRange(unsigned Begin,unsigned End) const277 SelectionTree::Selection testTokenRange(unsigned Begin, unsigned End) const {
278 assert(Begin <= End);
279 // Outside the selection entirely?
280 if (End < SpelledTokens.front().Offset ||
281 Begin > SpelledTokens.back().Offset)
282 return SelectionTree::Unselected;
283
284 // Compute range of tokens.
285 auto B = llvm::partition_point(
286 SpelledTokens, [&](const Tok &T) { return T.Offset < Begin; });
287 auto E = std::partition_point(
288 B, SpelledTokens.end(), [&](const Tok &T) { return T.Offset <= End; });
289
290 // Aggregate selectedness of tokens in range.
291 bool ExtendsOutsideSelection = Begin < SpelledTokens.front().Offset ||
292 End > SpelledTokens.back().Offset;
293 SelectionTree::Selection Result =
294 ExtendsOutsideSelection ? SelectionTree::Unselected : NoTokens;
295 for (auto It = B; It != E; ++It)
296 update(Result, It->Selected);
297 return Result;
298 }
299
300 // Is the token at `Offset` selected?
testToken(unsigned Offset) const301 SelectionTree::Selection testToken(unsigned Offset) const {
302 // Outside the selection entirely?
303 if (Offset < SpelledTokens.front().Offset ||
304 Offset > SpelledTokens.back().Offset)
305 return SelectionTree::Unselected;
306 // Find the token, if it exists.
307 auto It = llvm::partition_point(
308 SpelledTokens, [&](const Tok &T) { return T.Offset < Offset; });
309 if (It != SpelledTokens.end() && It->Offset == Offset)
310 return It->Selected;
311 return NoTokens;
312 }
313
314 struct Tok {
315 unsigned Offset;
316 SelectionTree::Selection Selected;
317 };
318 std::vector<Tok> SpelledTokens;
319 FileID SelFile;
320 const SourceManager &SM;
321 };
322
323 // Show the type of a node for debugging.
printNodeKind(llvm::raw_ostream & OS,const DynTypedNode & N)324 void printNodeKind(llvm::raw_ostream &OS, const DynTypedNode &N) {
325 if (const TypeLoc *TL = N.get<TypeLoc>()) {
326 // TypeLoc is a hierarchy, but has only a single ASTNodeKind.
327 // Synthesize the name from the Type subclass (except for QualifiedTypeLoc).
328 if (TL->getTypeLocClass() == TypeLoc::Qualified)
329 OS << "QualifiedTypeLoc";
330 else
331 OS << TL->getType()->getTypeClassName() << "TypeLoc";
332 } else {
333 OS << N.getNodeKind().asStringRef();
334 }
335 }
336
337 #ifndef NDEBUG
printNodeToString(const DynTypedNode & N,const PrintingPolicy & PP)338 std::string printNodeToString(const DynTypedNode &N, const PrintingPolicy &PP) {
339 std::string S;
340 llvm::raw_string_ostream OS(S);
341 printNodeKind(OS, N);
342 OS << " ";
343 return std::move(OS.str());
344 }
345 #endif
346
isImplicit(const Stmt * S)347 bool isImplicit(const Stmt* S) {
348 // Some Stmts are implicit and shouldn't be traversed, but there's no
349 // "implicit" attribute on Stmt/Expr.
350 // Unwrap implicit casts first if present (other nodes too?).
351 if (auto *ICE = llvm::dyn_cast<ImplicitCastExpr>(S))
352 S = ICE->getSubExprAsWritten();
353 // Implicit this in a MemberExpr is not filtered out by RecursiveASTVisitor.
354 // It would be nice if RAV handled this (!shouldTraverseImplicitCode()).
355 if (auto *CTI = llvm::dyn_cast<CXXThisExpr>(S))
356 if (CTI->isImplicit())
357 return true;
358 // Refs to operator() and [] are (almost?) always implicit as part of calls.
359 if (auto *DRE = llvm::dyn_cast<DeclRefExpr>(S)) {
360 if (auto *FD = llvm::dyn_cast<FunctionDecl>(DRE->getDecl())) {
361 switch (FD->getOverloadedOperator()) {
362 case OO_Call:
363 case OO_Subscript:
364 return true;
365 default:
366 break;
367 }
368 }
369 }
370 return false;
371 }
372
373 // We find the selection by visiting written nodes in the AST, looking for nodes
374 // that intersect with the selected character range.
375 //
376 // While traversing, we maintain a parent stack. As nodes pop off the stack,
377 // we decide whether to keep them or not. To be kept, they must either be
378 // selected or contain some nodes that are.
379 //
380 // For simple cases (not inside macros) we prune subtrees that don't intersect.
381 class SelectionVisitor : public RecursiveASTVisitor<SelectionVisitor> {
382 public:
383 // Runs the visitor to gather selected nodes and their ancestors.
384 // If there is any selection, the root (TUDecl) is the first node.
collect(ASTContext & AST,const syntax::TokenBuffer & Tokens,const PrintingPolicy & PP,unsigned Begin,unsigned End,FileID File)385 static std::deque<Node> collect(ASTContext &AST,
386 const syntax::TokenBuffer &Tokens,
387 const PrintingPolicy &PP, unsigned Begin,
388 unsigned End, FileID File) {
389 SelectionVisitor V(AST, Tokens, PP, Begin, End, File);
390 V.TraverseAST(AST);
391 assert(V.Stack.size() == 1 && "Unpaired push/pop?");
392 assert(V.Stack.top() == &V.Nodes.front());
393 return std::move(V.Nodes);
394 }
395
396 // We traverse all "well-behaved" nodes the same way:
397 // - push the node onto the stack
398 // - traverse its children recursively
399 // - pop it from the stack
400 // - hit testing: is intersection(node, selection) - union(children) empty?
401 // - attach it to the tree if it or any children hit the selection
402 //
403 // Two categories of nodes are not "well-behaved":
404 // - those without source range information, we don't record those
405 // - those that can't be stored in DynTypedNode.
406 // We're missing some interesting things like Attr due to the latter.
TraverseDecl(Decl * X)407 bool TraverseDecl(Decl *X) {
408 if (X && isa<TranslationUnitDecl>(X))
409 return Base::TraverseDecl(X); // Already pushed by constructor.
410 // Base::TraverseDecl will suppress children, but not this node itself.
411 if (X && X->isImplicit())
412 return true;
413 return traverseNode(X, [&] { return Base::TraverseDecl(X); });
414 }
TraverseTypeLoc(TypeLoc X)415 bool TraverseTypeLoc(TypeLoc X) {
416 return traverseNode(&X, [&] { return Base::TraverseTypeLoc(X); });
417 }
TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc X)418 bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc X) {
419 return traverseNode(
420 &X, [&] { return Base::TraverseNestedNameSpecifierLoc(X); });
421 }
TraverseConstructorInitializer(CXXCtorInitializer * X)422 bool TraverseConstructorInitializer(CXXCtorInitializer *X) {
423 return traverseNode(
424 X, [&] { return Base::TraverseConstructorInitializer(X); });
425 }
426 // Stmt is the same, but this form allows the data recursion optimization.
dataTraverseStmtPre(Stmt * X)427 bool dataTraverseStmtPre(Stmt *X) {
428 if (!X || isImplicit(X))
429 return false;
430 auto N = DynTypedNode::create(*X);
431 if (canSafelySkipNode(N))
432 return false;
433 push(std::move(N));
434 if (shouldSkipChildren(X)) {
435 pop();
436 return false;
437 }
438 return true;
439 }
dataTraverseStmtPost(Stmt * X)440 bool dataTraverseStmtPost(Stmt *X) {
441 pop();
442 return true;
443 }
444 // QualifiedTypeLoc is handled strangely in RecursiveASTVisitor: the derived
445 // TraverseTypeLoc is not called for the inner UnqualTypeLoc.
446 // This means we'd never see 'int' in 'const int'! Work around that here.
447 // (The reason for the behavior is to avoid traversing the nested Type twice,
448 // but we ignore TraverseType anyway).
TraverseQualifiedTypeLoc(QualifiedTypeLoc QX)449 bool TraverseQualifiedTypeLoc(QualifiedTypeLoc QX) {
450 return traverseNode<TypeLoc>(
451 &QX, [&] { return TraverseTypeLoc(QX.getUnqualifiedLoc()); });
452 }
453 // Uninteresting parts of the AST that don't have locations within them.
TraverseNestedNameSpecifier(NestedNameSpecifier *)454 bool TraverseNestedNameSpecifier(NestedNameSpecifier *) { return true; }
TraverseType(QualType)455 bool TraverseType(QualType) { return true; }
456
457 // The DeclStmt for the loop variable claims to cover the whole range
458 // inside the parens, this causes the range-init expression to not be hit.
459 // Traverse the loop VarDecl instead, which has the right source range.
TraverseCXXForRangeStmt(CXXForRangeStmt * S)460 bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) {
461 return traverseNode(S, [&] {
462 return TraverseStmt(S->getInit()) && TraverseDecl(S->getLoopVariable()) &&
463 TraverseStmt(S->getRangeInit()) && TraverseStmt(S->getBody());
464 });
465 }
466
467 private:
468 using Base = RecursiveASTVisitor<SelectionVisitor>;
469
SelectionVisitor(ASTContext & AST,const syntax::TokenBuffer & Tokens,const PrintingPolicy & PP,unsigned SelBegin,unsigned SelEnd,FileID SelFile)470 SelectionVisitor(ASTContext &AST, const syntax::TokenBuffer &Tokens,
471 const PrintingPolicy &PP, unsigned SelBegin, unsigned SelEnd,
472 FileID SelFile)
473 : SM(AST.getSourceManager()), LangOpts(AST.getLangOpts()),
474 #ifndef NDEBUG
475 PrintPolicy(PP),
476 #endif
477 TokenBuf(Tokens), SelChecker(Tokens, SelFile, SelBegin, SelEnd, SM),
478 UnclaimedExpandedTokens(Tokens.expandedTokens()) {
479 // Ensure we have a node for the TU decl, regardless of traversal scope.
480 Nodes.emplace_back();
481 Nodes.back().ASTNode = DynTypedNode::create(*AST.getTranslationUnitDecl());
482 Nodes.back().Parent = nullptr;
483 Nodes.back().Selected = SelectionTree::Unselected;
484 Stack.push(&Nodes.back());
485 }
486
487 // Generic case of TraverseFoo. Func should be the call to Base::TraverseFoo.
488 // Node is always a pointer so the generic code can handle any null checks.
489 template <typename T, typename Func>
traverseNode(T * Node,const Func & Body)490 bool traverseNode(T *Node, const Func &Body) {
491 if (Node == nullptr)
492 return true;
493 auto N = DynTypedNode::create(*Node);
494 if (canSafelySkipNode(N))
495 return true;
496 push(DynTypedNode::create(*Node));
497 bool Ret = Body();
498 pop();
499 return Ret;
500 }
501
502 // HIT TESTING
503 //
504 // We do rough hit testing on the way down the tree to avoid traversing
505 // subtrees that don't touch the selection (canSafelySkipNode), but
506 // fine-grained hit-testing is mostly done on the way back up (in pop()).
507 // This means children get to claim parts of the selection first, and parents
508 // are only selected if they own tokens that no child owned.
509 //
510 // Nodes *usually* nest nicely: a child's getSourceRange() lies within the
511 // parent's, and a node (transitively) owns all tokens in its range.
512 //
513 // Exception 1: child range claims tokens that should be owned by the parent.
514 // e.g. in `void foo(int);`, the FunctionTypeLoc should own
515 // `void (int)` but the parent FunctionDecl should own `foo`.
516 // To handle this case, certain nodes claim small token ranges *before*
517 // their children are traversed. (see earlySourceRange).
518 //
519 // Exception 2: siblings both claim the same node.
520 // e.g. `int x, y;` produces two sibling VarDecls.
521 // ~~~~~ x
522 // ~~~~~~~~ y
523 // Here the first ("leftmost") sibling claims the tokens it wants, and the
524 // other sibling gets what's left. So selecting "int" only includes the left
525 // VarDecl in the selection tree.
526
527 // An optimization for a common case: nodes outside macro expansions that
528 // don't intersect the selection may be recursively skipped.
canSafelySkipNode(const DynTypedNode & N)529 bool canSafelySkipNode(const DynTypedNode &N) {
530 SourceRange S = N.getSourceRange();
531 if (auto *TL = N.get<TypeLoc>()) {
532 // DeclTypeTypeLoc::getSourceRange() is incomplete, which would lead to
533 // failing
534 // to descend into the child expression.
535 // decltype(2+2);
536 // ~~~~~~~~~~~~~ <-- correct range
537 // ~~~~~~~~ <-- range reported by getSourceRange()
538 // ~~~~~~~~~~~~ <-- range with this hack(i.e, missing closing paren)
539 // FIXME: Alter DecltypeTypeLoc to contain parentheses locations and get
540 // rid of this patch.
541 if (auto DT = TL->getAs<DecltypeTypeLoc>())
542 S.setEnd(DT.getUnderlyingExpr()->getEndLoc());
543 }
544 if (!SelChecker.mayHit(S)) {
545 dlog("{1}skip: {0}", printNodeToString(N, PrintPolicy), indent());
546 dlog("{1}skipped range = {0}", S.printToString(SM), indent(1));
547 return true;
548 }
549 return false;
550 }
551
552 // There are certain nodes we want to treat as leaves in the SelectionTree,
553 // although they do have children.
shouldSkipChildren(const Stmt * X) const554 bool shouldSkipChildren(const Stmt *X) const {
555 // UserDefinedLiteral (e.g. 12_i) has two children (12 and _i).
556 // Unfortunately TokenBuffer sees 12_i as one token and can't split it.
557 // So we treat UserDefinedLiteral as a leaf node, owning the token.
558 return llvm::isa<UserDefinedLiteral>(X);
559 }
560
561 // Pushes a node onto the ancestor stack. Pairs with pop().
562 // Performs early hit detection for some nodes (on the earlySourceRange).
push(DynTypedNode Node)563 void push(DynTypedNode Node) {
564 SourceRange Early = earlySourceRange(Node);
565 dlog("{1}push: {0}", printNodeToString(Node, PrintPolicy), indent());
566 Nodes.emplace_back();
567 Nodes.back().ASTNode = std::move(Node);
568 Nodes.back().Parent = Stack.top();
569 Nodes.back().Selected = NoTokens;
570 Stack.push(&Nodes.back());
571 claimRange(Early, Nodes.back().Selected);
572 }
573
574 // Pops a node off the ancestor stack, and finalizes it. Pairs with push().
575 // Performs primary hit detection.
pop()576 void pop() {
577 Node &N = *Stack.top();
578 dlog("{1}pop: {0}", printNodeToString(N.ASTNode, PrintPolicy), indent(-1));
579 claimRange(N.ASTNode.getSourceRange(), N.Selected);
580 if (N.Selected == NoTokens)
581 N.Selected = SelectionTree::Unselected;
582 if (N.Selected || !N.Children.empty()) {
583 // Attach to the tree.
584 N.Parent->Children.push_back(&N);
585 } else {
586 // Neither N any children are selected, it doesn't belong in the tree.
587 assert(&N == &Nodes.back());
588 Nodes.pop_back();
589 }
590 Stack.pop();
591 }
592
593 // Returns the range of tokens that this node will claim directly, and
594 // is not available to the node's children.
595 // Usually empty, but sometimes children cover tokens but shouldn't own them.
earlySourceRange(const DynTypedNode & N)596 SourceRange earlySourceRange(const DynTypedNode &N) {
597 if (const Decl *D = N.get<Decl>()) {
598 // We want constructor name to be claimed by TypeLoc not the constructor
599 // itself. Similar for deduction guides, we rather want to select the
600 // underlying TypeLoc.
601 // FIXME: Unfortunately this doesn't work, even though RecursiveASTVisitor
602 // traverses the underlying TypeLoc inside DeclarationName, it is null for
603 // constructors.
604 if (isa<CXXConstructorDecl>(D) || isa<CXXDeductionGuideDecl>(D))
605 return SourceRange();
606 // This will capture Field, Function, MSProperty, NonTypeTemplateParm and
607 // VarDecls. We want the name in the declarator to be claimed by the decl
608 // and not by any children. For example:
609 // void [[foo]]();
610 // int (*[[s]])();
611 // struct X { int [[hash]] [32]; [[operator]] int();}
612 if (const auto *DD = llvm::dyn_cast<DeclaratorDecl>(D))
613 return DD->getLocation();
614 } else if (const auto *CCI = N.get<CXXCtorInitializer>()) {
615 // : [[b_]](42)
616 return CCI->getMemberLocation();
617 }
618 return SourceRange();
619 }
620
621 // Perform hit-testing of a complete Node against the selection.
622 // This runs for every node in the AST, and must be fast in common cases.
623 // This is usually called from pop(), so we can take children into account.
624 // The existing state of Result is relevant (early/late claims can interact).
claimRange(SourceRange S,SelectionTree::Selection & Result)625 void claimRange(SourceRange S, SelectionTree::Selection &Result) {
626 for (const auto &ClaimedRange :
627 UnclaimedExpandedTokens.erase(TokenBuf.expandedTokens(S)))
628 update(Result, SelChecker.test(ClaimedRange));
629
630 if (Result && Result != NoTokens)
631 dlog("{1}hit selection: {0}", S.printToString(SM), indent());
632 }
633
indent(int Offset=0)634 std::string indent(int Offset = 0) {
635 // Cast for signed arithmetic.
636 int Amount = int(Stack.size()) + Offset;
637 assert(Amount >= 0);
638 return std::string(Amount, ' ');
639 }
640
641 SourceManager &SM;
642 const LangOptions &LangOpts;
643 #ifndef NDEBUG
644 const PrintingPolicy &PrintPolicy;
645 #endif
646 const syntax::TokenBuffer &TokenBuf;
647 std::stack<Node *> Stack;
648 SelectionTester SelChecker;
649 IntervalSet<syntax::Token> UnclaimedExpandedTokens;
650 std::deque<Node> Nodes; // Stable pointers as we add more nodes.
651 };
652
653 } // namespace
654
print(llvm::raw_ostream & OS,const SelectionTree::Node & N,int Indent) const655 void SelectionTree::print(llvm::raw_ostream &OS, const SelectionTree::Node &N,
656 int Indent) const {
657 if (N.Selected)
658 OS.indent(Indent - 1) << (N.Selected == SelectionTree::Complete ? '*'
659 : '.');
660 else
661 OS.indent(Indent);
662 printNodeKind(OS, N.ASTNode);
663 OS << ' ';
664 N.ASTNode.print(OS, PrintPolicy);
665 OS << "\n";
666 for (const Node *Child : N.Children)
667 print(OS, *Child, Indent + 2);
668 }
669
kind() const670 std::string SelectionTree::Node::kind() const {
671 std::string S;
672 llvm::raw_string_ostream OS(S);
673 printNodeKind(OS, ASTNode);
674 return std::move(OS.str());
675 }
676
677 // Decide which selection emulates a "point" query in between characters.
pointBounds(unsigned Offset,FileID FID,ASTContext & AST)678 static std::pair<unsigned, unsigned> pointBounds(unsigned Offset, FileID FID,
679 ASTContext &AST) {
680 StringRef Buf = AST.getSourceManager().getBufferData(FID);
681 // Edge-cases where the choice is forced.
682 if (Buf.size() == 0)
683 return {0, 0};
684 if (Offset == 0)
685 return {0, 1};
686 if (Offset == Buf.size())
687 return {Offset - 1, Offset};
688 // We could choose either this byte or the previous. Usually we prefer the
689 // character on the right of the cursor (or under a block cursor).
690 // But if that's whitespace/semicolon, we likely want the token on the left.
691 auto IsIgnoredChar = [](char C) { return isWhitespace(C) || C == ';'; };
692 if (IsIgnoredChar(Buf[Offset]) && !IsIgnoredChar(Buf[Offset - 1]))
693 return {Offset - 1, Offset};
694 return {Offset, Offset + 1};
695 }
696
SelectionTree(ASTContext & AST,const syntax::TokenBuffer & Tokens,unsigned Begin,unsigned End)697 SelectionTree::SelectionTree(ASTContext &AST, const syntax::TokenBuffer &Tokens,
698 unsigned Begin, unsigned End)
699 : PrintPolicy(AST.getLangOpts()) {
700 // No fundamental reason the selection needs to be in the main file,
701 // but that's all clangd has needed so far.
702 const SourceManager &SM = AST.getSourceManager();
703 FileID FID = SM.getMainFileID();
704 if (Begin == End)
705 std::tie(Begin, End) = pointBounds(Begin, FID, AST);
706 PrintPolicy.TerseOutput = true;
707 PrintPolicy.IncludeNewlines = false;
708
709 dlog("Computing selection for {0}",
710 SourceRange(SM.getComposedLoc(FID, Begin), SM.getComposedLoc(FID, End))
711 .printToString(SM));
712 Nodes = SelectionVisitor::collect(AST, Tokens, PrintPolicy, Begin, End, FID);
713 Root = Nodes.empty() ? nullptr : &Nodes.front();
714 dlog("Built selection tree\n{0}", *this);
715 }
716
SelectionTree(ASTContext & AST,const syntax::TokenBuffer & Tokens,unsigned Offset)717 SelectionTree::SelectionTree(ASTContext &AST, const syntax::TokenBuffer &Tokens,
718 unsigned Offset)
719 : SelectionTree(AST, Tokens, Offset, Offset) {}
720
commonAncestor() const721 const Node *SelectionTree::commonAncestor() const {
722 const Node *Ancestor = Root;
723 while (Ancestor->Children.size() == 1 && !Ancestor->Selected)
724 Ancestor = Ancestor->Children.front();
725 // Returning nullptr here is a bit unprincipled, but it makes the API safer:
726 // the TranslationUnitDecl contains all of the preamble, so traversing it is a
727 // performance cliff. Callers can check for null and use root() if they want.
728 return Ancestor != Root ? Ancestor : nullptr;
729 }
730
getDeclContext() const731 const DeclContext& SelectionTree::Node::getDeclContext() const {
732 for (const Node* CurrentNode = this; CurrentNode != nullptr;
733 CurrentNode = CurrentNode->Parent) {
734 if (const Decl* Current = CurrentNode->ASTNode.get<Decl>()) {
735 if (CurrentNode != this)
736 if (auto *DC = dyn_cast<DeclContext>(Current))
737 return *DC;
738 return *Current->getDeclContext();
739 }
740 }
741 llvm_unreachable("A tree must always be rooted at TranslationUnitDecl.");
742 }
743
ignoreImplicit() const744 const SelectionTree::Node &SelectionTree::Node::ignoreImplicit() const {
745 if (Children.size() == 1 &&
746 Children.front()->ASTNode.getSourceRange() == ASTNode.getSourceRange())
747 return Children.front()->ignoreImplicit();
748 return *this;
749 }
750
outerImplicit() const751 const SelectionTree::Node &SelectionTree::Node::outerImplicit() const {
752 if (Parent && Parent->ASTNode.getSourceRange() == ASTNode.getSourceRange())
753 return Parent->outerImplicit();
754 return *this;
755 }
756
757 } // namespace clangd
758 } // namespace clang
759