1 //===--- FindSymbols.cpp ------------------------------------*- C++-*------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "FindSymbols.h"
9 
10 #include "AST.h"
11 #include "FuzzyMatch.h"
12 #include "ParsedAST.h"
13 #include "Quality.h"
14 #include "SourceCode.h"
15 #include "index/Index.h"
16 #include "support/Logger.h"
17 #include "clang/AST/DeclTemplate.h"
18 #include "clang/Index/IndexDataConsumer.h"
19 #include "clang/Index/IndexSymbol.h"
20 #include "clang/Index/IndexingAction.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/Support/FormatVariadic.h"
26 #include "llvm/Support/Path.h"
27 #include "llvm/Support/ScopedPrinter.h"
28 #include <limits>
29 #include <tuple>
30 
31 #define DEBUG_TYPE "FindSymbols"
32 
33 namespace clang {
34 namespace clangd {
35 
36 namespace {
37 using ScoredSymbolInfo = std::pair<float, SymbolInformation>;
38 struct ScoredSymbolGreater {
operator ()clang::clangd::__anon5723ad710111::ScoredSymbolGreater39   bool operator()(const ScoredSymbolInfo &L, const ScoredSymbolInfo &R) {
40     if (L.first != R.first)
41       return L.first > R.first;
42     return L.second.name < R.second.name; // Earlier name is better.
43   }
44 };
45 
46 // Returns true if \p Query can be found as a sub-sequence inside \p Scope.
approximateScopeMatch(llvm::StringRef Scope,llvm::StringRef Query)47 bool approximateScopeMatch(llvm::StringRef Scope, llvm::StringRef Query) {
48   assert(Scope.empty() || Scope.endswith("::"));
49   assert(Query.empty() || Query.endswith("::"));
50   while (!Scope.empty() && !Query.empty()) {
51     auto Colons = Scope.find("::");
52     assert(Colons != llvm::StringRef::npos);
53 
54     llvm::StringRef LeadingSpecifier = Scope.slice(0, Colons + 2);
55     Scope = Scope.slice(Colons + 2, llvm::StringRef::npos);
56     Query.consume_front(LeadingSpecifier);
57   }
58   return Query.empty();
59 }
60 
61 } // namespace
62 
indexToLSPLocation(const SymbolLocation & Loc,llvm::StringRef TUPath)63 llvm::Expected<Location> indexToLSPLocation(const SymbolLocation &Loc,
64                                             llvm::StringRef TUPath) {
65   auto Path = URI::resolve(Loc.FileURI, TUPath);
66   if (!Path)
67     return error("Could not resolve path for file '{0}': {1}", Loc.FileURI,
68                  Path.takeError());
69   Location L;
70   L.uri = URIForFile::canonicalize(*Path, TUPath);
71   Position Start, End;
72   Start.line = Loc.Start.line();
73   Start.character = Loc.Start.column();
74   End.line = Loc.End.line();
75   End.character = Loc.End.column();
76   L.range = {Start, End};
77   return L;
78 }
79 
symbolToLocation(const Symbol & Sym,llvm::StringRef TUPath)80 llvm::Expected<Location> symbolToLocation(const Symbol &Sym,
81                                           llvm::StringRef TUPath) {
82   // Prefer the definition over e.g. a function declaration in a header
83   return indexToLSPLocation(
84       Sym.Definition ? Sym.Definition : Sym.CanonicalDeclaration, TUPath);
85 }
86 
87 llvm::Expected<std::vector<SymbolInformation>>
getWorkspaceSymbols(llvm::StringRef Query,int Limit,const SymbolIndex * const Index,llvm::StringRef HintPath)88 getWorkspaceSymbols(llvm::StringRef Query, int Limit,
89                     const SymbolIndex *const Index, llvm::StringRef HintPath) {
90   std::vector<SymbolInformation> Result;
91   if (Query.empty() || !Index)
92     return Result;
93 
94   // Lookup for qualified names are performed as:
95   // - Exact namespaces are boosted by the index.
96   // - Approximate matches are (sub-scope match) included via AnyScope logic.
97   // - Non-matching namespaces (no sub-scope match) are post-filtered.
98   auto Names = splitQualifiedName(Query);
99 
100   FuzzyFindRequest Req;
101   Req.Query = std::string(Names.second);
102 
103   // FuzzyFind doesn't want leading :: qualifier.
104   auto HasLeadingColons = Names.first.consume_front("::");
105   // Limit the query to specific namespace if it is fully-qualified.
106   Req.AnyScope = !HasLeadingColons;
107   // Boost symbols from desired namespace.
108   if (HasLeadingColons || !Names.first.empty())
109     Req.Scopes = {std::string(Names.first)};
110   if (Limit) {
111     Req.Limit = Limit;
112     // If we are boosting a specific scope allow more results to be retrieved,
113     // since some symbols from preferred namespaces might not make the cut.
114     if (Req.AnyScope && !Req.Scopes.empty())
115       *Req.Limit *= 5;
116   }
117   TopN<ScoredSymbolInfo, ScoredSymbolGreater> Top(
118       Req.Limit ? *Req.Limit : std::numeric_limits<size_t>::max());
119   FuzzyMatcher Filter(Req.Query);
120 
121   Index->fuzzyFind(Req, [HintPath, &Top, &Filter, AnyScope = Req.AnyScope,
122                          ReqScope = Names.first](const Symbol &Sym) {
123     llvm::StringRef Scope = Sym.Scope;
124     // Fuzzyfind might return symbols from irrelevant namespaces if query was
125     // not fully-qualified, drop those.
126     if (AnyScope && !approximateScopeMatch(Scope, ReqScope))
127       return;
128 
129     auto Loc = symbolToLocation(Sym, HintPath);
130     if (!Loc) {
131       log("Workspace symbols: {0}", Loc.takeError());
132       return;
133     }
134 
135     SymbolQualitySignals Quality;
136     Quality.merge(Sym);
137     SymbolRelevanceSignals Relevance;
138     Relevance.Name = Sym.Name;
139     Relevance.Query = SymbolRelevanceSignals::Generic;
140     // If symbol and request scopes do not match exactly, apply a penalty.
141     Relevance.InBaseClass = AnyScope && Scope != ReqScope;
142     if (auto NameMatch = Filter.match(Sym.Name))
143       Relevance.NameMatch = *NameMatch;
144     else {
145       log("Workspace symbol: {0} didn't match query {1}", Sym.Name,
146           Filter.pattern());
147       return;
148     }
149     Relevance.merge(Sym);
150     auto QualScore = Quality.evaluateHeuristics();
151     auto RelScore = Relevance.evaluateHeuristics();
152     auto Score = evaluateSymbolAndRelevance(QualScore, RelScore);
153     dlog("FindSymbols: {0}{1} = {2}\n{3}{4}\n", Sym.Scope, Sym.Name, Score,
154          Quality, Relevance);
155 
156     SymbolInformation Info;
157     Info.name = (Sym.Name + Sym.TemplateSpecializationArgs).str();
158     Info.kind = indexSymbolKindToSymbolKind(Sym.SymInfo.Kind);
159     Info.location = *Loc;
160     Scope.consume_back("::");
161     Info.containerName = Scope.str();
162 
163     // Exposed score excludes fuzzy-match component, for client-side re-ranking.
164     Info.score = Relevance.NameMatch > std::numeric_limits<float>::epsilon()
165                      ? Score / Relevance.NameMatch
166                      : QualScore;
167     Top.push({Score, std::move(Info)});
168   });
169   for (auto &R : std::move(Top).items())
170     Result.push_back(std::move(R.second));
171   return Result;
172 }
173 
174 namespace {
declToSym(ASTContext & Ctx,const NamedDecl & ND)175 llvm::Optional<DocumentSymbol> declToSym(ASTContext &Ctx, const NamedDecl &ND) {
176   auto &SM = Ctx.getSourceManager();
177 
178   SourceLocation BeginLoc = SM.getSpellingLoc(SM.getFileLoc(ND.getBeginLoc()));
179   SourceLocation EndLoc = SM.getSpellingLoc(SM.getFileLoc(ND.getEndLoc()));
180   const auto SymbolRange =
181       toHalfOpenFileRange(SM, Ctx.getLangOpts(), {BeginLoc, EndLoc});
182   if (!SymbolRange)
183     return llvm::None;
184 
185   index::SymbolInfo SymInfo = index::getSymbolInfo(&ND);
186   // FIXME: This is not classifying constructors, destructors and operators
187   // correctly.
188   SymbolKind SK = indexSymbolKindToSymbolKind(SymInfo.Kind);
189 
190   DocumentSymbol SI;
191   SI.name = printName(Ctx, ND);
192   SI.kind = SK;
193   SI.deprecated = ND.isDeprecated();
194   SI.range = Range{sourceLocToPosition(SM, SymbolRange->getBegin()),
195                    sourceLocToPosition(SM, SymbolRange->getEnd())};
196 
197   SourceLocation NameLoc = ND.getLocation();
198   SourceLocation FallbackNameLoc;
199   if (NameLoc.isMacroID()) {
200     if (isSpelledInSource(NameLoc, SM)) {
201       // Prefer the spelling loc, but save the expansion loc as a fallback.
202       FallbackNameLoc = SM.getExpansionLoc(NameLoc);
203       NameLoc = SM.getSpellingLoc(NameLoc);
204     } else {
205       NameLoc = SM.getExpansionLoc(NameLoc);
206     }
207   }
208   auto ComputeSelectionRange = [&](SourceLocation L) -> Range {
209     Position NameBegin = sourceLocToPosition(SM, L);
210     Position NameEnd = sourceLocToPosition(
211         SM, Lexer::getLocForEndOfToken(L, 0, SM, Ctx.getLangOpts()));
212     return Range{NameBegin, NameEnd};
213   };
214 
215   SI.selectionRange = ComputeSelectionRange(NameLoc);
216   if (!SI.range.contains(SI.selectionRange) && FallbackNameLoc.isValid()) {
217     // 'selectionRange' must be contained in 'range'. In cases where clang
218     // reports unrelated ranges, we first try falling back to the expansion
219     // loc for the selection range.
220     SI.selectionRange = ComputeSelectionRange(FallbackNameLoc);
221   }
222   if (!SI.range.contains(SI.selectionRange)) {
223     // If the containment relationship still doesn't hold, throw away
224     // 'range' and use 'selectionRange' for both.
225     SI.range = SI.selectionRange;
226   }
227   return SI;
228 }
229 
230 /// A helper class to build an outline for the parse AST. It traverses the AST
231 /// directly instead of using RecursiveASTVisitor (RAV) for three main reasons:
232 ///    - there is no way to keep RAV from traversing subtrees we are not
233 ///      interested in. E.g. not traversing function locals or implicit template
234 ///      instantiations.
235 ///    - it's easier to combine results of recursive passes,
236 ///    - visiting decls is actually simple, so we don't hit the complicated
237 ///      cases that RAV mostly helps with (types, expressions, etc.)
238 class DocumentOutline {
239 public:
DocumentOutline(ParsedAST & AST)240   DocumentOutline(ParsedAST &AST) : AST(AST) {}
241 
242   /// Builds the document outline for the generated AST.
build()243   std::vector<DocumentSymbol> build() {
244     std::vector<DocumentSymbol> Results;
245     for (auto &TopLevel : AST.getLocalTopLevelDecls())
246       traverseDecl(TopLevel, Results);
247     return Results;
248   }
249 
250 private:
251   enum class VisitKind { No, OnlyDecl, OnlyChildren, DeclAndChildren };
252 
traverseDecl(Decl * D,std::vector<DocumentSymbol> & Results)253   void traverseDecl(Decl *D, std::vector<DocumentSymbol> &Results) {
254     // Skip symbols which do not originate from the main file.
255     if (!isInsideMainFile(D->getLocation(), AST.getSourceManager()))
256       return;
257 
258     if (auto *Templ = llvm::dyn_cast<TemplateDecl>(D)) {
259       // TemplatedDecl might be null, e.g. concepts.
260       if (auto *TD = Templ->getTemplatedDecl())
261         D = TD;
262     }
263 
264     VisitKind Visit = shouldVisit(D);
265     if (Visit == VisitKind::No)
266       return;
267 
268     if (Visit == VisitKind::OnlyChildren)
269       return traverseChildren(D, Results);
270 
271     auto *ND = llvm::cast<NamedDecl>(D);
272     auto Sym = declToSym(AST.getASTContext(), *ND);
273     if (!Sym)
274       return;
275     Results.push_back(std::move(*Sym));
276 
277     if (Visit == VisitKind::OnlyDecl)
278       return;
279 
280     assert(Visit == VisitKind::DeclAndChildren && "Unexpected VisitKind");
281     traverseChildren(ND, Results.back().children);
282   }
283 
traverseChildren(Decl * D,std::vector<DocumentSymbol> & Results)284   void traverseChildren(Decl *D, std::vector<DocumentSymbol> &Results) {
285     auto *Scope = llvm::dyn_cast<DeclContext>(D);
286     if (!Scope)
287       return;
288     for (auto *C : Scope->decls())
289       traverseDecl(C, Results);
290   }
291 
shouldVisit(Decl * D)292   VisitKind shouldVisit(Decl *D) {
293     if (D->isImplicit())
294       return VisitKind::No;
295 
296     if (llvm::isa<LinkageSpecDecl>(D) || llvm::isa<ExportDecl>(D))
297       return VisitKind::OnlyChildren;
298 
299     if (!llvm::isa<NamedDecl>(D))
300       return VisitKind::No;
301 
302     if (auto Func = llvm::dyn_cast<FunctionDecl>(D)) {
303       // Some functions are implicit template instantiations, those should be
304       // ignored.
305       if (auto *Info = Func->getTemplateSpecializationInfo()) {
306         if (!Info->isExplicitInstantiationOrSpecialization())
307           return VisitKind::No;
308       }
309       // Only visit the function itself, do not visit the children (i.e.
310       // function parameters, etc.)
311       return VisitKind::OnlyDecl;
312     }
313     // Handle template instantiations. We have three cases to consider:
314     //   - explicit instantiations, e.g. 'template class std::vector<int>;'
315     //     Visit the decl itself (it's present in the code), but not the
316     //     children.
317     //   - implicit instantiations, i.e. not written by the user.
318     //     Do not visit at all, they are not present in the code.
319     //   - explicit specialization, e.g. 'template <> class vector<bool> {};'
320     //     Visit both the decl and its children, both are written in the code.
321     if (auto *TemplSpec = llvm::dyn_cast<ClassTemplateSpecializationDecl>(D)) {
322       if (TemplSpec->isExplicitInstantiationOrSpecialization())
323         return TemplSpec->isExplicitSpecialization()
324                    ? VisitKind::DeclAndChildren
325                    : VisitKind::OnlyDecl;
326       return VisitKind::No;
327     }
328     if (auto *TemplSpec = llvm::dyn_cast<VarTemplateSpecializationDecl>(D)) {
329       if (TemplSpec->isExplicitInstantiationOrSpecialization())
330         return TemplSpec->isExplicitSpecialization()
331                    ? VisitKind::DeclAndChildren
332                    : VisitKind::OnlyDecl;
333       return VisitKind::No;
334     }
335     // For all other cases, visit both the children and the decl.
336     return VisitKind::DeclAndChildren;
337   }
338 
339   ParsedAST &AST;
340 };
341 
collectDocSymbols(ParsedAST & AST)342 std::vector<DocumentSymbol> collectDocSymbols(ParsedAST &AST) {
343   return DocumentOutline(AST).build();
344 }
345 } // namespace
346 
getDocumentSymbols(ParsedAST & AST)347 llvm::Expected<std::vector<DocumentSymbol>> getDocumentSymbols(ParsedAST &AST) {
348   return collectDocSymbols(AST);
349 }
350 
351 } // namespace clangd
352 } // namespace clang
353