1 //===--- FindSymbols.cpp ------------------------------------*- C++-*------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "FindSymbols.h"
9 
10 #include "AST.h"
11 #include "FuzzyMatch.h"
12 #include "Logger.h"
13 #include "ParsedAST.h"
14 #include "Quality.h"
15 #include "SourceCode.h"
16 #include "index/Index.h"
17 #include "clang/AST/DeclTemplate.h"
18 #include "clang/Index/IndexDataConsumer.h"
19 #include "clang/Index/IndexSymbol.h"
20 #include "clang/Index/IndexingAction.h"
21 #include "llvm/Support/FormatVariadic.h"
22 #include "llvm/Support/Path.h"
23 #include "llvm/Support/ScopedPrinter.h"
24 
25 #define DEBUG_TYPE "FindSymbols"
26 
27 namespace clang {
28 namespace clangd {
29 
30 namespace {
31 using ScoredSymbolInfo = std::pair<float, SymbolInformation>;
32 struct ScoredSymbolGreater {
operator ()clang::clangd::__anonfc4b82590111::ScoredSymbolGreater33   bool operator()(const ScoredSymbolInfo &L, const ScoredSymbolInfo &R) {
34     if (L.first != R.first)
35       return L.first > R.first;
36     return L.second.name < R.second.name; // Earlier name is better.
37   }
38 };
39 
40 } // namespace
41 
symbolToLocation(const Symbol & Sym,llvm::StringRef HintPath)42 llvm::Expected<Location> symbolToLocation(const Symbol &Sym,
43                                           llvm::StringRef HintPath) {
44   // Prefer the definition over e.g. a function declaration in a header
45   auto &CD = Sym.Definition ? Sym.Definition : Sym.CanonicalDeclaration;
46   auto Path = URI::resolve(CD.FileURI, HintPath);
47   if (!Path) {
48     return llvm::make_error<llvm::StringError>(
49         formatv("Could not resolve path for symbol '{0}': {1}",
50                 Sym.Name, llvm::toString(Path.takeError())),
51         llvm::inconvertibleErrorCode());
52   }
53   Location L;
54   // Use HintPath as TUPath since there is no TU associated with this
55   // request.
56   L.uri = URIForFile::canonicalize(*Path, HintPath);
57   Position Start, End;
58   Start.line = CD.Start.line();
59   Start.character = CD.Start.column();
60   End.line = CD.End.line();
61   End.character = CD.End.column();
62   L.range = {Start, End};
63   return L;
64 }
65 
66 llvm::Expected<std::vector<SymbolInformation>>
getWorkspaceSymbols(llvm::StringRef Query,int Limit,const SymbolIndex * const Index,llvm::StringRef HintPath)67 getWorkspaceSymbols(llvm::StringRef Query, int Limit,
68                     const SymbolIndex *const Index, llvm::StringRef HintPath) {
69   std::vector<SymbolInformation> Result;
70   if (Query.empty() || !Index)
71     return Result;
72 
73   auto Names = splitQualifiedName(Query);
74 
75   FuzzyFindRequest Req;
76   Req.Query = Names.second;
77 
78   // FuzzyFind doesn't want leading :: qualifier
79   bool IsGlobalQuery = Names.first.consume_front("::");
80   // Restrict results to the scope in the query string if present (global or
81   // not).
82   if (IsGlobalQuery || !Names.first.empty())
83     Req.Scopes = {Names.first};
84   else
85     Req.AnyScope = true;
86   if (Limit)
87     Req.Limit = Limit;
88   TopN<ScoredSymbolInfo, ScoredSymbolGreater> Top(
89       Req.Limit ? *Req.Limit : std::numeric_limits<size_t>::max());
90   FuzzyMatcher Filter(Req.Query);
91   Index->fuzzyFind(Req, [HintPath, &Top, &Filter](const Symbol &Sym) {
92     auto Loc = symbolToLocation(Sym, HintPath);
93     if (!Loc) {
94       log("Workspace symbols: {0}", Loc.takeError());
95       return;
96     }
97 
98     SymbolKind SK = indexSymbolKindToSymbolKind(Sym.SymInfo.Kind);
99     std::string Scope = Sym.Scope;
100     llvm::StringRef ScopeRef = Scope;
101     ScopeRef.consume_back("::");
102     SymbolInformation Info = {(Sym.Name + Sym.TemplateSpecializationArgs).str(),
103                               SK, *Loc, ScopeRef};
104 
105     SymbolQualitySignals Quality;
106     Quality.merge(Sym);
107     SymbolRelevanceSignals Relevance;
108     Relevance.Name = Sym.Name;
109     Relevance.Query = SymbolRelevanceSignals::Generic;
110     if (auto NameMatch = Filter.match(Sym.Name))
111       Relevance.NameMatch = *NameMatch;
112     else {
113       log("Workspace symbol: {0} didn't match query {1}", Sym.Name,
114           Filter.pattern());
115       return;
116     }
117     Relevance.merge(Sym);
118     auto Score =
119         evaluateSymbolAndRelevance(Quality.evaluate(), Relevance.evaluate());
120     dlog("FindSymbols: {0}{1} = {2}\n{3}{4}\n", Sym.Scope, Sym.Name, Score,
121          Quality, Relevance);
122 
123     Top.push({Score, std::move(Info)});
124   });
125   for (auto &R : std::move(Top).items())
126     Result.push_back(std::move(R.second));
127   return Result;
128 }
129 
130 namespace {
declToSym(ASTContext & Ctx,const NamedDecl & ND)131 llvm::Optional<DocumentSymbol> declToSym(ASTContext &Ctx, const NamedDecl &ND) {
132   auto &SM = Ctx.getSourceManager();
133 
134   SourceLocation NameLoc = nameLocation(ND, SM);
135   // getFileLoc is a good choice for us, but we also need to make sure
136   // sourceLocToPosition won't switch files, so we call getSpellingLoc on top of
137   // that to make sure it does not switch files.
138   // FIXME: sourceLocToPosition should not switch files!
139   SourceLocation BeginLoc = SM.getSpellingLoc(SM.getFileLoc(ND.getBeginLoc()));
140   SourceLocation EndLoc = SM.getSpellingLoc(SM.getFileLoc(ND.getEndLoc()));
141   if (NameLoc.isInvalid() || BeginLoc.isInvalid() || EndLoc.isInvalid())
142     return llvm::None;
143 
144   if (!SM.isWrittenInMainFile(NameLoc) || !SM.isWrittenInMainFile(BeginLoc) ||
145       !SM.isWrittenInMainFile(EndLoc))
146     return llvm::None;
147 
148   Position NameBegin = sourceLocToPosition(SM, NameLoc);
149   Position NameEnd = sourceLocToPosition(
150       SM, Lexer::getLocForEndOfToken(NameLoc, 0, SM, Ctx.getLangOpts()));
151 
152   index::SymbolInfo SymInfo = index::getSymbolInfo(&ND);
153   // FIXME: this is not classifying constructors, destructors and operators
154   //        correctly (they're all "methods").
155   SymbolKind SK = indexSymbolKindToSymbolKind(SymInfo.Kind);
156 
157   DocumentSymbol SI;
158   SI.name = printName(Ctx, ND);
159   SI.kind = SK;
160   SI.deprecated = ND.isDeprecated();
161   SI.range =
162       Range{sourceLocToPosition(SM, BeginLoc), sourceLocToPosition(SM, EndLoc)};
163   SI.selectionRange = Range{NameBegin, NameEnd};
164   if (!SI.range.contains(SI.selectionRange)) {
165     // 'selectionRange' must be contained in 'range', so in cases where clang
166     // reports unrelated ranges we need to reconcile somehow.
167     SI.range = SI.selectionRange;
168   }
169   return SI;
170 }
171 
172 /// A helper class to build an outline for the parse AST. It traverses the AST
173 /// directly instead of using RecursiveASTVisitor (RAV) for three main reasons:
174 ///    - there is no way to keep RAV from traversing subtrees we are not
175 ///      interested in. E.g. not traversing function locals or implicit template
176 ///      instantiations.
177 ///    - it's easier to combine results of recursive passes,
178 ///    - visiting decls is actually simple, so we don't hit the complicated
179 ///      cases that RAV mostly helps with (types, expressions, etc.)
180 class DocumentOutline {
181 public:
DocumentOutline(ParsedAST & AST)182   DocumentOutline(ParsedAST &AST) : AST(AST) {}
183 
184   /// Builds the document outline for the generated AST.
build()185   std::vector<DocumentSymbol> build() {
186     std::vector<DocumentSymbol> Results;
187     for (auto &TopLevel : AST.getLocalTopLevelDecls())
188       traverseDecl(TopLevel, Results);
189     return Results;
190   }
191 
192 private:
193   enum class VisitKind { No, OnlyDecl, DeclAndChildren };
194 
traverseDecl(Decl * D,std::vector<DocumentSymbol> & Results)195   void traverseDecl(Decl *D, std::vector<DocumentSymbol> &Results) {
196     if (auto *Templ = llvm::dyn_cast<TemplateDecl>(D)) {
197       // TemplatedDecl might be null, e.g. concepts.
198       if (auto *TD = Templ->getTemplatedDecl())
199         D = TD;
200     }
201     auto *ND = llvm::dyn_cast<NamedDecl>(D);
202     if (!ND)
203       return;
204     VisitKind Visit = shouldVisit(ND);
205     if (Visit == VisitKind::No)
206       return;
207     llvm::Optional<DocumentSymbol> Sym = declToSym(AST.getASTContext(), *ND);
208     if (!Sym)
209       return;
210     if (Visit == VisitKind::DeclAndChildren)
211       traverseChildren(D, Sym->children);
212     Results.push_back(std::move(*Sym));
213   }
214 
traverseChildren(Decl * D,std::vector<DocumentSymbol> & Results)215   void traverseChildren(Decl *D, std::vector<DocumentSymbol> &Results) {
216     auto *Scope = llvm::dyn_cast<DeclContext>(D);
217     if (!Scope)
218       return;
219     for (auto *C : Scope->decls())
220       traverseDecl(C, Results);
221   }
222 
shouldVisit(NamedDecl * D)223   VisitKind shouldVisit(NamedDecl *D) {
224     if (D->isImplicit())
225       return VisitKind::No;
226 
227     if (auto Func = llvm::dyn_cast<FunctionDecl>(D)) {
228       // Some functions are implicit template instantiations, those should be
229       // ignored.
230       if (auto *Info = Func->getTemplateSpecializationInfo()) {
231         if (!Info->isExplicitInstantiationOrSpecialization())
232           return VisitKind::No;
233       }
234       // Only visit the function itself, do not visit the children (i.e.
235       // function parameters, etc.)
236       return VisitKind::OnlyDecl;
237     }
238     // Handle template instantiations. We have three cases to consider:
239     //   - explicit instantiations, e.g. 'template class std::vector<int>;'
240     //     Visit the decl itself (it's present in the code), but not the
241     //     children.
242     //   - implicit instantiations, i.e. not written by the user.
243     //     Do not visit at all, they are not present in the code.
244     //   - explicit specialization, e.g. 'template <> class vector<bool> {};'
245     //     Visit both the decl and its children, both are written in the code.
246     if (auto *TemplSpec = llvm::dyn_cast<ClassTemplateSpecializationDecl>(D)) {
247       if (TemplSpec->isExplicitInstantiationOrSpecialization())
248         return TemplSpec->isExplicitSpecialization()
249                    ? VisitKind::DeclAndChildren
250                    : VisitKind::OnlyDecl;
251       return VisitKind::No;
252     }
253     if (auto *TemplSpec = llvm::dyn_cast<VarTemplateSpecializationDecl>(D)) {
254       if (TemplSpec->isExplicitInstantiationOrSpecialization())
255         return TemplSpec->isExplicitSpecialization()
256                    ? VisitKind::DeclAndChildren
257                    : VisitKind::OnlyDecl;
258       return VisitKind::No;
259     }
260     // For all other cases, visit both the children and the decl.
261     return VisitKind::DeclAndChildren;
262   }
263 
264   ParsedAST &AST;
265 };
266 
collectDocSymbols(ParsedAST & AST)267 std::vector<DocumentSymbol> collectDocSymbols(ParsedAST &AST) {
268   return DocumentOutline(AST).build();
269 }
270 } // namespace
271 
getDocumentSymbols(ParsedAST & AST)272 llvm::Expected<std::vector<DocumentSymbol>> getDocumentSymbols(ParsedAST &AST) {
273   return collectDocSymbols(AST);
274 }
275 
276 } // namespace clangd
277 } // namespace clang
278