1 //===--- SymbolCollector.cpp -------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "SymbolCollector.h"
10 #include "AST.h"
11 #include "CanonicalIncludes.h"
12 #include "CodeComplete.h"
13 #include "CodeCompletionStrings.h"
14 #include "ExpectedTypes.h"
15 #include "SourceCode.h"
16 #include "SymbolLocation.h"
17 #include "URI.h"
18 #include "index/SymbolID.h"
19 #include "support/Logger.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/DeclBase.h"
22 #include "clang/AST/DeclCXX.h"
23 #include "clang/AST/DeclObjC.h"
24 #include "clang/AST/DeclTemplate.h"
25 #include "clang/Basic/SourceLocation.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/Basic/Specifiers.h"
28 #include "clang/Index/IndexSymbol.h"
29 #include "clang/Index/IndexingAction.h"
30 #include "clang/Index/USRGeneration.h"
31 #include "clang/Lex/Preprocessor.h"
32 #include "clang/Tooling/Syntax/Tokens.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/FileSystem.h"
35 #include "llvm/Support/MemoryBuffer.h"
36 #include "llvm/Support/Path.h"
37
38 namespace clang {
39 namespace clangd {
40 namespace {
41
42 /// If \p ND is a template specialization, returns the described template.
43 /// Otherwise, returns \p ND.
getTemplateOrThis(const NamedDecl & ND)44 const NamedDecl &getTemplateOrThis(const NamedDecl &ND) {
45 if (auto T = ND.getDescribedTemplate())
46 return *T;
47 return ND;
48 }
49
50 // Returns a URI of \p Path. Firstly, this makes the \p Path absolute using the
51 // current working directory of the given SourceManager if the Path is not an
52 // absolute path. If failed, this resolves relative paths against \p FallbackDir
53 // to get an absolute path. Then, this tries creating an URI for the absolute
54 // path with schemes specified in \p Opts. This returns an URI with the first
55 // working scheme, if there is any; otherwise, this returns None.
56 //
57 // The Path can be a path relative to the build directory, or retrieved from
58 // the SourceManager.
toURI(const SourceManager & SM,llvm::StringRef Path,const SymbolCollector::Options & Opts)59 std::string toURI(const SourceManager &SM, llvm::StringRef Path,
60 const SymbolCollector::Options &Opts) {
61 llvm::SmallString<128> AbsolutePath(Path);
62 if (auto File = SM.getFileManager().getFile(Path)) {
63 if (auto CanonPath = getCanonicalPath(*File, SM)) {
64 AbsolutePath = *CanonPath;
65 }
66 }
67 // We don't perform is_absolute check in an else branch because makeAbsolute
68 // might return a relative path on some InMemoryFileSystems.
69 if (!llvm::sys::path::is_absolute(AbsolutePath) && !Opts.FallbackDir.empty())
70 llvm::sys::fs::make_absolute(Opts.FallbackDir, AbsolutePath);
71 llvm::sys::path::remove_dots(AbsolutePath, /*remove_dot_dot=*/true);
72 return URI::create(AbsolutePath).toString();
73 }
74
75 // Checks whether the decl is a private symbol in a header generated by
76 // protobuf compiler.
77 // FIXME: make filtering extensible when there are more use cases for symbol
78 // filters.
isPrivateProtoDecl(const NamedDecl & ND)79 bool isPrivateProtoDecl(const NamedDecl &ND) {
80 const auto &SM = ND.getASTContext().getSourceManager();
81 if (!isProtoFile(nameLocation(ND, SM), SM))
82 return false;
83
84 // ND without identifier can be operators.
85 if (ND.getIdentifier() == nullptr)
86 return false;
87 auto Name = ND.getIdentifier()->getName();
88 if (!Name.contains('_'))
89 return false;
90 // Nested proto entities (e.g. Message::Nested) have top-level decls
91 // that shouldn't be used (Message_Nested). Ignore them completely.
92 // The nested entities are dangling type aliases, we may want to reconsider
93 // including them in the future.
94 // For enum constants, SOME_ENUM_CONSTANT is not private and should be
95 // indexed. Outer_INNER is private. This heuristic relies on naming style, it
96 // will include OUTER_INNER and exclude some_enum_constant.
97 // FIXME: the heuristic relies on naming style (i.e. no underscore in
98 // user-defined names) and can be improved.
99 return (ND.getKind() != Decl::EnumConstant) || llvm::any_of(Name, islower);
100 }
101
102 // We only collect #include paths for symbols that are suitable for global code
103 // completion, except for namespaces since #include path for a namespace is hard
104 // to define.
shouldCollectIncludePath(index::SymbolKind Kind)105 bool shouldCollectIncludePath(index::SymbolKind Kind) {
106 using SK = index::SymbolKind;
107 switch (Kind) {
108 case SK::Macro:
109 case SK::Enum:
110 case SK::Struct:
111 case SK::Class:
112 case SK::Union:
113 case SK::TypeAlias:
114 case SK::Using:
115 case SK::Function:
116 case SK::Variable:
117 case SK::EnumConstant:
118 return true;
119 default:
120 return false;
121 }
122 }
123
124 // Return the symbol range of the token at \p TokLoc.
125 std::pair<SymbolLocation::Position, SymbolLocation::Position>
getTokenRange(SourceLocation TokLoc,const SourceManager & SM,const LangOptions & LangOpts)126 getTokenRange(SourceLocation TokLoc, const SourceManager &SM,
127 const LangOptions &LangOpts) {
128 auto CreatePosition = [&SM](SourceLocation Loc) {
129 auto LSPLoc = sourceLocToPosition(SM, Loc);
130 SymbolLocation::Position Pos;
131 Pos.setLine(LSPLoc.line);
132 Pos.setColumn(LSPLoc.character);
133 return Pos;
134 };
135
136 auto TokenLength = clang::Lexer::MeasureTokenLength(TokLoc, SM, LangOpts);
137 return {CreatePosition(TokLoc),
138 CreatePosition(TokLoc.getLocWithOffset(TokenLength))};
139 }
140
141 // Return the symbol location of the token at \p TokLoc.
142 llvm::Optional<SymbolLocation>
getTokenLocation(SourceLocation TokLoc,const SourceManager & SM,const SymbolCollector::Options & Opts,const clang::LangOptions & LangOpts,std::string & FileURIStorage)143 getTokenLocation(SourceLocation TokLoc, const SourceManager &SM,
144 const SymbolCollector::Options &Opts,
145 const clang::LangOptions &LangOpts,
146 std::string &FileURIStorage) {
147 auto Path = SM.getFilename(TokLoc);
148 if (Path.empty())
149 return None;
150 FileURIStorage = toURI(SM, Path, Opts);
151 SymbolLocation Result;
152 Result.FileURI = FileURIStorage.c_str();
153 auto Range = getTokenRange(TokLoc, SM, LangOpts);
154 Result.Start = Range.first;
155 Result.End = Range.second;
156
157 return Result;
158 }
159
160 // Checks whether \p ND is a good candidate to be the *canonical* declaration of
161 // its symbol (e.g. a go-to-declaration target). This overrides the default of
162 // using Clang's canonical declaration, which is the first in the TU.
163 //
164 // Example: preferring a class declaration over its forward declaration.
isPreferredDeclaration(const NamedDecl & ND,index::SymbolRoleSet Roles)165 bool isPreferredDeclaration(const NamedDecl &ND, index::SymbolRoleSet Roles) {
166 const auto &SM = ND.getASTContext().getSourceManager();
167 if (isa<TagDecl>(ND))
168 return (Roles & static_cast<unsigned>(index::SymbolRole::Definition)) &&
169 !isInsideMainFile(ND.getLocation(), SM);
170 if (const auto *ID = dyn_cast<ObjCInterfaceDecl>(&ND))
171 return ID->isThisDeclarationADefinition();
172 if (const auto *PD = dyn_cast<ObjCProtocolDecl>(&ND))
173 return PD->isThisDeclarationADefinition();
174 return false;
175 }
176
toRefKind(index::SymbolRoleSet Roles,bool Spelled=false)177 RefKind toRefKind(index::SymbolRoleSet Roles, bool Spelled = false) {
178 RefKind Result = RefKind::Unknown;
179 if (Roles & static_cast<unsigned>(index::SymbolRole::Declaration))
180 Result |= RefKind::Declaration;
181 if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
182 Result |= RefKind::Definition;
183 if (Roles & static_cast<unsigned>(index::SymbolRole::Reference))
184 Result |= RefKind::Reference;
185 if (Spelled)
186 Result |= RefKind::Spelled;
187 return Result;
188 }
189
shouldIndexRelation(const index::SymbolRelation & R)190 bool shouldIndexRelation(const index::SymbolRelation &R) {
191 // We currently only index BaseOf relations, for type hierarchy subtypes.
192 return R.Roles & static_cast<unsigned>(index::SymbolRole::RelationBaseOf);
193 }
194
195 } // namespace
196
SymbolCollector(Options Opts)197 SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {}
198
initialize(ASTContext & Ctx)199 void SymbolCollector::initialize(ASTContext &Ctx) {
200 ASTCtx = &Ctx;
201 CompletionAllocator = std::make_shared<GlobalCodeCompletionAllocator>();
202 CompletionTUInfo =
203 std::make_unique<CodeCompletionTUInfo>(CompletionAllocator);
204 }
205
shouldCollectSymbol(const NamedDecl & ND,const ASTContext & ASTCtx,const Options & Opts,bool IsMainFileOnly)206 bool SymbolCollector::shouldCollectSymbol(const NamedDecl &ND,
207 const ASTContext &ASTCtx,
208 const Options &Opts,
209 bool IsMainFileOnly) {
210 // Skip anonymous declarations, e.g (anonymous enum/class/struct).
211 if (ND.getDeclName().isEmpty())
212 return false;
213
214 // Skip main-file symbols if we are not collecting them.
215 if (IsMainFileOnly && !Opts.CollectMainFileSymbols)
216 return false;
217
218 // Skip symbols in anonymous namespaces in header files.
219 if (!IsMainFileOnly && ND.isInAnonymousNamespace())
220 return false;
221
222 // We want most things but not "local" symbols such as symbols inside
223 // FunctionDecl, BlockDecl, ObjCMethodDecl and OMPDeclareReductionDecl.
224 // FIXME: Need a matcher for ExportDecl in order to include symbols declared
225 // within an export.
226 const auto *DeclCtx = ND.getDeclContext();
227 switch (DeclCtx->getDeclKind()) {
228 case Decl::TranslationUnit:
229 case Decl::Namespace:
230 case Decl::LinkageSpec:
231 case Decl::Enum:
232 case Decl::ObjCProtocol:
233 case Decl::ObjCInterface:
234 case Decl::ObjCCategory:
235 case Decl::ObjCCategoryImpl:
236 case Decl::ObjCImplementation:
237 break;
238 default:
239 // Record has a few derivations (e.g. CXXRecord, Class specialization), it's
240 // easier to cast.
241 if (!isa<RecordDecl>(DeclCtx))
242 return false;
243 }
244
245 // Avoid indexing internal symbols in protobuf generated headers.
246 if (isPrivateProtoDecl(ND))
247 return false;
248 return true;
249 }
250
251 // Always return true to continue indexing.
handleDeclOccurrence(const Decl * D,index::SymbolRoleSet Roles,llvm::ArrayRef<index::SymbolRelation> Relations,SourceLocation Loc,index::IndexDataConsumer::ASTNodeInfo ASTNode)252 bool SymbolCollector::handleDeclOccurrence(
253 const Decl *D, index::SymbolRoleSet Roles,
254 llvm::ArrayRef<index::SymbolRelation> Relations, SourceLocation Loc,
255 index::IndexDataConsumer::ASTNodeInfo ASTNode) {
256 assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set.");
257 assert(CompletionAllocator && CompletionTUInfo);
258 assert(ASTNode.OrigD);
259 // Indexing API puts canonical decl into D, which might not have a valid
260 // source location for implicit/built-in decls. Fallback to original decl in
261 // such cases.
262 if (D->getLocation().isInvalid())
263 D = ASTNode.OrigD;
264 // If OrigD is an declaration associated with a friend declaration and it's
265 // not a definition, skip it. Note that OrigD is the occurrence that the
266 // collector is currently visiting.
267 if ((ASTNode.OrigD->getFriendObjectKind() !=
268 Decl::FriendObjectKind::FOK_None) &&
269 !(Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
270 return true;
271 // A declaration created for a friend declaration should not be used as the
272 // canonical declaration in the index. Use OrigD instead, unless we've already
273 // picked a replacement for D
274 if (D->getFriendObjectKind() != Decl::FriendObjectKind::FOK_None)
275 D = CanonicalDecls.try_emplace(D, ASTNode.OrigD).first->second;
276 // Flag to mark that D should be considered canonical meaning its declaration
277 // will override any previous declaration for the Symbol.
278 bool DeclIsCanonical = false;
279 // Avoid treating ObjCImplementationDecl as a canonical declaration if it has
280 // a corresponding non-implicit and non-forward declared ObjcInterfaceDecl.
281 if (const auto *IID = dyn_cast<ObjCImplementationDecl>(D)) {
282 DeclIsCanonical = true;
283 if (const auto *CID = IID->getClassInterface())
284 if (const auto *DD = CID->getDefinition())
285 if (!DD->isImplicitInterfaceDecl())
286 D = DD;
287 }
288 // Avoid treating ObjCCategoryImplDecl as a canonical declaration in favor of
289 // its ObjCCategoryDecl if it has one.
290 if (const auto *CID = dyn_cast<ObjCCategoryImplDecl>(D)) {
291 DeclIsCanonical = true;
292 if (const auto *CD = CID->getCategoryDecl())
293 D = CD;
294 }
295 const NamedDecl *ND = dyn_cast<NamedDecl>(D);
296 if (!ND)
297 return true;
298
299 // Mark D as referenced if this is a reference coming from the main file.
300 // D may not be an interesting symbol, but it's cheaper to check at the end.
301 auto &SM = ASTCtx->getSourceManager();
302 if (Opts.CountReferences &&
303 (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
304 SM.getFileID(SM.getSpellingLoc(Loc)) == SM.getMainFileID())
305 ReferencedDecls.insert(ND);
306
307 auto ID = getSymbolID(ND);
308 if (!ID)
309 return true;
310
311 // ND is the canonical (i.e. first) declaration. If it's in the main file
312 // (which is not a header), then no public declaration was visible, so assume
313 // it's main-file only.
314 bool IsMainFileOnly =
315 SM.isWrittenInMainFile(SM.getExpansionLoc(ND->getBeginLoc())) &&
316 !isHeaderFile(SM.getFileEntryForID(SM.getMainFileID())->getName(),
317 ASTCtx->getLangOpts());
318 // In C, printf is a redecl of an implicit builtin! So check OrigD instead.
319 if (ASTNode.OrigD->isImplicit() ||
320 !shouldCollectSymbol(*ND, *ASTCtx, Opts, IsMainFileOnly))
321 return true;
322
323 // Note: we need to process relations for all decl occurrences, including
324 // refs, because the indexing code only populates relations for specific
325 // occurrences. For example, RelationBaseOf is only populated for the
326 // occurrence inside the base-specifier.
327 processRelations(*ND, ID, Relations);
328
329 bool CollectRef = static_cast<bool>(Opts.RefFilter & toRefKind(Roles));
330 bool IsOnlyRef =
331 !(Roles & (static_cast<unsigned>(index::SymbolRole::Declaration) |
332 static_cast<unsigned>(index::SymbolRole::Definition)));
333
334 if (IsOnlyRef && !CollectRef)
335 return true;
336
337 // Unlike other fields, e.g. Symbols (which use spelling locations), we use
338 // file locations for references (as it aligns the behavior of clangd's
339 // AST-based xref).
340 // FIXME: we should try to use the file locations for other fields.
341 if (CollectRef &&
342 (!IsMainFileOnly || Opts.CollectMainFileRefs ||
343 ND->isExternallyVisible()) &&
344 !isa<NamespaceDecl>(ND) &&
345 (Opts.RefsInHeaders ||
346 SM.getFileID(SM.getFileLoc(Loc)) == SM.getMainFileID()))
347 DeclRefs[ND].push_back(
348 SymbolRef{SM.getFileLoc(Loc), Roles, ASTNode.Parent});
349 // Don't continue indexing if this is a mere reference.
350 if (IsOnlyRef)
351 return true;
352
353 // FIXME: ObjCPropertyDecl are not properly indexed here:
354 // - ObjCPropertyDecl may have an OrigD of ObjCPropertyImplDecl, which is
355 // not a NamedDecl.
356 auto *OriginalDecl = dyn_cast<NamedDecl>(ASTNode.OrigD);
357 if (!OriginalDecl)
358 return true;
359
360 const Symbol *BasicSymbol = Symbols.find(ID);
361 if (isPreferredDeclaration(*OriginalDecl, Roles))
362 // If OriginalDecl is preferred, replace/create the existing canonical
363 // declaration (e.g. a class forward declaration). There should be at most
364 // one duplicate as we expect to see only one preferred declaration per
365 // TU, because in practice they are definitions.
366 BasicSymbol = addDeclaration(*OriginalDecl, std::move(ID), IsMainFileOnly);
367 else if (!BasicSymbol || DeclIsCanonical)
368 BasicSymbol = addDeclaration(*ND, std::move(ID), IsMainFileOnly);
369
370 if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
371 addDefinition(*OriginalDecl, *BasicSymbol);
372
373 return true;
374 }
375
handleMacros(const MainFileMacros & MacroRefsToIndex)376 void SymbolCollector::handleMacros(const MainFileMacros &MacroRefsToIndex) {
377 assert(PP.get());
378 const auto &SM = PP->getSourceManager();
379 const auto *MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
380 assert(MainFileEntry);
381
382 const auto MainFileURI = toURI(SM, MainFileEntry->getName(), Opts);
383 // Add macro references.
384 for (const auto &IDToRefs : MacroRefsToIndex.MacroRefs) {
385 for (const auto &Range : IDToRefs.second) {
386 Ref R;
387 R.Location.Start.setLine(Range.start.line);
388 R.Location.Start.setColumn(Range.start.character);
389 R.Location.End.setLine(Range.end.line);
390 R.Location.End.setColumn(Range.end.character);
391 R.Location.FileURI = MainFileURI.c_str();
392 // FIXME: Add correct RefKind information to MainFileMacros.
393 R.Kind = RefKind::Reference;
394 Refs.insert(IDToRefs.first, R);
395 }
396 }
397 }
398
handleMacroOccurrence(const IdentifierInfo * Name,const MacroInfo * MI,index::SymbolRoleSet Roles,SourceLocation Loc)399 bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo *Name,
400 const MacroInfo *MI,
401 index::SymbolRoleSet Roles,
402 SourceLocation Loc) {
403 assert(PP.get());
404 // Builtin macros don't have useful locations and aren't needed in completion.
405 if (MI->isBuiltinMacro())
406 return true;
407
408 const auto &SM = PP->getSourceManager();
409 auto DefLoc = MI->getDefinitionLoc();
410 // Also avoid storing predefined macros like __DBL_MIN__.
411 if (SM.isWrittenInBuiltinFile(DefLoc))
412 return true;
413
414 auto ID = getSymbolID(Name->getName(), MI, SM);
415 if (!ID)
416 return true;
417
418 auto SpellingLoc = SM.getSpellingLoc(Loc);
419 bool IsMainFileOnly =
420 SM.isInMainFile(SM.getExpansionLoc(DefLoc)) &&
421 !isHeaderFile(SM.getFileEntryForID(SM.getMainFileID())->getName(),
422 ASTCtx->getLangOpts());
423 // Do not store references to main-file macros.
424 if ((static_cast<unsigned>(Opts.RefFilter) & Roles) && !IsMainFileOnly &&
425 (Opts.RefsInHeaders || SM.getFileID(SpellingLoc) == SM.getMainFileID()))
426 // FIXME: Populate container information for macro references.
427 MacroRefs[ID].push_back({Loc, Roles, /*Container=*/nullptr});
428
429 // Collect symbols.
430 if (!Opts.CollectMacro)
431 return true;
432
433 // Skip main-file macros if we are not collecting them.
434 if (IsMainFileOnly && !Opts.CollectMainFileSymbols)
435 return false;
436
437 // Mark the macro as referenced if this is a reference coming from the main
438 // file. The macro may not be an interesting symbol, but it's cheaper to check
439 // at the end.
440 if (Opts.CountReferences &&
441 (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
442 SM.getFileID(SpellingLoc) == SM.getMainFileID())
443 ReferencedMacros.insert(Name);
444
445 // Don't continue indexing if this is a mere reference.
446 // FIXME: remove macro with ID if it is undefined.
447 if (!(Roles & static_cast<unsigned>(index::SymbolRole::Declaration) ||
448 Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
449 return true;
450
451 // Only collect one instance in case there are multiple.
452 if (Symbols.find(ID) != nullptr)
453 return true;
454
455 Symbol S;
456 S.ID = std::move(ID);
457 S.Name = Name->getName();
458 if (!IsMainFileOnly) {
459 S.Flags |= Symbol::IndexedForCodeCompletion;
460 S.Flags |= Symbol::VisibleOutsideFile;
461 }
462 S.SymInfo = index::getSymbolInfoForMacro(*MI);
463 S.Origin = Opts.Origin;
464 std::string FileURI;
465 // FIXME: use the result to filter out symbols.
466 shouldIndexFile(SM.getFileID(Loc));
467 if (auto DeclLoc =
468 getTokenLocation(DefLoc, SM, Opts, PP->getLangOpts(), FileURI))
469 S.CanonicalDeclaration = *DeclLoc;
470
471 CodeCompletionResult SymbolCompletion(Name);
472 const auto *CCS = SymbolCompletion.CreateCodeCompletionStringForMacro(
473 *PP, *CompletionAllocator, *CompletionTUInfo);
474 std::string Signature;
475 std::string SnippetSuffix;
476 getSignature(*CCS, &Signature, &SnippetSuffix);
477 S.Signature = Signature;
478 S.CompletionSnippetSuffix = SnippetSuffix;
479
480 IndexedMacros.insert(Name);
481 setIncludeLocation(S, DefLoc);
482 Symbols.insert(S);
483 return true;
484 }
485
processRelations(const NamedDecl & ND,const SymbolID & ID,ArrayRef<index::SymbolRelation> Relations)486 void SymbolCollector::processRelations(
487 const NamedDecl &ND, const SymbolID &ID,
488 ArrayRef<index::SymbolRelation> Relations) {
489 // Store subtype relations.
490 if (!dyn_cast<TagDecl>(&ND))
491 return;
492
493 for (const auto &R : Relations) {
494 if (!shouldIndexRelation(R))
495 continue;
496
497 const Decl *Object = R.RelatedSymbol;
498
499 auto ObjectID = getSymbolID(Object);
500 if (!ObjectID)
501 continue;
502
503 // Record the relation.
504 // TODO: There may be cases where the object decl is not indexed for some
505 // reason. Those cases should probably be removed in due course, but for
506 // now there are two possible ways to handle it:
507 // (A) Avoid storing the relation in such cases.
508 // (B) Store it anyways. Clients will likely lookup() the SymbolID
509 // in the index and find nothing, but that's a situation they
510 // probably need to handle for other reasons anyways.
511 // We currently do (B) because it's simpler.
512 this->Relations.insert(Relation{ID, RelationKind::BaseOf, ObjectID});
513 }
514 }
515
setIncludeLocation(const Symbol & S,SourceLocation Loc)516 void SymbolCollector::setIncludeLocation(const Symbol &S, SourceLocation Loc) {
517 if (Opts.CollectIncludePath)
518 if (shouldCollectIncludePath(S.SymInfo.Kind))
519 // Use the expansion location to get the #include header since this is
520 // where the symbol is exposed.
521 IncludeFiles[S.ID] =
522 PP->getSourceManager().getDecomposedExpansionLoc(Loc).first;
523 }
524
finish()525 void SymbolCollector::finish() {
526 // At the end of the TU, add 1 to the refcount of all referenced symbols.
527 auto IncRef = [this](const SymbolID &ID) {
528 if (const auto *S = Symbols.find(ID)) {
529 Symbol Inc = *S;
530 ++Inc.References;
531 Symbols.insert(Inc);
532 }
533 };
534 for (const NamedDecl *ND : ReferencedDecls) {
535 if (auto ID = getSymbolID(ND)) {
536 IncRef(ID);
537 }
538 }
539 if (Opts.CollectMacro) {
540 assert(PP);
541 // First, drop header guards. We can't identify these until EOF.
542 for (const IdentifierInfo *II : IndexedMacros) {
543 if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
544 if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager()))
545 if (MI->isUsedForHeaderGuard())
546 Symbols.erase(ID);
547 }
548 // Now increment refcounts.
549 for (const IdentifierInfo *II : ReferencedMacros) {
550 if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
551 if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager()))
552 IncRef(ID);
553 }
554 }
555 // Fill in IncludeHeaders.
556 // We delay this until end of TU so header guards are all resolved.
557 // Symbols in slabs aren't mutable, so insert() has to walk all the strings
558 // :-(
559 for (const auto &Entry : IncludeFiles)
560 if (const Symbol *S = Symbols.find(Entry.first)) {
561 if (auto Header = getIncludeHeader(*S, Entry.second)) {
562 Symbol NewSym = *S;
563 NewSym.IncludeHeaders.push_back({std::move(*Header), 1});
564 Symbols.insert(NewSym);
565 }
566 }
567
568 const auto &SM = ASTCtx->getSourceManager();
569 llvm::DenseMap<FileID, std::string> URICache;
570 auto GetURI = [&](FileID FID) -> llvm::Optional<std::string> {
571 auto Found = URICache.find(FID);
572 if (Found == URICache.end()) {
573 if (auto *FileEntry = SM.getFileEntryForID(FID)) {
574 auto FileURI = toURI(SM, FileEntry->getName(), Opts);
575 Found = URICache.insert({FID, FileURI}).first;
576 } else {
577 // Ignore cases where we can not find a corresponding file entry for
578 // given location, e.g. symbols formed via macro concatenation.
579 return None;
580 }
581 }
582 return Found->second;
583 };
584 auto CollectRef = [&](SymbolID ID, const SymbolRef &LocAndRole,
585 bool Spelled = false) {
586 auto FileID = SM.getFileID(LocAndRole.Loc);
587 // FIXME: use the result to filter out references.
588 shouldIndexFile(FileID);
589 if (auto FileURI = GetURI(FileID)) {
590 auto Range = getTokenRange(LocAndRole.Loc, SM, ASTCtx->getLangOpts());
591 Ref R;
592 R.Location.Start = Range.first;
593 R.Location.End = Range.second;
594 R.Location.FileURI = FileURI->c_str();
595 R.Kind = toRefKind(LocAndRole.Roles, Spelled);
596 R.Container = getSymbolID(LocAndRole.Container);
597 Refs.insert(ID, R);
598 }
599 };
600 // Populate Refs slab from MacroRefs.
601 // FIXME: All MacroRefs are marked as Spelled now, but this should be checked.
602 for (const auto &IDAndRefs : MacroRefs)
603 for (const auto &LocAndRole : IDAndRefs.second)
604 CollectRef(IDAndRefs.first, LocAndRole, /*Spelled=*/true);
605 // Populate Refs slab from DeclRefs.
606 llvm::DenseMap<FileID, std::vector<syntax::Token>> FilesToTokensCache;
607 for (auto &DeclAndRef : DeclRefs) {
608 if (auto ID = getSymbolID(DeclAndRef.first)) {
609 for (auto &LocAndRole : DeclAndRef.second) {
610 const auto FileID = SM.getFileID(LocAndRole.Loc);
611 // FIXME: It's better to use TokenBuffer by passing spelled tokens from
612 // the caller of SymbolCollector.
613 if (!FilesToTokensCache.count(FileID))
614 FilesToTokensCache[FileID] =
615 syntax::tokenize(FileID, SM, ASTCtx->getLangOpts());
616 llvm::ArrayRef<syntax::Token> Tokens = FilesToTokensCache[FileID];
617 // Check if the referenced symbol is spelled exactly the same way the
618 // corresponding NamedDecl is. If it is, mark this reference as spelled.
619 const auto *IdentifierToken =
620 spelledIdentifierTouching(LocAndRole.Loc, Tokens);
621 DeclarationName Name = DeclAndRef.first->getDeclName();
622 const auto NameKind = Name.getNameKind();
623 bool IsTargetKind = NameKind == DeclarationName::Identifier ||
624 NameKind == DeclarationName::CXXConstructorName;
625 bool Spelled = IdentifierToken && IsTargetKind &&
626 Name.getAsString() == IdentifierToken->text(SM);
627 CollectRef(ID, LocAndRole, Spelled);
628 }
629 }
630 }
631
632 ReferencedDecls.clear();
633 ReferencedMacros.clear();
634 DeclRefs.clear();
635 FilesToIndexCache.clear();
636 HeaderIsSelfContainedCache.clear();
637 IncludeFiles.clear();
638 }
639
addDeclaration(const NamedDecl & ND,SymbolID ID,bool IsMainFileOnly)640 const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID,
641 bool IsMainFileOnly) {
642 auto &Ctx = ND.getASTContext();
643 auto &SM = Ctx.getSourceManager();
644
645 Symbol S;
646 S.ID = std::move(ID);
647 std::string QName = printQualifiedName(ND);
648 // FIXME: this returns foo:bar: for objective-C methods, we prefer only foo:
649 // for consistency with CodeCompletionString and a clean name/signature split.
650 std::tie(S.Scope, S.Name) = splitQualifiedName(QName);
651 std::string TemplateSpecializationArgs = printTemplateSpecializationArgs(ND);
652 S.TemplateSpecializationArgs = TemplateSpecializationArgs;
653
654 // We collect main-file symbols, but do not use them for code completion.
655 if (!IsMainFileOnly && isIndexedForCodeCompletion(ND, Ctx))
656 S.Flags |= Symbol::IndexedForCodeCompletion;
657 if (isImplementationDetail(&ND))
658 S.Flags |= Symbol::ImplementationDetail;
659 if (!IsMainFileOnly)
660 S.Flags |= Symbol::VisibleOutsideFile;
661 S.SymInfo = index::getSymbolInfo(&ND);
662 std::string FileURI;
663 auto Loc = nameLocation(ND, SM);
664 assert(Loc.isValid() && "Invalid source location for NamedDecl");
665 // FIXME: use the result to filter out symbols.
666 shouldIndexFile(SM.getFileID(Loc));
667 if (auto DeclLoc =
668 getTokenLocation(Loc, SM, Opts, ASTCtx->getLangOpts(), FileURI))
669 S.CanonicalDeclaration = *DeclLoc;
670
671 S.Origin = Opts.Origin;
672 if (ND.getAvailability() == AR_Deprecated)
673 S.Flags |= Symbol::Deprecated;
674
675 // Add completion info.
676 // FIXME: we may want to choose a different redecl, or combine from several.
677 assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set.");
678 // We use the primary template, as clang does during code completion.
679 CodeCompletionResult SymbolCompletion(&getTemplateOrThis(ND), 0);
680 const auto *CCS = SymbolCompletion.CreateCodeCompletionString(
681 *ASTCtx, *PP, CodeCompletionContext::CCC_Symbol, *CompletionAllocator,
682 *CompletionTUInfo,
683 /*IncludeBriefComments*/ false);
684 std::string Documentation =
685 formatDocumentation(*CCS, getDocComment(Ctx, SymbolCompletion,
686 /*CommentsFromHeaders=*/true));
687 if (!(S.Flags & Symbol::IndexedForCodeCompletion)) {
688 if (Opts.StoreAllDocumentation)
689 S.Documentation = Documentation;
690 Symbols.insert(S);
691 return Symbols.find(S.ID);
692 }
693 S.Documentation = Documentation;
694 std::string Signature;
695 std::string SnippetSuffix;
696 getSignature(*CCS, &Signature, &SnippetSuffix);
697 S.Signature = Signature;
698 S.CompletionSnippetSuffix = SnippetSuffix;
699 std::string ReturnType = getReturnType(*CCS);
700 S.ReturnType = ReturnType;
701
702 llvm::Optional<OpaqueType> TypeStorage;
703 if (S.Flags & Symbol::IndexedForCodeCompletion) {
704 TypeStorage = OpaqueType::fromCompletionResult(*ASTCtx, SymbolCompletion);
705 if (TypeStorage)
706 S.Type = TypeStorage->raw();
707 }
708
709 Symbols.insert(S);
710 setIncludeLocation(S, ND.getLocation());
711 return Symbols.find(S.ID);
712 }
713
addDefinition(const NamedDecl & ND,const Symbol & DeclSym)714 void SymbolCollector::addDefinition(const NamedDecl &ND,
715 const Symbol &DeclSym) {
716 if (DeclSym.Definition)
717 return;
718 // If we saw some forward declaration, we end up copying the symbol.
719 // This is not ideal, but avoids duplicating the "is this a definition" check
720 // in clang::index. We should only see one definition.
721 Symbol S = DeclSym;
722 std::string FileURI;
723 const auto &SM = ND.getASTContext().getSourceManager();
724 auto Loc = nameLocation(ND, SM);
725 // FIXME: use the result to filter out symbols.
726 shouldIndexFile(SM.getFileID(Loc));
727 if (auto DefLoc =
728 getTokenLocation(Loc, SM, Opts, ASTCtx->getLangOpts(), FileURI))
729 S.Definition = *DefLoc;
730 Symbols.insert(S);
731 }
732
733 /// Gets a canonical include (URI of the header or <header> or "header") for
734 /// header of \p FID (which should usually be the *expansion* file).
735 /// Returns None if includes should not be inserted for this file.
getIncludeHeader(const Symbol & S,FileID FID)736 llvm::Optional<std::string> SymbolCollector::getIncludeHeader(const Symbol &S,
737 FileID FID) {
738 const SourceManager &SM = ASTCtx->getSourceManager();
739 const FileEntry *FE = SM.getFileEntryForID(FID);
740 if (!FE || FE->getName().empty())
741 return llvm::None;
742 llvm::StringRef Filename = FE->getName();
743 // If a file is mapped by canonical headers, use that mapping, regardless
744 // of whether it's an otherwise-good header (header guards etc).
745 if (Opts.Includes) {
746 llvm::SmallString<256> QName = S.Scope;
747 QName.append(S.Name);
748 llvm::StringRef Canonical = Opts.Includes->mapHeader(Filename, QName);
749 // If we had a mapping, always use it.
750 if (Canonical.startswith("<") || Canonical.startswith("\"")) {
751 // Hack: there are two std::move() overloads from different headers.
752 // CanonicalIncludes returns the common one-arg one from <utility>.
753 if (Canonical == "<utility>" && S.Name == "move" &&
754 S.Signature.contains(','))
755 Canonical = "<algorithm>";
756 return Canonical.str();
757 }
758 if (Canonical != Filename)
759 return toURI(SM, Canonical, Opts);
760 }
761 if (!isSelfContainedHeader(FID)) {
762 // A .inc or .def file is often included into a real header to define
763 // symbols (e.g. LLVM tablegen files).
764 if (Filename.endswith(".inc") || Filename.endswith(".def"))
765 return getIncludeHeader(S, SM.getFileID(SM.getIncludeLoc(FID)));
766 // Conservatively refuse to insert #includes to files without guards.
767 return llvm::None;
768 }
769 // Standard case: just insert the file itself.
770 return toURI(SM, Filename, Opts);
771 }
772
isSelfContainedHeader(FileID FID)773 bool SymbolCollector::isSelfContainedHeader(FileID FID) {
774 // The real computation (which will be memoized).
775 auto Compute = [&] {
776 const SourceManager &SM = ASTCtx->getSourceManager();
777 const FileEntry *FE = SM.getFileEntryForID(FID);
778 if (!FE)
779 return false;
780 // FIXME: Should files that have been #import'd be considered
781 // self-contained? That's really a property of the includer,
782 // not of the file.
783 if (!PP->getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE) &&
784 !PP->getHeaderSearchInfo().hasFileBeenImported(FE))
785 return false;
786 // This pattern indicates that a header can't be used without
787 // particular preprocessor state, usually set up by another header.
788 if (isDontIncludeMeHeader(SM.getBufferData(FID)))
789 return false;
790 return true;
791 };
792
793 auto R = HeaderIsSelfContainedCache.try_emplace(FID, false);
794 if (R.second)
795 R.first->second = Compute();
796 return R.first->second;
797 }
798
799 // Is Line an #if or #ifdef directive?
isIf(llvm::StringRef Line)800 static bool isIf(llvm::StringRef Line) {
801 Line = Line.ltrim();
802 if (!Line.consume_front("#"))
803 return false;
804 Line = Line.ltrim();
805 return Line.startswith("if");
806 }
807 // Is Line an #error directive mentioning includes?
isErrorAboutInclude(llvm::StringRef Line)808 static bool isErrorAboutInclude(llvm::StringRef Line) {
809 Line = Line.ltrim();
810 if (!Line.consume_front("#"))
811 return false;
812 Line = Line.ltrim();
813 if (!Line.startswith("error"))
814 return false;
815 return Line.contains_lower("includ"); // Matches "include" or "including".
816 }
817
isDontIncludeMeHeader(llvm::StringRef Content)818 bool SymbolCollector::isDontIncludeMeHeader(llvm::StringRef Content) {
819 llvm::StringRef Line;
820 // Only sniff up to 100 lines or 10KB.
821 Content = Content.take_front(100 * 100);
822 for (unsigned I = 0; I < 100 && !Content.empty(); ++I) {
823 std::tie(Line, Content) = Content.split('\n');
824 if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first))
825 return true;
826 }
827 return false;
828 }
829
shouldIndexFile(FileID FID)830 bool SymbolCollector::shouldIndexFile(FileID FID) {
831 if (!Opts.FileFilter)
832 return true;
833 auto I = FilesToIndexCache.try_emplace(FID);
834 if (I.second)
835 I.first->second = Opts.FileFilter(ASTCtx->getSourceManager(), FID);
836 return I.first->second;
837 }
838
839 } // namespace clangd
840 } // namespace clang
841