//===--- SymbolCollector.cpp -------------------------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9 #include "SymbolCollector.h"
10 #include "AST.h"
11 #include "CanonicalIncludes.h"
12 #include "CodeComplete.h"
13 #include "CodeCompletionStrings.h"
14 #include "ExpectedTypes.h"
15 #include "SourceCode.h"
16 #include "SymbolLocation.h"
17 #include "URI.h"
18 #include "index/Relation.h"
19 #include "index/SymbolID.h"
20 #include "support/Logger.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/DeclBase.h"
23 #include "clang/AST/DeclCXX.h"
24 #include "clang/AST/DeclObjC.h"
25 #include "clang/AST/DeclTemplate.h"
26 #include "clang/Basic/SourceLocation.h"
27 #include "clang/Basic/SourceManager.h"
28 #include "clang/Basic/Specifiers.h"
29 #include "clang/Index/IndexSymbol.h"
30 #include "clang/Index/IndexingAction.h"
31 #include "clang/Index/USRGeneration.h"
32 #include "clang/Lex/Preprocessor.h"
33 #include "clang/Tooling/Syntax/Tokens.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/FileSystem.h"
36 #include "llvm/Support/MemoryBuffer.h"
37 #include "llvm/Support/Path.h"
38
39 namespace clang {
40 namespace clangd {
41 namespace {
42
43 /// If \p ND is a template specialization, returns the described template.
44 /// Otherwise, returns \p ND.
getTemplateOrThis(const NamedDecl & ND)45 const NamedDecl &getTemplateOrThis(const NamedDecl &ND) {
46 if (auto T = ND.getDescribedTemplate())
47 return *T;
48 return ND;
49 }
50
51 // Checks whether the decl is a private symbol in a header generated by
52 // protobuf compiler.
53 // FIXME: make filtering extensible when there are more use cases for symbol
54 // filters.
isPrivateProtoDecl(const NamedDecl & ND)55 bool isPrivateProtoDecl(const NamedDecl &ND) {
56 const auto &SM = ND.getASTContext().getSourceManager();
57 if (!isProtoFile(nameLocation(ND, SM), SM))
58 return false;
59
60 // ND without identifier can be operators.
61 if (ND.getIdentifier() == nullptr)
62 return false;
63 auto Name = ND.getIdentifier()->getName();
64 if (!Name.contains('_'))
65 return false;
66 // Nested proto entities (e.g. Message::Nested) have top-level decls
67 // that shouldn't be used (Message_Nested). Ignore them completely.
68 // The nested entities are dangling type aliases, we may want to reconsider
69 // including them in the future.
70 // For enum constants, SOME_ENUM_CONSTANT is not private and should be
71 // indexed. Outer_INNER is private. This heuristic relies on naming style, it
72 // will include OUTER_INNER and exclude some_enum_constant.
73 // FIXME: the heuristic relies on naming style (i.e. no underscore in
74 // user-defined names) and can be improved.
75 return (ND.getKind() != Decl::EnumConstant) || llvm::any_of(Name, islower);
76 }
77
78 // We only collect #include paths for symbols that are suitable for global code
79 // completion, except for namespaces since #include path for a namespace is hard
80 // to define.
shouldCollectIncludePath(index::SymbolKind Kind)81 bool shouldCollectIncludePath(index::SymbolKind Kind) {
82 using SK = index::SymbolKind;
83 switch (Kind) {
84 case SK::Macro:
85 case SK::Enum:
86 case SK::Struct:
87 case SK::Class:
88 case SK::Union:
89 case SK::TypeAlias:
90 case SK::Using:
91 case SK::Function:
92 case SK::Variable:
93 case SK::EnumConstant:
94 return true;
95 default:
96 return false;
97 }
98 }
99
100 // Return the symbol range of the token at \p TokLoc.
101 std::pair<SymbolLocation::Position, SymbolLocation::Position>
getTokenRange(SourceLocation TokLoc,const SourceManager & SM,const LangOptions & LangOpts)102 getTokenRange(SourceLocation TokLoc, const SourceManager &SM,
103 const LangOptions &LangOpts) {
104 auto CreatePosition = [&SM](SourceLocation Loc) {
105 auto LSPLoc = sourceLocToPosition(SM, Loc);
106 SymbolLocation::Position Pos;
107 Pos.setLine(LSPLoc.line);
108 Pos.setColumn(LSPLoc.character);
109 return Pos;
110 };
111
112 auto TokenLength = clang::Lexer::MeasureTokenLength(TokLoc, SM, LangOpts);
113 return {CreatePosition(TokLoc),
114 CreatePosition(TokLoc.getLocWithOffset(TokenLength))};
115 }
116
117 // Checks whether \p ND is a good candidate to be the *canonical* declaration of
118 // its symbol (e.g. a go-to-declaration target). This overrides the default of
119 // using Clang's canonical declaration, which is the first in the TU.
120 //
121 // Example: preferring a class declaration over its forward declaration.
isPreferredDeclaration(const NamedDecl & ND,index::SymbolRoleSet Roles)122 bool isPreferredDeclaration(const NamedDecl &ND, index::SymbolRoleSet Roles) {
123 const auto &SM = ND.getASTContext().getSourceManager();
124 if (isa<TagDecl>(ND))
125 return (Roles & static_cast<unsigned>(index::SymbolRole::Definition)) &&
126 !isInsideMainFile(ND.getLocation(), SM);
127 if (const auto *ID = dyn_cast<ObjCInterfaceDecl>(&ND))
128 return ID->isThisDeclarationADefinition();
129 if (const auto *PD = dyn_cast<ObjCProtocolDecl>(&ND))
130 return PD->isThisDeclarationADefinition();
131 return false;
132 }
133
toRefKind(index::SymbolRoleSet Roles,bool Spelled=false)134 RefKind toRefKind(index::SymbolRoleSet Roles, bool Spelled = false) {
135 RefKind Result = RefKind::Unknown;
136 if (Roles & static_cast<unsigned>(index::SymbolRole::Declaration))
137 Result |= RefKind::Declaration;
138 if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
139 Result |= RefKind::Definition;
140 if (Roles & static_cast<unsigned>(index::SymbolRole::Reference))
141 Result |= RefKind::Reference;
142 if (Spelled)
143 Result |= RefKind::Spelled;
144 return Result;
145 }
146
indexableRelation(const index::SymbolRelation & R)147 llvm::Optional<RelationKind> indexableRelation(const index::SymbolRelation &R) {
148 if (R.Roles & static_cast<unsigned>(index::SymbolRole::RelationBaseOf))
149 return RelationKind::BaseOf;
150 if (R.Roles & static_cast<unsigned>(index::SymbolRole::RelationOverrideOf))
151 return RelationKind::OverriddenBy;
152 return None;
153 }
154
155 // Given a ref contained in enclosing decl `Enclosing`, return
156 // the decl that should be used as that ref's Ref::Container. This is
157 // usually `Enclosing` itself, but in cases where `Enclosing` is not
158 // indexed, we walk further up because Ref::Container should always be
159 // an indexed symbol.
160 // Note: we don't use DeclContext as the container as in some cases
161 // it's useful to use a Decl which is not a DeclContext. For example,
162 // for a ref occurring in the initializer of a namespace-scope variable,
163 // it's useful to use that variable as the container, as otherwise the
164 // next enclosing DeclContext would be a NamespaceDecl or TranslationUnitDecl,
165 // which are both not indexed and less granular than we'd like for use cases
166 // like call hierarchy.
getRefContainer(const Decl * Enclosing,const SymbolCollector::Options & Opts)167 const Decl *getRefContainer(const Decl *Enclosing,
168 const SymbolCollector::Options &Opts) {
169 while (Enclosing) {
170 const auto *ND = dyn_cast<NamedDecl>(Enclosing);
171 if (ND && SymbolCollector::shouldCollectSymbol(*ND, ND->getASTContext(),
172 Opts, true)) {
173 break;
174 }
175 Enclosing = dyn_cast_or_null<Decl>(Enclosing->getDeclContext());
176 }
177 return Enclosing;
178 }
179
180 } // namespace
181
182 // Encapsulates decisions about how to record header paths in the index,
183 // including filename normalization, URI conversion etc.
184 // Expensive checks are cached internally.
185 class SymbolCollector::HeaderFileURICache {
186 // Weird double-indirect access to PP, which might not be ready yet when
187 // HeaderFiles is created but will be by the time it's used.
188 // (IndexDataConsumer::setPreprocessor can happen before or after initialize)
189 const std::shared_ptr<Preprocessor> &PP;
190 const SourceManager &SM;
191 const CanonicalIncludes *Includes;
192 llvm::StringRef FallbackDir;
193 llvm::DenseMap<const FileEntry *, const std::string *> CacheFEToURI;
194 llvm::StringMap<std::string> CachePathToURI;
195 llvm::DenseMap<FileID, llvm::StringRef> CacheFIDToInclude;
196
197 public:
HeaderFileURICache(const std::shared_ptr<Preprocessor> & PP,const SourceManager & SM,const SymbolCollector::Options & Opts)198 HeaderFileURICache(const std::shared_ptr<Preprocessor> &PP,
199 const SourceManager &SM,
200 const SymbolCollector::Options &Opts)
201 : PP(PP), SM(SM), Includes(Opts.Includes), FallbackDir(Opts.FallbackDir) {
202 }
203
204 // Returns a canonical URI for the file \p FE.
205 // We attempt to make the path absolute first.
toURI(const FileEntry * FE)206 const std::string &toURI(const FileEntry *FE) {
207 auto R = CacheFEToURI.try_emplace(FE);
208 if (R.second) {
209 auto CanonPath = getCanonicalPath(FE, SM);
210 R.first->second = &toURIInternal(CanonPath ? *CanonPath : FE->getName());
211 }
212 return *R.first->second;
213 }
214
215 // Returns a canonical URI for \p Path.
216 // If the file is in the FileManager, use that to canonicalize the path.
217 // We attempt to make the path absolute in any case.
toURI(llvm::StringRef Path)218 const std::string &toURI(llvm::StringRef Path) {
219 if (auto File = SM.getFileManager().getFile(Path))
220 return toURI(*File);
221 return toURIInternal(Path);
222 }
223
224 // Gets a canonical include (URI of the header or <header> or "header") for
225 // header of \p FID (which should usually be the *expansion* file).
226 // This does not account for any per-symbol overrides!
227 // Returns "" if includes should not be inserted for this file.
getIncludeHeader(FileID FID)228 llvm::StringRef getIncludeHeader(FileID FID) {
229 auto R = CacheFIDToInclude.try_emplace(FID);
230 if (R.second)
231 R.first->second = getIncludeHeaderUncached(FID);
232 return R.first->second;
233 }
234
235 private:
236 // This takes care of making paths absolute and path->URI caching, but no
237 // FileManager-based canonicalization.
toURIInternal(llvm::StringRef Path)238 const std::string &toURIInternal(llvm::StringRef Path) {
239 auto R = CachePathToURI.try_emplace(Path);
240 if (R.second) {
241 llvm::SmallString<256> AbsPath = Path;
242 if (!llvm::sys::path::is_absolute(AbsPath) && !FallbackDir.empty())
243 llvm::sys::fs::make_absolute(FallbackDir, AbsPath);
244 assert(llvm::sys::path::is_absolute(AbsPath) &&
245 "If the VFS can't make paths absolute, a FallbackDir must be "
246 "provided");
247 llvm::sys::path::remove_dots(AbsPath, /*remove_dot_dot=*/true);
248 R.first->second = URI::create(AbsPath).toString();
249 }
250 return R.first->second;
251 }
252
getIncludeHeaderUncached(FileID FID)253 llvm::StringRef getIncludeHeaderUncached(FileID FID) {
254 const FileEntry *FE = SM.getFileEntryForID(FID);
255 if (!FE || FE->getName().empty())
256 return "";
257 llvm::StringRef Filename = FE->getName();
258 // If a file is mapped by canonical headers, use that mapping, regardless
259 // of whether it's an otherwise-good header (header guards etc).
260 if (Includes) {
261 llvm::StringRef Canonical = Includes->mapHeader(Filename);
262 if (!Canonical.empty()) {
263 // If we had a mapping, always use it.
264 if (Canonical.startswith("<") || Canonical.startswith("\""))
265 return Canonical;
266 return toURI(Canonical);
267 }
268 }
269 if (!isSelfContainedHeader(FID, FE)) {
270 // A .inc or .def file is often included into a real header to define
271 // symbols (e.g. LLVM tablegen files).
272 if (Filename.endswith(".inc") || Filename.endswith(".def"))
273 // Don't use cache reentrantly due to iterator invalidation.
274 return getIncludeHeaderUncached(SM.getFileID(SM.getIncludeLoc(FID)));
275 // Conservatively refuse to insert #includes to files without guards.
276 return "";
277 }
278 // Standard case: just insert the file itself.
279 return toURI(FE);
280 }
281
isSelfContainedHeader(FileID FID,const FileEntry * FE)282 bool isSelfContainedHeader(FileID FID, const FileEntry *FE) {
283 // FIXME: Should files that have been #import'd be considered
284 // self-contained? That's really a property of the includer,
285 // not of the file.
286 if (!PP->getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE) &&
287 !PP->getHeaderSearchInfo().hasFileBeenImported(FE))
288 return false;
289 // This pattern indicates that a header can't be used without
290 // particular preprocessor state, usually set up by another header.
291 if (isDontIncludeMeHeader(SM.getBufferData(FID)))
292 return false;
293 return true;
294 }
295
296 // Is Line an #if or #ifdef directive?
isIf(llvm::StringRef Line)297 static bool isIf(llvm::StringRef Line) {
298 Line = Line.ltrim();
299 if (!Line.consume_front("#"))
300 return false;
301 Line = Line.ltrim();
302 return Line.startswith("if");
303 }
304
305 // Is Line an #error directive mentioning includes?
isErrorAboutInclude(llvm::StringRef Line)306 static bool isErrorAboutInclude(llvm::StringRef Line) {
307 Line = Line.ltrim();
308 if (!Line.consume_front("#"))
309 return false;
310 Line = Line.ltrim();
311 if (!Line.startswith("error"))
312 return false;
313 return Line.contains_insensitive(
314 "includ"); // Matches "include" or "including".
315 }
316
317 // Heuristically headers that only want to be included via an umbrella.
isDontIncludeMeHeader(llvm::StringRef Content)318 static bool isDontIncludeMeHeader(llvm::StringRef Content) {
319 llvm::StringRef Line;
320 // Only sniff up to 100 lines or 10KB.
321 Content = Content.take_front(100 * 100);
322 for (unsigned I = 0; I < 100 && !Content.empty(); ++I) {
323 std::tie(Line, Content) = Content.split('\n');
324 if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first))
325 return true;
326 }
327 return false;
328 }
329 };
330
331 // Return the symbol location of the token at \p TokLoc.
332 llvm::Optional<SymbolLocation>
getTokenLocation(SourceLocation TokLoc)333 SymbolCollector::getTokenLocation(SourceLocation TokLoc) {
334 const auto &SM = ASTCtx->getSourceManager();
335 auto *FE = SM.getFileEntryForID(SM.getFileID(TokLoc));
336 if (!FE)
337 return None;
338
339 SymbolLocation Result;
340 Result.FileURI = HeaderFileURIs->toURI(FE).c_str();
341 auto Range = getTokenRange(TokLoc, SM, ASTCtx->getLangOpts());
342 Result.Start = Range.first;
343 Result.End = Range.second;
344
345 return Result;
346 }
347
SymbolCollector(Options Opts)348 SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {}
349 SymbolCollector::~SymbolCollector() = default;
350
initialize(ASTContext & Ctx)351 void SymbolCollector::initialize(ASTContext &Ctx) {
352 ASTCtx = &Ctx;
353 HeaderFileURIs = std::make_unique<HeaderFileURICache>(
354 PP, ASTCtx->getSourceManager(), Opts);
355 CompletionAllocator = std::make_shared<GlobalCodeCompletionAllocator>();
356 CompletionTUInfo =
357 std::make_unique<CodeCompletionTUInfo>(CompletionAllocator);
358 }
359
shouldCollectSymbol(const NamedDecl & ND,const ASTContext & ASTCtx,const Options & Opts,bool IsMainFileOnly)360 bool SymbolCollector::shouldCollectSymbol(const NamedDecl &ND,
361 const ASTContext &ASTCtx,
362 const Options &Opts,
363 bool IsMainFileOnly) {
364 // Skip anonymous declarations, e.g (anonymous enum/class/struct).
365 if (ND.getDeclName().isEmpty())
366 return false;
367
368 // Skip main-file symbols if we are not collecting them.
369 if (IsMainFileOnly && !Opts.CollectMainFileSymbols)
370 return false;
371
372 // Skip symbols in anonymous namespaces in header files.
373 if (!IsMainFileOnly && ND.isInAnonymousNamespace())
374 return false;
375
376 // For function local symbols, index only classes and its member functions.
377 if (index::isFunctionLocalSymbol(&ND))
378 return isa<RecordDecl>(ND) ||
379 (ND.isCXXInstanceMember() && ND.isFunctionOrFunctionTemplate());
380
381 // We want most things but not "local" symbols such as symbols inside
382 // FunctionDecl, BlockDecl, ObjCMethodDecl and OMPDeclareReductionDecl.
383 // FIXME: Need a matcher for ExportDecl in order to include symbols declared
384 // within an export.
385 const auto *DeclCtx = ND.getDeclContext();
386 switch (DeclCtx->getDeclKind()) {
387 case Decl::TranslationUnit:
388 case Decl::Namespace:
389 case Decl::LinkageSpec:
390 case Decl::Enum:
391 case Decl::ObjCProtocol:
392 case Decl::ObjCInterface:
393 case Decl::ObjCCategory:
394 case Decl::ObjCCategoryImpl:
395 case Decl::ObjCImplementation:
396 break;
397 default:
398 // Record has a few derivations (e.g. CXXRecord, Class specialization), it's
399 // easier to cast.
400 if (!isa<RecordDecl>(DeclCtx))
401 return false;
402 }
403
404 // Avoid indexing internal symbols in protobuf generated headers.
405 if (isPrivateProtoDecl(ND))
406 return false;
407 return true;
408 }
409
410 // Always return true to continue indexing.
handleDeclOccurrence(const Decl * D,index::SymbolRoleSet Roles,llvm::ArrayRef<index::SymbolRelation> Relations,SourceLocation Loc,index::IndexDataConsumer::ASTNodeInfo ASTNode)411 bool SymbolCollector::handleDeclOccurrence(
412 const Decl *D, index::SymbolRoleSet Roles,
413 llvm::ArrayRef<index::SymbolRelation> Relations, SourceLocation Loc,
414 index::IndexDataConsumer::ASTNodeInfo ASTNode) {
415 assert(ASTCtx && PP.get() && HeaderFileURIs);
416 assert(CompletionAllocator && CompletionTUInfo);
417 assert(ASTNode.OrigD);
418 // Indexing API puts canonical decl into D, which might not have a valid
419 // source location for implicit/built-in decls. Fallback to original decl in
420 // such cases.
421 if (D->getLocation().isInvalid())
422 D = ASTNode.OrigD;
423 // If OrigD is an declaration associated with a friend declaration and it's
424 // not a definition, skip it. Note that OrigD is the occurrence that the
425 // collector is currently visiting.
426 if ((ASTNode.OrigD->getFriendObjectKind() !=
427 Decl::FriendObjectKind::FOK_None) &&
428 !(Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
429 return true;
430 // A declaration created for a friend declaration should not be used as the
431 // canonical declaration in the index. Use OrigD instead, unless we've already
432 // picked a replacement for D
433 if (D->getFriendObjectKind() != Decl::FriendObjectKind::FOK_None)
434 D = CanonicalDecls.try_emplace(D, ASTNode.OrigD).first->second;
435 // Flag to mark that D should be considered canonical meaning its declaration
436 // will override any previous declaration for the Symbol.
437 bool DeclIsCanonical = false;
438 // Avoid treating ObjCImplementationDecl as a canonical declaration if it has
439 // a corresponding non-implicit and non-forward declared ObjcInterfaceDecl.
440 if (const auto *IID = dyn_cast<ObjCImplementationDecl>(D)) {
441 DeclIsCanonical = true;
442 if (const auto *CID = IID->getClassInterface())
443 if (const auto *DD = CID->getDefinition())
444 if (!DD->isImplicitInterfaceDecl())
445 D = DD;
446 }
447 // Avoid treating ObjCCategoryImplDecl as a canonical declaration in favor of
448 // its ObjCCategoryDecl if it has one.
449 if (const auto *CID = dyn_cast<ObjCCategoryImplDecl>(D)) {
450 DeclIsCanonical = true;
451 if (const auto *CD = CID->getCategoryDecl())
452 D = CD;
453 }
454 const NamedDecl *ND = dyn_cast<NamedDecl>(D);
455 if (!ND)
456 return true;
457
458 // Mark D as referenced if this is a reference coming from the main file.
459 // D may not be an interesting symbol, but it's cheaper to check at the end.
460 auto &SM = ASTCtx->getSourceManager();
461 if (Opts.CountReferences &&
462 (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
463 SM.getFileID(SM.getSpellingLoc(Loc)) == SM.getMainFileID())
464 ReferencedDecls.insert(ND);
465
466 auto ID = getSymbolID(ND);
467 if (!ID)
468 return true;
469
470 // ND is the canonical (i.e. first) declaration. If it's in the main file
471 // (which is not a header), then no public declaration was visible, so assume
472 // it's main-file only.
473 bool IsMainFileOnly =
474 SM.isWrittenInMainFile(SM.getExpansionLoc(ND->getBeginLoc())) &&
475 !isHeaderFile(SM.getFileEntryForID(SM.getMainFileID())->getName(),
476 ASTCtx->getLangOpts());
477 // In C, printf is a redecl of an implicit builtin! So check OrigD instead.
478 if (ASTNode.OrigD->isImplicit() ||
479 !shouldCollectSymbol(*ND, *ASTCtx, Opts, IsMainFileOnly))
480 return true;
481
482 // Note: we need to process relations for all decl occurrences, including
483 // refs, because the indexing code only populates relations for specific
484 // occurrences. For example, RelationBaseOf is only populated for the
485 // occurrence inside the base-specifier.
486 processRelations(*ND, ID, Relations);
487
488 bool CollectRef = static_cast<bool>(Opts.RefFilter & toRefKind(Roles));
489 bool IsOnlyRef =
490 !(Roles & (static_cast<unsigned>(index::SymbolRole::Declaration) |
491 static_cast<unsigned>(index::SymbolRole::Definition)));
492
493 if (IsOnlyRef && !CollectRef)
494 return true;
495
496 // Unlike other fields, e.g. Symbols (which use spelling locations), we use
497 // file locations for references (as it aligns the behavior of clangd's
498 // AST-based xref).
499 // FIXME: we should try to use the file locations for other fields.
500 if (CollectRef &&
501 (!IsMainFileOnly || Opts.CollectMainFileRefs ||
502 ND->isExternallyVisible()) &&
503 !isa<NamespaceDecl>(ND) &&
504 (Opts.RefsInHeaders ||
505 SM.getFileID(SM.getFileLoc(Loc)) == SM.getMainFileID()))
506 DeclRefs[ND].push_back(SymbolRef{SM.getFileLoc(Loc), Roles,
507 getRefContainer(ASTNode.Parent, Opts)});
508 // Don't continue indexing if this is a mere reference.
509 if (IsOnlyRef)
510 return true;
511
512 // FIXME: ObjCPropertyDecl are not properly indexed here:
513 // - ObjCPropertyDecl may have an OrigD of ObjCPropertyImplDecl, which is
514 // not a NamedDecl.
515 auto *OriginalDecl = dyn_cast<NamedDecl>(ASTNode.OrigD);
516 if (!OriginalDecl)
517 return true;
518
519 const Symbol *BasicSymbol = Symbols.find(ID);
520 if (isPreferredDeclaration(*OriginalDecl, Roles))
521 // If OriginalDecl is preferred, replace/create the existing canonical
522 // declaration (e.g. a class forward declaration). There should be at most
523 // one duplicate as we expect to see only one preferred declaration per
524 // TU, because in practice they are definitions.
525 BasicSymbol = addDeclaration(*OriginalDecl, std::move(ID), IsMainFileOnly);
526 else if (!BasicSymbol || DeclIsCanonical)
527 BasicSymbol = addDeclaration(*ND, std::move(ID), IsMainFileOnly);
528
529 if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
530 addDefinition(*OriginalDecl, *BasicSymbol);
531
532 return true;
533 }
534
handleMacros(const MainFileMacros & MacroRefsToIndex)535 void SymbolCollector::handleMacros(const MainFileMacros &MacroRefsToIndex) {
536 assert(HeaderFileURIs && PP.get());
537 const auto &SM = PP->getSourceManager();
538 const auto *MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
539 assert(MainFileEntry);
540
541 const std::string &MainFileURI = HeaderFileURIs->toURI(MainFileEntry);
542 // Add macro references.
543 for (const auto &IDToRefs : MacroRefsToIndex.MacroRefs) {
544 for (const auto &MacroRef : IDToRefs.second) {
545 const auto &Range = MacroRef.Rng;
546 bool IsDefinition = MacroRef.IsDefinition;
547 Ref R;
548 R.Location.Start.setLine(Range.start.line);
549 R.Location.Start.setColumn(Range.start.character);
550 R.Location.End.setLine(Range.end.line);
551 R.Location.End.setColumn(Range.end.character);
552 R.Location.FileURI = MainFileURI.c_str();
553 R.Kind = IsDefinition ? RefKind::Definition : RefKind::Reference;
554 Refs.insert(IDToRefs.first, R);
555 if (IsDefinition) {
556 Symbol S;
557 S.ID = IDToRefs.first;
558 auto StartLoc = cantFail(sourceLocationInMainFile(SM, Range.start));
559 auto EndLoc = cantFail(sourceLocationInMainFile(SM, Range.end));
560 S.Name = toSourceCode(SM, SourceRange(StartLoc, EndLoc));
561 S.SymInfo.Kind = index::SymbolKind::Macro;
562 S.SymInfo.SubKind = index::SymbolSubKind::None;
563 S.SymInfo.Properties = index::SymbolPropertySet();
564 S.SymInfo.Lang = index::SymbolLanguage::C;
565 S.Origin = Opts.Origin;
566 S.CanonicalDeclaration = R.Location;
567 Symbols.insert(S);
568 }
569 }
570 }
571 }
572
handleMacroOccurrence(const IdentifierInfo * Name,const MacroInfo * MI,index::SymbolRoleSet Roles,SourceLocation Loc)573 bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo *Name,
574 const MacroInfo *MI,
575 index::SymbolRoleSet Roles,
576 SourceLocation Loc) {
577 assert(PP.get());
578 // Builtin macros don't have useful locations and aren't needed in completion.
579 if (MI->isBuiltinMacro())
580 return true;
581
582 const auto &SM = PP->getSourceManager();
583 auto DefLoc = MI->getDefinitionLoc();
584 // Also avoid storing predefined macros like __DBL_MIN__.
585 if (SM.isWrittenInBuiltinFile(DefLoc) ||
586 Name->getName() == "__GCC_HAVE_DWARF2_CFI_ASM")
587 return true;
588
589 auto ID = getSymbolID(Name->getName(), MI, SM);
590 if (!ID)
591 return true;
592
593 auto SpellingLoc = SM.getSpellingLoc(Loc);
594 bool IsMainFileOnly =
595 SM.isInMainFile(SM.getExpansionLoc(DefLoc)) &&
596 !isHeaderFile(SM.getFileEntryForID(SM.getMainFileID())->getName(),
597 ASTCtx->getLangOpts());
598 // Do not store references to main-file macros.
599 if ((static_cast<unsigned>(Opts.RefFilter) & Roles) && !IsMainFileOnly &&
600 (Opts.RefsInHeaders || SM.getFileID(SpellingLoc) == SM.getMainFileID()))
601 // FIXME: Populate container information for macro references.
602 MacroRefs[ID].push_back({Loc, Roles, /*Container=*/nullptr});
603
604 // Collect symbols.
605 if (!Opts.CollectMacro)
606 return true;
607
608 // Skip main-file macros if we are not collecting them.
609 if (IsMainFileOnly && !Opts.CollectMainFileSymbols)
610 return false;
611
612 // Mark the macro as referenced if this is a reference coming from the main
613 // file. The macro may not be an interesting symbol, but it's cheaper to check
614 // at the end.
615 if (Opts.CountReferences &&
616 (Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
617 SM.getFileID(SpellingLoc) == SM.getMainFileID())
618 ReferencedMacros.insert(Name);
619
620 // Don't continue indexing if this is a mere reference.
621 // FIXME: remove macro with ID if it is undefined.
622 if (!(Roles & static_cast<unsigned>(index::SymbolRole::Declaration) ||
623 Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
624 return true;
625
626 // Only collect one instance in case there are multiple.
627 if (Symbols.find(ID) != nullptr)
628 return true;
629
630 Symbol S;
631 S.ID = std::move(ID);
632 S.Name = Name->getName();
633 if (!IsMainFileOnly) {
634 S.Flags |= Symbol::IndexedForCodeCompletion;
635 S.Flags |= Symbol::VisibleOutsideFile;
636 }
637 S.SymInfo = index::getSymbolInfoForMacro(*MI);
638 S.Origin = Opts.Origin;
639 // FIXME: use the result to filter out symbols.
640 shouldIndexFile(SM.getFileID(Loc));
641 if (auto DeclLoc = getTokenLocation(DefLoc))
642 S.CanonicalDeclaration = *DeclLoc;
643
644 CodeCompletionResult SymbolCompletion(Name);
645 const auto *CCS = SymbolCompletion.CreateCodeCompletionStringForMacro(
646 *PP, *CompletionAllocator, *CompletionTUInfo);
647 std::string Signature;
648 std::string SnippetSuffix;
649 getSignature(*CCS, &Signature, &SnippetSuffix);
650 S.Signature = Signature;
651 S.CompletionSnippetSuffix = SnippetSuffix;
652
653 IndexedMacros.insert(Name);
654 setIncludeLocation(S, DefLoc);
655 Symbols.insert(S);
656 return true;
657 }
658
processRelations(const NamedDecl & ND,const SymbolID & ID,ArrayRef<index::SymbolRelation> Relations)659 void SymbolCollector::processRelations(
660 const NamedDecl &ND, const SymbolID &ID,
661 ArrayRef<index::SymbolRelation> Relations) {
662 for (const auto &R : Relations) {
663 auto RKind = indexableRelation(R);
664 if (!RKind)
665 continue;
666 const Decl *Object = R.RelatedSymbol;
667
668 auto ObjectID = getSymbolID(Object);
669 if (!ObjectID)
670 continue;
671
672 // Record the relation.
673 // TODO: There may be cases where the object decl is not indexed for some
674 // reason. Those cases should probably be removed in due course, but for
675 // now there are two possible ways to handle it:
676 // (A) Avoid storing the relation in such cases.
677 // (B) Store it anyways. Clients will likely lookup() the SymbolID
678 // in the index and find nothing, but that's a situation they
679 // probably need to handle for other reasons anyways.
680 // We currently do (B) because it's simpler.
681 if (*RKind == RelationKind::BaseOf)
682 this->Relations.insert({ID, *RKind, ObjectID});
683 else if (*RKind == RelationKind::OverriddenBy)
684 this->Relations.insert({ObjectID, *RKind, ID});
685 }
686 }
687
setIncludeLocation(const Symbol & S,SourceLocation Loc)688 void SymbolCollector::setIncludeLocation(const Symbol &S, SourceLocation Loc) {
689 if (Opts.CollectIncludePath)
690 if (shouldCollectIncludePath(S.SymInfo.Kind))
691 // Use the expansion location to get the #include header since this is
692 // where the symbol is exposed.
693 IncludeFiles[S.ID] =
694 PP->getSourceManager().getDecomposedExpansionLoc(Loc).first;
695 }
696
finish()697 void SymbolCollector::finish() {
698 // At the end of the TU, add 1 to the refcount of all referenced symbols.
699 auto IncRef = [this](const SymbolID &ID) {
700 if (const auto *S = Symbols.find(ID)) {
701 Symbol Inc = *S;
702 ++Inc.References;
703 Symbols.insert(Inc);
704 }
705 };
706 for (const NamedDecl *ND : ReferencedDecls) {
707 if (auto ID = getSymbolID(ND)) {
708 IncRef(ID);
709 }
710 }
711 if (Opts.CollectMacro) {
712 assert(PP);
713 // First, drop header guards. We can't identify these until EOF.
714 for (const IdentifierInfo *II : IndexedMacros) {
715 if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
716 if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager()))
717 if (MI->isUsedForHeaderGuard())
718 Symbols.erase(ID);
719 }
720 // Now increment refcounts.
721 for (const IdentifierInfo *II : ReferencedMacros) {
722 if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
723 if (auto ID = getSymbolID(II->getName(), MI, PP->getSourceManager()))
724 IncRef(ID);
725 }
726 }
727 // Fill in IncludeHeaders.
728 // We delay this until end of TU so header guards are all resolved.
729 llvm::SmallString<128> QName;
730 for (const auto &Entry : IncludeFiles) {
731 if (const Symbol *S = Symbols.find(Entry.first)) {
732 llvm::StringRef IncludeHeader;
733 // Look for an overridden include header for this symbol specifically.
734 if (Opts.Includes) {
735 QName = S->Scope;
736 QName.append(S->Name);
737 IncludeHeader = Opts.Includes->mapSymbol(QName);
738 if (!IncludeHeader.empty()) {
739 if (IncludeHeader.front() != '"' && IncludeHeader.front() != '<')
740 IncludeHeader = HeaderFileURIs->toURI(IncludeHeader);
741 else if (IncludeHeader == "<utility>" && QName == "std::move" &&
742 S->Signature.contains(','))
743 IncludeHeader = "<algorithm>";
744 }
745 }
746 // Otherwise find the approprate include header for the defining file.
747 if (IncludeHeader.empty())
748 IncludeHeader = HeaderFileURIs->getIncludeHeader(Entry.second);
749
750 // Symbols in slabs aren't mutable, insert() has to walk all the strings
751 if (!IncludeHeader.empty()) {
752 Symbol NewSym = *S;
753 NewSym.IncludeHeaders.push_back({IncludeHeader, 1});
754 Symbols.insert(NewSym);
755 }
756 }
757 }
758
759 const auto &SM = ASTCtx->getSourceManager();
760 auto CollectRef = [&](SymbolID ID, const SymbolRef &LocAndRole,
761 bool Spelled = false) {
762 auto FileID = SM.getFileID(LocAndRole.Loc);
763 // FIXME: use the result to filter out references.
764 shouldIndexFile(FileID);
765 if (const auto *FE = SM.getFileEntryForID(FileID)) {
766 auto Range = getTokenRange(LocAndRole.Loc, SM, ASTCtx->getLangOpts());
767 Ref R;
768 R.Location.Start = Range.first;
769 R.Location.End = Range.second;
770 R.Location.FileURI = HeaderFileURIs->toURI(FE).c_str();
771 R.Kind = toRefKind(LocAndRole.Roles, Spelled);
772 R.Container = getSymbolID(LocAndRole.Container);
773 Refs.insert(ID, R);
774 }
775 };
776 // Populate Refs slab from MacroRefs.
777 // FIXME: All MacroRefs are marked as Spelled now, but this should be checked.
778 for (const auto &IDAndRefs : MacroRefs)
779 for (const auto &LocAndRole : IDAndRefs.second)
780 CollectRef(IDAndRefs.first, LocAndRole, /*Spelled=*/true);
781 // Populate Refs slab from DeclRefs.
782 llvm::DenseMap<FileID, std::vector<syntax::Token>> FilesToTokensCache;
783 for (auto &DeclAndRef : DeclRefs) {
784 if (auto ID = getSymbolID(DeclAndRef.first)) {
785 for (auto &LocAndRole : DeclAndRef.second) {
786 const auto FileID = SM.getFileID(LocAndRole.Loc);
787 // FIXME: It's better to use TokenBuffer by passing spelled tokens from
788 // the caller of SymbolCollector.
789 if (!FilesToTokensCache.count(FileID))
790 FilesToTokensCache[FileID] =
791 syntax::tokenize(FileID, SM, ASTCtx->getLangOpts());
792 llvm::ArrayRef<syntax::Token> Tokens = FilesToTokensCache[FileID];
793 // Check if the referenced symbol is spelled exactly the same way the
794 // corresponding NamedDecl is. If it is, mark this reference as spelled.
795 const auto *IdentifierToken =
796 spelledIdentifierTouching(LocAndRole.Loc, Tokens);
797 DeclarationName Name = DeclAndRef.first->getDeclName();
798 const auto NameKind = Name.getNameKind();
799 bool IsTargetKind = NameKind == DeclarationName::Identifier ||
800 NameKind == DeclarationName::CXXConstructorName;
801 bool Spelled = IdentifierToken && IsTargetKind &&
802 Name.getAsString() == IdentifierToken->text(SM);
803 CollectRef(ID, LocAndRole, Spelled);
804 }
805 }
806 }
807
808 ReferencedDecls.clear();
809 ReferencedMacros.clear();
810 DeclRefs.clear();
811 IncludeFiles.clear();
812 }
813
addDeclaration(const NamedDecl & ND,SymbolID ID,bool IsMainFileOnly)814 const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID,
815 bool IsMainFileOnly) {
816 auto &Ctx = ND.getASTContext();
817 auto &SM = Ctx.getSourceManager();
818
819 Symbol S;
820 S.ID = std::move(ID);
821 std::string QName = printQualifiedName(ND);
822 // FIXME: this returns foo:bar: for objective-C methods, we prefer only foo:
823 // for consistency with CodeCompletionString and a clean name/signature split.
824 std::tie(S.Scope, S.Name) = splitQualifiedName(QName);
825 std::string TemplateSpecializationArgs = printTemplateSpecializationArgs(ND);
826 S.TemplateSpecializationArgs = TemplateSpecializationArgs;
827
828 // We collect main-file symbols, but do not use them for code completion.
829 if (!IsMainFileOnly && isIndexedForCodeCompletion(ND, Ctx))
830 S.Flags |= Symbol::IndexedForCodeCompletion;
831 if (isImplementationDetail(&ND))
832 S.Flags |= Symbol::ImplementationDetail;
833 if (!IsMainFileOnly)
834 S.Flags |= Symbol::VisibleOutsideFile;
835 S.SymInfo = index::getSymbolInfo(&ND);
836 auto Loc = nameLocation(ND, SM);
837 assert(Loc.isValid() && "Invalid source location for NamedDecl");
838 // FIXME: use the result to filter out symbols.
839 shouldIndexFile(SM.getFileID(Loc));
840 if (auto DeclLoc = getTokenLocation(Loc))
841 S.CanonicalDeclaration = *DeclLoc;
842
843 S.Origin = Opts.Origin;
844 if (ND.getAvailability() == AR_Deprecated)
845 S.Flags |= Symbol::Deprecated;
846
847 // Add completion info.
848 // FIXME: we may want to choose a different redecl, or combine from several.
849 assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set.");
850 // We use the primary template, as clang does during code completion.
851 CodeCompletionResult SymbolCompletion(&getTemplateOrThis(ND), 0);
852 const auto *CCS = SymbolCompletion.CreateCodeCompletionString(
853 *ASTCtx, *PP, CodeCompletionContext::CCC_Symbol, *CompletionAllocator,
854 *CompletionTUInfo,
855 /*IncludeBriefComments*/ false);
856 std::string Documentation =
857 formatDocumentation(*CCS, getDocComment(Ctx, SymbolCompletion,
858 /*CommentsFromHeaders=*/true));
859 if (!(S.Flags & Symbol::IndexedForCodeCompletion)) {
860 if (Opts.StoreAllDocumentation)
861 S.Documentation = Documentation;
862 Symbols.insert(S);
863 return Symbols.find(S.ID);
864 }
865 S.Documentation = Documentation;
866 std::string Signature;
867 std::string SnippetSuffix;
868 getSignature(*CCS, &Signature, &SnippetSuffix);
869 S.Signature = Signature;
870 S.CompletionSnippetSuffix = SnippetSuffix;
871 std::string ReturnType = getReturnType(*CCS);
872 S.ReturnType = ReturnType;
873
874 llvm::Optional<OpaqueType> TypeStorage;
875 if (S.Flags & Symbol::IndexedForCodeCompletion) {
876 TypeStorage = OpaqueType::fromCompletionResult(*ASTCtx, SymbolCompletion);
877 if (TypeStorage)
878 S.Type = TypeStorage->raw();
879 }
880
881 Symbols.insert(S);
882 setIncludeLocation(S, ND.getLocation());
883 return Symbols.find(S.ID);
884 }
885
addDefinition(const NamedDecl & ND,const Symbol & DeclSym)886 void SymbolCollector::addDefinition(const NamedDecl &ND,
887 const Symbol &DeclSym) {
888 if (DeclSym.Definition)
889 return;
890 // If we saw some forward declaration, we end up copying the symbol.
891 // This is not ideal, but avoids duplicating the "is this a definition" check
892 // in clang::index. We should only see one definition.
893 Symbol S = DeclSym;
894 const auto &SM = ND.getASTContext().getSourceManager();
895 auto Loc = nameLocation(ND, SM);
896 // FIXME: use the result to filter out symbols.
897 shouldIndexFile(SM.getFileID(Loc));
898 if (auto DefLoc = getTokenLocation(Loc))
899 S.Definition = *DefLoc;
900 Symbols.insert(S);
901 }
902
shouldIndexFile(FileID FID)903 bool SymbolCollector::shouldIndexFile(FileID FID) {
904 if (!Opts.FileFilter)
905 return true;
906 auto I = FilesToIndexCache.try_emplace(FID);
907 if (I.second)
908 I.first->second = Opts.FileFilter(ASTCtx->getSourceManager(), FID);
909 return I.first->second;
910 }
911
912 } // namespace clangd
913 } // namespace clang
914